mirror of
https://gitlab.com/Anson-Projects/zine.git
synced 2025-06-15 05:06:39 +00:00
Add Archive Posts
This commit is contained in:
parent
61d8acbb9d
commit
4c82106817
5
Cargo.lock
generated
5
Cargo.lock
generated
@ -27,6 +27,7 @@ dependencies = [
|
||||
"feed-rs",
|
||||
"log",
|
||||
"maud",
|
||||
"rand",
|
||||
"rayon",
|
||||
"reqwest",
|
||||
"rss",
|
||||
@ -1610,9 +1611,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "simple_logger"
|
||||
version = "4.3.3"
|
||||
version = "5.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e7e46c8c90251d47d08b28b8a419ffb4aede0f87c2eea95e17d1d5bacbf3ef1"
|
||||
checksum = "e8c5dfa5e08767553704aa0ffd9d9794d527103c736aba9854773851fd7497eb"
|
||||
dependencies = [
|
||||
"colored",
|
||||
"log",
|
||||
|
@ -14,10 +14,11 @@ maud = "0.26"
|
||||
chrono = "0.4"
|
||||
scraper = "0.19"
|
||||
rayon = "1.8"
|
||||
simple_logger = "4.3"
|
||||
simple_logger = "5.0"
|
||||
log = "0.4"
|
||||
rss = "2.0"
|
||||
anyhow = "1.0"
|
||||
rand = "0.8"
|
||||
|
||||
[dev-dependencies]
|
||||
clippy = "0.0.302"
|
||||
|
@ -1,6 +1,6 @@
|
||||
# Anson's Blogroll Zine
|
||||
|
||||
Anson's Blogroll Zine is a Rust application designed to aggregate content from multiple RSS feeds, creating a personalized news page. It fetches articles from `feeds.txt` and `featured.txt`, generates HTML cards for each entry, and outputs a single, styled HTML page.
|
||||
Anson's Blogroll Zine is a Rust application designed to aggregate content from multiple RSS feeds, creating a personalized news page. It fetches articles from `feeds.txt` and `featured.txt`, generates HTML cards for each post, and outputs a single, styled HTML page.
|
||||
|
||||
## Algorithm
|
||||
|
||||
|
@ -1,2 +1 @@
|
||||
https://ciechanow.ski/atom.xml
|
||||
https://factorio.com/blog/rss
|
||||
https://ciechanow.ski/atom.xml
|
@ -76,6 +76,7 @@ https://www.brendangregg.com/blog/rss.xml
|
||||
https://www.doscher.com/rss/
|
||||
https://www.elidedbranches.com/feeds/posts/default
|
||||
https://www.evanjones.ca/index.rss
|
||||
https://www.factorio.com/blog/rss
|
||||
https://www.jeffgeerling.com/blog.xml
|
||||
https://www.joelonsoftware.com/feed/
|
||||
https://www.makerstations.io/rss/
|
||||
|
76
src/main.rs
76
src/main.rs
@ -3,6 +3,8 @@ extern crate feed_rs;
|
||||
extern crate maud;
|
||||
extern crate reqwest;
|
||||
|
||||
use rand::seq::SliceRandom;
|
||||
use rand::thread_rng;
|
||||
use std::error::Error;
|
||||
use std::fs::write;
|
||||
use std::fs::DirBuilder;
|
||||
@ -16,54 +18,70 @@ use std::collections::HashMap;
|
||||
fn main() -> Result<(), Box<dyn Error>> {
|
||||
simple_logger::init_with_level(log::Level::Info).unwrap();
|
||||
|
||||
let mut entries = utilities::read_feed("feeds.txt");
|
||||
let all_posts = utilities::read_feed("feeds.txt");
|
||||
|
||||
entries.retain(|entry| entry.score.is_positive());
|
||||
let mut posts = all_posts.clone();
|
||||
posts.retain(|post| post.score.is_positive());
|
||||
|
||||
// Count occurences of main urls
|
||||
let url_counts = entries.iter().fold(HashMap::new(), |mut acc, post| {
|
||||
// Count occurrences of main urls to punish blogs that post really frequently
|
||||
// which also filters out blogs that make tiny updates and change the published date
|
||||
let url_counts = posts.iter().fold(HashMap::new(), |mut acc, post| {
|
||||
*acc.entry(post.main_url.clone()).or_insert(0) += 1;
|
||||
acc
|
||||
});
|
||||
|
||||
// Punish blogs that post really often
|
||||
entries.iter_mut().for_each(|entry| {
|
||||
entry.score = (entry.score / url_counts.get(&entry.main_url).unwrap()) as i64;
|
||||
posts.iter_mut().for_each(|post| {
|
||||
post.score = (post.score / url_counts.get(&post.main_url).unwrap()) as i64;
|
||||
});
|
||||
|
||||
let mut featured = utilities::read_feed("featured_feeds.txt");
|
||||
|
||||
// Give featured a small boost in points
|
||||
featured = featured
|
||||
.iter_mut()
|
||||
.map(|post| {
|
||||
post.score *= 1.5 as i64;
|
||||
post.score = (post.score as f64 * 1.5) as i64;
|
||||
post.clone()
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
entries.extend(featured);
|
||||
posts.extend(featured);
|
||||
|
||||
entries.par_iter_mut().for_each(utilities::find_image);
|
||||
entries.retain(|entry| entry.score.is_positive());
|
||||
posts.par_iter_mut().for_each(utilities::find_image);
|
||||
posts.par_iter_mut().for_each(utilities::validate);
|
||||
|
||||
entries.sort();
|
||||
posts.sort();
|
||||
|
||||
// Remove bottom 10% from list
|
||||
entries.truncate(entries.len() - (entries.len() as f64 * 0.1).ceil() as usize);
|
||||
|
||||
// Make sure first entry has an image since it is used as the featured post
|
||||
let mut max_iter = 0;
|
||||
while entries.first().unwrap().image_url.is_none() {
|
||||
entries[0].score += -100;
|
||||
entries.sort();
|
||||
|
||||
max_iter += 1;
|
||||
if max_iter > 10000 {
|
||||
break;
|
||||
}
|
||||
// Move the post with an image_url to the head of the list
|
||||
if let Some(pos) = posts.iter().position(|post| post.image_url.is_some()) {
|
||||
let post_with_image = posts.remove(pos);
|
||||
posts.insert(0, post_with_image);
|
||||
}
|
||||
|
||||
let index = site_generator::generate_index(entries.clone());
|
||||
posts.truncate(16);
|
||||
|
||||
let mut old_posts = all_posts;
|
||||
|
||||
old_posts.retain(|p| !posts.contains(p));
|
||||
old_posts.shuffle(&mut thread_rng());
|
||||
|
||||
let mut archive_posts: Vec<utilities::Post> = Vec::new();
|
||||
let archive_size = 100;
|
||||
|
||||
while (archive_posts.len() < archive_size) && (old_posts.len() > 50) {
|
||||
let iter_size = archive_size - archive_posts.len();
|
||||
|
||||
let mut extracted = old_posts
|
||||
.drain(0..=(iter_size + 50))
|
||||
.collect::<Vec<utilities::Post>>();
|
||||
|
||||
extracted.par_iter_mut().for_each(utilities::validate);
|
||||
extracted.retain(|post| post.score != 0);
|
||||
|
||||
archive_posts.extend(extracted);
|
||||
}
|
||||
|
||||
archive_posts.truncate(archive_size);
|
||||
|
||||
let index = site_generator::generate_index(posts.clone(), archive_posts.clone());
|
||||
let index_path = Path::new("output/index.html");
|
||||
DirBuilder::new()
|
||||
.recursive(true)
|
||||
@ -75,7 +93,7 @@ fn main() -> Result<(), Box<dyn Error>> {
|
||||
Err(e) => log::error!("Failed to write to {}: {}", index_path.display(), e),
|
||||
}
|
||||
|
||||
let feed = site_generator::generate_rss(entries.clone());
|
||||
let feed = site_generator::generate_rss(posts.clone());
|
||||
let feed_path = Path::new("output/feed.xml");
|
||||
DirBuilder::new()
|
||||
.recursive(true)
|
||||
|
@ -11,23 +11,23 @@ use rss::{ChannelBuilder, Item, ItemBuilder};
|
||||
|
||||
use crate::utilities;
|
||||
|
||||
fn create_featured_card(entry: &utilities::Post) -> Markup {
|
||||
fn create_featured_card(post: &utilities::Post) -> Markup {
|
||||
html! {
|
||||
article class="featured" {
|
||||
header {
|
||||
@if entry.image_url.is_some() {
|
||||
img src=(entry.image_url.as_ref().unwrap()) alt="Featured image";
|
||||
@if post.image_url.is_some() {
|
||||
img src=(post.image_url.as_ref().unwrap()) alt="Featured image";
|
||||
}
|
||||
hgroup {
|
||||
h2 { (entry.title) }
|
||||
a href=(format!("http://{}", entry.main_url)) { (entry.main_url) }
|
||||
h2 { (post.title) }
|
||||
a href=(format!("http://{}", post.main_url)) { (post.main_url) }
|
||||
}
|
||||
}
|
||||
body {
|
||||
p { (entry.truncated_description) }
|
||||
p { (post.truncated_description) }
|
||||
}
|
||||
footer {
|
||||
a class="grid" href=(entry.link) style="--pico-text-decoration: none;" {
|
||||
a class="grid" href=(post.link) style="--pico-text-decoration: none;" {
|
||||
button class="outline primary" { "Read Featured Post" }
|
||||
}
|
||||
}
|
||||
@ -35,24 +35,24 @@ fn create_featured_card(entry: &utilities::Post) -> Markup {
|
||||
}
|
||||
}
|
||||
|
||||
fn create_post_card(entry: &utilities::Post) -> Markup {
|
||||
fn create_post_card(post: &utilities::Post) -> Markup {
|
||||
html! {
|
||||
article {
|
||||
header {
|
||||
hgroup {
|
||||
h2 { (entry.title) }
|
||||
a href=(format!("http://{}", entry.main_url)) { (entry.main_url) }
|
||||
h2 { (post.title) }
|
||||
a href=(format!("http://{}", post.main_url)) { (post.main_url) }
|
||||
}
|
||||
}
|
||||
body {
|
||||
@if entry.image_url.is_some() {
|
||||
img src=(entry.image_url.as_ref().unwrap()) alt="Entry image";
|
||||
@if post.image_url.is_some() {
|
||||
img src=(post.image_url.as_ref().unwrap());
|
||||
p;
|
||||
}
|
||||
p { (entry.truncated_description) }
|
||||
p { (post.truncated_description) }
|
||||
}
|
||||
footer {
|
||||
a class="grid" href=(entry.link) style="--pico-text-decoration: none;" {
|
||||
a class="grid" href=(post.link) style="--pico-text-decoration: none;" {
|
||||
button class="outline secondary" { "Read Post" }
|
||||
}
|
||||
}
|
||||
@ -110,11 +110,11 @@ fn generate_header() -> Markup {
|
||||
}
|
||||
|
||||
fn about_modal(entries: Vec<utilities::Post>) -> Markup {
|
||||
// Get link for each entry, which is a blog post then,
|
||||
// Get link for each post, which is a blog post then,
|
||||
// convert it to a url to the main page of the blog
|
||||
let mut links = entries
|
||||
.iter()
|
||||
.map(|entry| entry.main_url.as_str())
|
||||
.map(|post| post.main_url.as_str())
|
||||
.collect::<std::collections::HashSet<_>>()
|
||||
.into_iter()
|
||||
.collect::<Vec<_>>();
|
||||
@ -153,9 +153,64 @@ fn about_modal(entries: Vec<utilities::Post>) -> Markup {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn generate_index(mut entries: Vec<utilities::Post>) -> Markup {
|
||||
let featured = entries.first().unwrap().clone();
|
||||
entries.remove(0);
|
||||
pub fn generate_head() -> Markup {
|
||||
html! {
|
||||
head {
|
||||
title { "Anson's Zine | Public RSS Feed" }
|
||||
meta charset="utf-8";
|
||||
meta name="viewport" content="width=device-width, initial-scale=1";
|
||||
meta name="description" content="Blogroll zine of RSS feeds for Anson"
|
||||
link rel="apple-touch-icon" sizes="180x180" href="/favicon/apple-touch-icon.png";
|
||||
link rel="icon" type="image/png" sizes="32x32" href="/favicon/favicon-32x32.png";
|
||||
link rel="icon" type="image/png" sizes="16x16" href="/favicon/favicon-16x16.png";
|
||||
link rel="manifest" href="/favicon/site.webmanifest";
|
||||
link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@picocss/pico@2/css/pico.blue.min.css";
|
||||
link rel="stylesheet" href="style.css";
|
||||
|
||||
// Open Graph meta tags
|
||||
meta property="og:title" content="Anson's Zine | Public RSS Feed";
|
||||
meta property="og:description" content="Blogroll zine of RSS feeds for Anson";
|
||||
meta property="og:url" content="https://blogroll.ansonbiggs.com";
|
||||
meta property="og:type" content="website";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn generate_archive_table(posts: Vec<utilities::Post>) -> Markup {
|
||||
html! {
|
||||
table class="striped" {
|
||||
thead {
|
||||
tr {
|
||||
th { "Title" }
|
||||
th { "Date" }
|
||||
}
|
||||
}
|
||||
tbody {
|
||||
@for post in posts {
|
||||
tr {
|
||||
td {
|
||||
a href=(post.link) { (post.title) }
|
||||
br;
|
||||
small {
|
||||
a href=(format!("http://{}", post.main_url)) { (post.main_url) }
|
||||
}
|
||||
}
|
||||
td {
|
||||
(post.date.format("%B %d, %Y").to_string())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn generate_index(
|
||||
mut posts: Vec<utilities::Post>,
|
||||
archive_posts: Vec<utilities::Post>,
|
||||
) -> Markup {
|
||||
let featured = posts.first().unwrap().clone();
|
||||
posts.remove(0);
|
||||
log::info!(
|
||||
"Featured article: {}, img: {:?}",
|
||||
featured.link,
|
||||
@ -165,39 +220,23 @@ pub fn generate_index(mut entries: Vec<utilities::Post>) -> Markup {
|
||||
html! {
|
||||
(maud::DOCTYPE)
|
||||
html lang="en" {
|
||||
head {
|
||||
title { "Anson's Zine | Public RSS Feed" }
|
||||
meta charset="utf-8";
|
||||
meta name="viewport" content="width=device-width, initial-scale=1";
|
||||
meta name="description" content="Blogroll zine of RSS feeds for Anson"
|
||||
link rel="apple-touch-icon" sizes="180x180" href="/favicon/apple-touch-icon.png";
|
||||
link rel="icon" type="image/png" sizes="32x32" href="/favicon/favicon-32x32.png";
|
||||
link rel="icon" type="image/png" sizes="16x16" href="/favicon/favicon-16x16.png";
|
||||
link rel="manifest" href="/favicon/site.webmanifest";
|
||||
link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@picocss/pico@2/css/pico.blue.min.css";
|
||||
link rel="stylesheet" href="style.css";
|
||||
|
||||
// Open Graph meta tags
|
||||
meta property="og:title" content="Anson's Zine | Public RSS Feed";
|
||||
meta property="og:description" content="Blogroll zine of RSS feeds for Anson";
|
||||
meta property="og:url" content="https://blogroll.ansonbiggs.com";
|
||||
meta property="og:type" content="website";
|
||||
}
|
||||
{(generate_head())}
|
||||
body { main class="container" {
|
||||
{(generate_header())}
|
||||
{(create_featured_card(&featured))}
|
||||
div class="grid" {
|
||||
@for column_entries in utilities::group_by_nth(&entries, 3) {
|
||||
@for column_posts in utilities::group_by_nth(&posts, 3) {
|
||||
div {
|
||||
@for entry in column_entries {
|
||||
{(create_post_card(&entry))}
|
||||
@for post in column_posts {
|
||||
{(create_post_card(&post))}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
h2 {"Random Old Posts"}
|
||||
{(generate_archive_table(archive_posts))}
|
||||
{(generate_footer())}
|
||||
{(about_modal(entries))}
|
||||
{(about_modal(posts))}
|
||||
script src="modal.js" {}
|
||||
script src="minimal-theme-switcher.js" {}
|
||||
}}
|
||||
|
@ -1,4 +1,4 @@
|
||||
use crate::web_fetchers;
|
||||
use crate::web_fetchers::{self, is_valid_url};
|
||||
use chrono::{DateTime, Utc};
|
||||
use feed_rs::model::Entry;
|
||||
use rayon::prelude::*;
|
||||
@ -16,7 +16,7 @@ pub struct Post {
|
||||
pub image_url: Option<String>,
|
||||
pub truncated_description: String,
|
||||
pub main_url: String,
|
||||
pub score: i64,
|
||||
pub score: i64, // Score values still very in flux
|
||||
}
|
||||
|
||||
impl Ord for Post {
|
||||
@ -203,24 +203,46 @@ pub fn group_by_nth<T: Clone>(slice: &[T], n: usize) -> Vec<Vec<T>> {
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn find_image(entry: &mut Post) {
|
||||
if let Some(image_url) = &entry.image_url {
|
||||
pub fn find_image(post: &mut Post) {
|
||||
if let Some(image_url) = &post.image_url {
|
||||
match web_fetchers::is_valid_image_url(image_url) {
|
||||
Ok(true) => {}
|
||||
_ => {
|
||||
entry.image_url = None;
|
||||
post.image_url = None;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
match web_fetchers::fetch_social_image(entry.link.clone()) {
|
||||
match web_fetchers::fetch_social_image(post.link.clone()) {
|
||||
Ok(social_image_url) => {
|
||||
if web_fetchers::is_valid_image_url(&social_image_url).unwrap_or(false) {
|
||||
entry.image_url = Some(social_image_url);
|
||||
post.image_url = Some(social_image_url);
|
||||
}
|
||||
}
|
||||
Err(error) => {
|
||||
log::warn!("{}: {}", error, entry.link.clone());
|
||||
post.score = (post.score as f64 * 0.9) as i64;
|
||||
log::warn!("{}: {}", error, post.link.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn validate(post: &mut Post) {
|
||||
if post.title.is_empty() {
|
||||
post.score = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
if !post.lang.is_empty() && post.lang != "en" {
|
||||
post.score = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
if post.truncated_description.is_empty() {
|
||||
post.score = (post.score as f64 * 0.5) as i64;
|
||||
};
|
||||
|
||||
if !is_valid_url(post.link.as_str()) {
|
||||
post.score = 0;
|
||||
println!("{} is not valid", post.link.as_str());
|
||||
};
|
||||
}
|
||||
|
@ -46,3 +46,9 @@ pub fn is_valid_image_url(url: &str) -> Result<bool, Box<dyn std::error::Error>>
|
||||
ct.to_str().map_or(false, |s| s.starts_with("image/"))
|
||||
}))
|
||||
}
|
||||
|
||||
pub fn is_valid_url(url: &str) -> bool {
|
||||
let client = reqwest::blocking::Client::new();
|
||||
|
||||
client.get(url).send().is_ok()
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user