1
0
mirror of https://gitlab.com/Anson-Projects/zine.git synced 2025-06-15 13:16:39 +00:00

Add Archive Posts

This commit is contained in:
Anson Biggs 2024-05-15 04:44:27 +00:00
parent 61d8acbb9d
commit 4c82106817
9 changed files with 172 additions and 85 deletions

5
Cargo.lock generated
View File

@ -27,6 +27,7 @@ dependencies = [
"feed-rs",
"log",
"maud",
"rand",
"rayon",
"reqwest",
"rss",
@ -1610,9 +1611,9 @@ dependencies = [
[[package]]
name = "simple_logger"
version = "4.3.3"
version = "5.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e7e46c8c90251d47d08b28b8a419ffb4aede0f87c2eea95e17d1d5bacbf3ef1"
checksum = "e8c5dfa5e08767553704aa0ffd9d9794d527103c736aba9854773851fd7497eb"
dependencies = [
"colored",
"log",

View File

@ -14,10 +14,11 @@ maud = "0.26"
chrono = "0.4"
scraper = "0.19"
rayon = "1.8"
simple_logger = "4.3"
simple_logger = "5.0"
log = "0.4"
rss = "2.0"
anyhow = "1.0"
rand = "0.8"
[dev-dependencies]
clippy = "0.0.302"

View File

@ -1,6 +1,6 @@
# Anson's Blogroll Zine
Anson's Blogroll Zine is a Rust application designed to aggregate content from multiple RSS feeds, creating a personalized news page. It fetches articles from `feeds.txt` and `featured.txt`, generates HTML cards for each entry, and outputs a single, styled HTML page.
Anson's Blogroll Zine is a Rust application designed to aggregate content from multiple RSS feeds, creating a personalized news page. It fetches articles from `feeds.txt` and `featured.txt`, generates HTML cards for each post, and outputs a single, styled HTML page.
## Algorithm

View File

@ -1,2 +1 @@
https://ciechanow.ski/atom.xml
https://factorio.com/blog/rss

View File

@ -76,6 +76,7 @@ https://www.brendangregg.com/blog/rss.xml
https://www.doscher.com/rss/
https://www.elidedbranches.com/feeds/posts/default
https://www.evanjones.ca/index.rss
https://www.factorio.com/blog/rss
https://www.jeffgeerling.com/blog.xml
https://www.joelonsoftware.com/feed/
https://www.makerstations.io/rss/

View File

@ -3,6 +3,8 @@ extern crate feed_rs;
extern crate maud;
extern crate reqwest;
use rand::seq::SliceRandom;
use rand::thread_rng;
use std::error::Error;
use std::fs::write;
use std::fs::DirBuilder;
@ -16,54 +18,70 @@ use std::collections::HashMap;
fn main() -> Result<(), Box<dyn Error>> {
simple_logger::init_with_level(log::Level::Info).unwrap();
let mut entries = utilities::read_feed("feeds.txt");
let all_posts = utilities::read_feed("feeds.txt");
entries.retain(|entry| entry.score.is_positive());
let mut posts = all_posts.clone();
posts.retain(|post| post.score.is_positive());
// Count occurences of main urls
let url_counts = entries.iter().fold(HashMap::new(), |mut acc, post| {
// Count occurrences of each main url to punish blogs that post really frequently,
// which also penalizes blogs that make tiny updates and change the published date
let url_counts = posts.iter().fold(HashMap::new(), |mut acc, post| {
*acc.entry(post.main_url.clone()).or_insert(0) += 1;
acc
});
// Punish blogs that post really often
entries.iter_mut().for_each(|entry| {
entry.score = (entry.score / url_counts.get(&entry.main_url).unwrap()) as i64;
posts.iter_mut().for_each(|post| {
post.score = (post.score / url_counts.get(&post.main_url).unwrap()) as i64;
});
let mut featured = utilities::read_feed("featured_feeds.txt");
// Give featured a small boost in points
featured = featured
.iter_mut()
.map(|post| {
post.score *= 1.5 as i64;
post.score = (post.score as f64 * 1.5) as i64;
post.clone()
})
.collect::<Vec<_>>();
entries.extend(featured);
posts.extend(featured);
entries.par_iter_mut().for_each(utilities::find_image);
entries.retain(|entry| entry.score.is_positive());
posts.par_iter_mut().for_each(utilities::find_image);
posts.par_iter_mut().for_each(utilities::validate);
entries.sort();
posts.sort();
// Remove bottom 10% from list
entries.truncate(entries.len() - (entries.len() as f64 * 0.1).ceil() as usize);
// Make sure first entry has an image since it is used as the featured post
let mut max_iter = 0;
while entries.first().unwrap().image_url.is_none() {
entries[0].score += -100;
entries.sort();
max_iter += 1;
if max_iter > 10000 {
break;
}
// Move the post with an image_url to the head of the list
if let Some(pos) = posts.iter().position(|post| post.image_url.is_some()) {
let post_with_image = posts.remove(pos);
posts.insert(0, post_with_image);
}
let index = site_generator::generate_index(entries.clone());
posts.truncate(16);
let mut old_posts = all_posts;
old_posts.retain(|p| !posts.contains(p));
old_posts.shuffle(&mut thread_rng());
// Build the "random old posts" archive: validate shuffled leftovers in
// batches until `archive_size` posts with a non-zero score are collected
// or fewer than 51 candidates remain.
let mut archive_posts: Vec<utilities::Post> = Vec::new();
let archive_size = 100;

while (archive_posts.len() < archive_size) && (old_posts.len() > 50) {
    let iter_size = archive_size - archive_posts.len();
    // Over-fetch (missing count + 51, same as the old `0..=(iter_size + 50)`)
    // because validation discards some posts, but never ask for more than is
    // left: `Vec::drain` panics when the range end exceeds the length, and the
    // loop guard only guarantees more than 50 remaining candidates.
    let batch_len = (iter_size + 51).min(old_posts.len());
    let mut extracted = old_posts
        .drain(..batch_len)
        .collect::<Vec<utilities::Post>>();

    extracted.par_iter_mut().for_each(utilities::validate);
    // `validate` zeroes the score of posts that fail its checks.
    extracted.retain(|post| post.score != 0);

    archive_posts.extend(extracted);
}
// A batch may overshoot the target, so trim back down to `archive_size`.
archive_posts.truncate(archive_size);
let index = site_generator::generate_index(posts.clone(), archive_posts.clone());
let index_path = Path::new("output/index.html");
DirBuilder::new()
.recursive(true)
@ -75,7 +93,7 @@ fn main() -> Result<(), Box<dyn Error>> {
Err(e) => log::error!("Failed to write to {}: {}", index_path.display(), e),
}
let feed = site_generator::generate_rss(entries.clone());
let feed = site_generator::generate_rss(posts.clone());
let feed_path = Path::new("output/feed.xml");
DirBuilder::new()
.recursive(true)

View File

@ -11,23 +11,23 @@ use rss::{ChannelBuilder, Item, ItemBuilder};
use crate::utilities;
fn create_featured_card(entry: &utilities::Post) -> Markup {
fn create_featured_card(post: &utilities::Post) -> Markup {
html! {
article class="featured" {
header {
@if entry.image_url.is_some() {
img src=(entry.image_url.as_ref().unwrap()) alt="Featured image";
@if post.image_url.is_some() {
img src=(post.image_url.as_ref().unwrap()) alt="Featured image";
}
hgroup {
h2 { (entry.title) }
a href=(format!("http://{}", entry.main_url)) { (entry.main_url) }
h2 { (post.title) }
a href=(format!("http://{}", post.main_url)) { (post.main_url) }
}
}
body {
p { (entry.truncated_description) }
p { (post.truncated_description) }
}
footer {
a class="grid" href=(entry.link) style="--pico-text-decoration: none;" {
a class="grid" href=(post.link) style="--pico-text-decoration: none;" {
button class="outline primary" { "Read Featured Post" }
}
}
@ -35,24 +35,24 @@ fn create_featured_card(entry: &utilities::Post) -> Markup {
}
}
fn create_post_card(entry: &utilities::Post) -> Markup {
fn create_post_card(post: &utilities::Post) -> Markup {
html! {
article {
header {
hgroup {
h2 { (entry.title) }
a href=(format!("http://{}", entry.main_url)) { (entry.main_url) }
h2 { (post.title) }
a href=(format!("http://{}", post.main_url)) { (post.main_url) }
}
}
body {
@if entry.image_url.is_some() {
img src=(entry.image_url.as_ref().unwrap()) alt="Entry image";
@if post.image_url.is_some() {
img src=(post.image_url.as_ref().unwrap());
p;
}
p { (entry.truncated_description) }
p { (post.truncated_description) }
}
footer {
a class="grid" href=(entry.link) style="--pico-text-decoration: none;" {
a class="grid" href=(post.link) style="--pico-text-decoration: none;" {
button class="outline secondary" { "Read Post" }
}
}
@ -110,11 +110,11 @@ fn generate_header() -> Markup {
}
fn about_modal(entries: Vec<utilities::Post>) -> Markup {
// Get link for each entry, which is a blog post then,
// Get link for each post, which is a blog post then,
// convert it to a url to the main page of the blog
let mut links = entries
.iter()
.map(|entry| entry.main_url.as_str())
.map(|post| post.main_url.as_str())
.collect::<std::collections::HashSet<_>>()
.into_iter()
.collect::<Vec<_>>();
@ -153,9 +153,64 @@ fn about_modal(entries: Vec<utilities::Post>) -> Markup {
}
}
pub fn generate_index(mut entries: Vec<utilities::Post>) -> Markup {
let featured = entries.first().unwrap().clone();
entries.remove(0);
/// Builds the document `<head>`: title, charset/viewport/description metadata,
/// favicon and web-manifest links, the Pico CSS and local stylesheets, and the
/// Open Graph tags.
pub fn generate_head() -> Markup {
    html! {
        head {
            title { "Anson's Zine | Public RSS Feed" }
            meta charset="utf-8";
            meta name="viewport" content="width=device-width, initial-scale=1";
            // Void elements must end with `;`. Without it, maud reads the tokens of
            // the next element as additional attributes of this `meta`, so the
            // apple-touch-icon `link` below would never be emitted as its own tag.
            meta name="description" content="Blogroll zine of RSS feeds for Anson";
            link rel="apple-touch-icon" sizes="180x180" href="/favicon/apple-touch-icon.png";
            link rel="icon" type="image/png" sizes="32x32" href="/favicon/favicon-32x32.png";
            link rel="icon" type="image/png" sizes="16x16" href="/favicon/favicon-16x16.png";
            link rel="manifest" href="/favicon/site.webmanifest";
            link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@picocss/pico@2/css/pico.blue.min.css";
            link rel="stylesheet" href="style.css";
            // Open Graph meta tags
            meta property="og:title" content="Anson's Zine | Public RSS Feed";
            meta property="og:description" content="Blogroll zine of RSS feeds for Anson";
            meta property="og:url" content="https://blogroll.ansonbiggs.com";
            meta property="og:type" content="website";
        }
    }
}
/// Renders the archive as a striped two-column table.
///
/// Each row shows the post title linked to the post itself, the blog's main
/// url linked beneath it in small text, and the post date formatted as
/// `"%B %d, %Y"`.
fn generate_archive_table(posts: Vec<utilities::Post>) -> Markup {
    html! {
        table class="striped" {
            thead {
                tr {
                    th { "Title" }
                    th { "Date" }
                }
            }
            tbody {
                @for archived in &posts {
                    // Pre-compute the derived strings so the row markup stays flat.
                    @let blog_home = format!("http://{}", archived.main_url);
                    @let published = archived.date.format("%B %d, %Y").to_string();
                    tr {
                        td {
                            a href=(archived.link) { (archived.title) }
                            br;
                            small {
                                a href=(blog_home) { (archived.main_url) }
                            }
                        }
                        td { (published) }
                    }
                }
            }
        }
    }
}
pub fn generate_index(
mut posts: Vec<utilities::Post>,
archive_posts: Vec<utilities::Post>,
) -> Markup {
let featured = posts.first().unwrap().clone();
posts.remove(0);
log::info!(
"Featured article: {}, img: {:?}",
featured.link,
@ -165,39 +220,23 @@ pub fn generate_index(mut entries: Vec<utilities::Post>) -> Markup {
html! {
(maud::DOCTYPE)
html lang="en" {
head {
title { "Anson's Zine | Public RSS Feed" }
meta charset="utf-8";
meta name="viewport" content="width=device-width, initial-scale=1";
meta name="description" content="Blogroll zine of RSS feeds for Anson"
link rel="apple-touch-icon" sizes="180x180" href="/favicon/apple-touch-icon.png";
link rel="icon" type="image/png" sizes="32x32" href="/favicon/favicon-32x32.png";
link rel="icon" type="image/png" sizes="16x16" href="/favicon/favicon-16x16.png";
link rel="manifest" href="/favicon/site.webmanifest";
link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@picocss/pico@2/css/pico.blue.min.css";
link rel="stylesheet" href="style.css";
// Open Graph meta tags
meta property="og:title" content="Anson's Zine | Public RSS Feed";
meta property="og:description" content="Blogroll zine of RSS feeds for Anson";
meta property="og:url" content="https://blogroll.ansonbiggs.com";
meta property="og:type" content="website";
}
{(generate_head())}
body { main class="container" {
{(generate_header())}
{(create_featured_card(&featured))}
div class="grid" {
@for column_entries in utilities::group_by_nth(&entries, 3) {
@for column_posts in utilities::group_by_nth(&posts, 3) {
div {
@for entry in column_entries {
{(create_post_card(&entry))}
@for post in column_posts {
{(create_post_card(&post))}
}
}
}
}
h2 {"Random Old Posts"}
{(generate_archive_table(archive_posts))}
{(generate_footer())}
{(about_modal(entries))}
{(about_modal(posts))}
script src="modal.js" {}
script src="minimal-theme-switcher.js" {}
}}

View File

@ -1,4 +1,4 @@
use crate::web_fetchers;
use crate::web_fetchers::{self, is_valid_url};
use chrono::{DateTime, Utc};
use feed_rs::model::Entry;
use rayon::prelude::*;
@ -16,7 +16,7 @@ pub struct Post {
pub image_url: Option<String>,
pub truncated_description: String,
pub main_url: String,
pub score: i64,
pub score: i64, // Score values still very in flux
}
impl Ord for Post {
@ -203,24 +203,46 @@ pub fn group_by_nth<T: Clone>(slice: &[T], n: usize) -> Vec<Vec<T>> {
.collect()
}
pub fn find_image(entry: &mut Post) {
if let Some(image_url) = &entry.image_url {
pub fn find_image(post: &mut Post) {
if let Some(image_url) = &post.image_url {
match web_fetchers::is_valid_image_url(image_url) {
Ok(true) => {}
_ => {
entry.image_url = None;
post.image_url = None;
}
}
} else {
match web_fetchers::fetch_social_image(entry.link.clone()) {
match web_fetchers::fetch_social_image(post.link.clone()) {
Ok(social_image_url) => {
if web_fetchers::is_valid_image_url(&social_image_url).unwrap_or(false) {
entry.image_url = Some(social_image_url);
post.image_url = Some(social_image_url);
}
}
Err(error) => {
log::warn!("{}: {}", error, entry.link.clone());
post.score = (post.score as f64 * 0.9) as i64;
log::warn!("{}: {}", error, post.link.clone());
}
}
}
}
pub fn validate(post: &mut Post) {
if post.title.is_empty() {
post.score = 0;
return;
}
if !post.lang.is_empty() && post.lang != "en" {
post.score = 0;
return;
}
if post.truncated_description.is_empty() {
post.score = (post.score as f64 * 0.5) as i64;
};
if !is_valid_url(post.link.as_str()) {
post.score = 0;
println!("{} is not valid", post.link.as_str());
};
}

View File

@ -46,3 +46,9 @@ pub fn is_valid_image_url(url: &str) -> Result<bool, Box<dyn std::error::Error>>
ct.to_str().map_or(false, |s| s.starts_with("image/"))
}))
}
/// Returns `true` when a blocking GET request to `url` completes with a
/// successful (2xx) status.
///
/// Transport failures (malformed url, DNS, connection, timeout) and non-2xx
/// responses such as 404 or 500 both yield `false`. Checking only that the
/// request was sent would treat dead links that still answer with an error
/// page as valid.
pub fn is_valid_url(url: &str) -> bool {
    let client = reqwest::blocking::Client::new();

    client
        .get(url)
        .send()
        .map_or(false, |response| response.status().is_success())
}