
Use Custom Struct

Anson Biggs 2024-04-12 05:45:43 +00:00
parent 841af6aa41
commit f2ff3e3640
9 changed files with 630 additions and 400 deletions

View File

@ -1,4 +1,6 @@
stages:
- build
- lint
- test
- build_site
- deploy
@ -13,14 +15,26 @@ cache:
- target/
- cargo/
build:
image: rust:latest
stage: build
script:
- cargo build
lint:
image: rust:latest
stage: lint
script:
- rustup component add clippy
- cargo clippy --all-targets -- -D warnings
test:
image: rust:latest
stage: test
script:
- cargo test --verbose
rules:
- if: "$CI_COMMIT_BRANCH != $CI_DEFAULT_BRANCH"
build_site:
image: rust:latest
@ -44,8 +58,6 @@ pages:
rules:
- if: "$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH"
workflow:
name: $CI_COMMIT_REF_NAME
rules:
@ -54,4 +66,4 @@ workflow:
CI_COMMIT_REF_NAME: $SCHEDULED_PIPELINE_NAME
- if: $SCHEDULED_BUILD_PIPELINE != 'true'
variables:
CI_COMMIT_REF_NAME: $DEFAULT_PIPELINE_NAME

Cargo.lock (generated): 623 changes
File diff suppressed because it is too large.

View File

@ -1,7 +1,7 @@
[package]
name = "aggregate_rss_zine"
description = "Aggregate feed of RSS feeds I enjoy in the form of a newspaper."
version = "0.2.0"
version = "0.3.0"
edition = "2021"
authors = ["Anson Biggs"]
homepage = "https://zine.ansonbiggs.com"
@ -10,11 +10,14 @@ license = "MIT"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
feed-rs = "1.4.0"
reqwest = { version = "0.11.24", features = ["blocking"] }
maud = "0.26.0"
chrono = "0.4.33"
scraper = "0.19.0"
rayon = "1.8.1"
simple_logger = "4.3.3"
feed-rs = "1.4"
reqwest = { version = "0.12", features = ["blocking"] }
maud = "0.26"
chrono = "0.4"
scraper = "0.19"
rayon = "1.8"
simple_logger = "4.3"
log = "0.4"
[dev-dependencies]
clippy = "0.0.302"

View File

@ -11,10 +11,10 @@ https://blog.andymatuschak.org/rss
https://blog.benjojo.co.uk/rss.xml
https://blog.codinghorror.com/rss/
https://blog.frost.kiwi/feed.xml
https://c.pgdm.ch/atom.xml
https://calebporzio.com/feed
https://chrisnicholas.dev/rss.xml
https://christianselig.com/index.xml
https://ciechanow.ski/atom.xml
https://danluu.com/atom.xml
https://darekkay.com/atom.xml
https://decomposition.al/atom.xml
@ -47,6 +47,7 @@ https://steveklabnik.com/feed.xml
https://taylor.town/feed.xml
https://vickiboykis.com/index.xml
https://vitalik.eth.limo/feed.xml
https://www.bitsaboutmoney.com/archive/rss/
https://www.construction-physics.com/feed
https://www.elidedbranches.com/feeds/posts/default
https://www.jeffgeerling.com/blog.xml

View File

@ -5,76 +5,28 @@ extern crate reqwest;
use chrono::DateTime;
use chrono::Utc;
use feed_rs::model::Entry;
use maud::{html, Markup};
use std::env;
use crate::utilities;
use crate::web_fetchers;
fn create_featured_card(entry: &Entry) -> Markup {
let title = entry
.title
.as_ref()
.map_or_else(|| "".to_string(), |t| t.content.clone());
let link = entry.links.first().unwrap();
let lang = link.clone().href_lang.unwrap_or("en".to_string());
if lang != "en" {
log::warn!("Non english! {} {}", lang, link.href);
}
let mut image_url = entry
.media
.first()
.and_then(|m| m.content.first())
.and_then(|c| c.url.as_ref().map(|u| u.to_string()))
.unwrap_or_default();
// Fallback to fetching social image if direct extraction didn't work
if image_url.is_empty() {
log::info!(
"Falling back to searching for a social image for {}",
link.href
);
image_url = web_fetchers::fetch_social_image(link.href.as_str()).unwrap_or_default();
}
let description = entry.content.as_ref().map_or_else(
|| {
entry
.summary
.as_ref()
.map_or_else(|| "".to_string(), |summary| summary.content.clone())
},
|content| {
content
.body
.as_ref()
.map_or_else(|| "".to_string(), |body| body.clone())
},
);
let cleaned_description = utilities::strip_html_tags(&description);
let truncated_description = utilities::truncate_description(&cleaned_description, 500);
let main_url = utilities::get_root_url(link.href.as_str());
fn create_featured_card(entry: &utilities::Post) -> Markup {
html! {
article class="featured" {
header {
img src=(image_url) alt="Entry image";
@if entry.image_url.is_some() {
img src=(entry.image_url.as_ref().unwrap()) alt="Featured image";
}
hgroup {
h2 { (title) }
a href=(format!("http://{}", main_url)) { (main_url) }
h2 { (entry.title) }
a href=(format!("http://{}", entry.main_url)) { (entry.main_url) }
}
}
body {
p { (truncated_description) }
p { (entry.truncated_description) }
}
footer {
a class="grid" href=(link.href) style="--pico-text-decoration: none;" {
a class="grid" href=(entry.link) style="--pico-text-decoration: none;" {
button class="outline primary" { "Read Featured Post" }
}
}
@ -82,75 +34,24 @@ fn create_featured_card(entry: &Entry) -> Markup {
}
}
fn create_post_card(entry: &Entry) -> Markup {
let title = entry
.title
.as_ref()
.map_or_else(|| "".to_string(), |t| t.content.clone());
let link = entry.links.first().unwrap();
let lang = link.clone().href_lang.unwrap_or("en".to_string());
if lang != "en" {
log::warn!("Non english! {} {}", lang, link.href);
}
let mut image_url = entry
.media
.first()
.and_then(|m| m.content.first())
.and_then(|c| c.url.as_ref().map(|u| u.to_string()))
.unwrap_or_default();
// Fallback to fetching social image if direct extraction didn't work
if image_url.is_empty() {
log::info!(
"Falling back to searching for a social image for {}",
link.href
);
image_url = web_fetchers::fetch_social_image(link.href.as_str()).unwrap_or_default();
}
if image_url.is_empty() {
log::warn!("No image could be gathered for {}", link.href);
}
let description = entry.content.as_ref().map_or_else(
|| {
entry
.summary
.as_ref()
.map_or_else(|| "".to_string(), |summary| summary.content.clone())
},
|content| {
content
.body
.as_ref()
.map_or_else(|| "".to_string(), |body| body.clone())
},
);
let cleaned_description = utilities::strip_html_tags(&description);
let truncated_description = utilities::truncate_description(&cleaned_description, 500);
let main_url = utilities::get_root_url(link.href.as_str());
fn create_post_card(entry: &utilities::Post) -> Markup {
html! {
article {
header {
hgroup {
h2 { (title) }
a href=(format!("http://{}", main_url)) { (main_url) }
h2 { (entry.title) }
a href=(format!("http://{}", entry.main_url)) { (entry.main_url) }
}
}
body {
@if !image_url.is_empty() {
img src=(image_url) alt="Entry image";
@if entry.image_url.is_some() {
img src=(entry.image_url.as_ref().unwrap()) alt="Entry image";
p;
}
p { (truncated_description) }
p { (entry.truncated_description) }
}
footer {
a class="grid" href=(link.href) style="--pico-text-decoration: none;" {
a class="grid" href=(entry.link) style="--pico-text-decoration: none;" {
button class="outline secondary" { "Read Post" }
}
}
@ -201,16 +102,15 @@ fn generate_header() -> Markup {
}
}
fn about_modal(entries: Vec<Entry>) -> Markup {
fn about_modal(entries: Vec<utilities::Post>) -> Markup {
// Get link for each entry, which is a blog post then,
// convert it to a url to the main page of the blog
let mut links = entries
.iter()
.map(|entry| entry.links.first().unwrap().href.as_str())
.map(utilities::get_root_url)
.collect::<std::collections::HashSet<&str>>()
.map(|entry| entry.main_url.as_str())
.collect::<std::collections::HashSet<_>>()
.into_iter()
.collect::<Vec<&str>>();
.collect::<Vec<_>>();
// Alphabetical to be fair to everyone :)
links.sort();
@ -234,7 +134,7 @@ fn about_modal(entries: Vec<Entry>) -> Markup {
}
ul {
@for link in links {
li {a href=("http://".to_owned() + link) {(link)}}
li {a href=(format!("{}{}","http://".to_owned() , link)) {(link)}}
}
}
p {
@ -246,24 +146,14 @@ fn about_modal(entries: Vec<Entry>) -> Markup {
}
}
pub fn generate_index(mut entries: Vec<Entry>, featured: Entry) -> Markup {
let running_in_gitlab = env::var("CI").map(|val| val == "true").unwrap_or(false);
if running_in_gitlab {
log::info!("Building for deployment");
entries.truncate(30);
} else {
log::info!("Building for development");
entries.truncate(6);
}
let featured_card: maud::PreEscaped<String>;
if (utilities::get_entry_date(&featured)) > (chrono::Utc::now() - chrono::Duration::days(3)) {
featured_card = create_featured_card(&featured);
entries.retain(|entry| entry != &featured);
} else {
featured_card = html! {};
}
pub fn generate_index(mut entries: Vec<utilities::Post>) -> Markup {
let featured = entries.first().unwrap().clone();
entries.remove(0);
log::info!(
"Featured article: {}, img: {:?}",
featured.link,
featured.image_url
);
html! {
(maud::DOCTYPE)
@ -282,7 +172,7 @@ pub fn generate_index(mut entries: Vec<Entry>, featured: Entry) -> Markup {
}
body { main class="container" {
{(generate_header())}
(featured_card)
{(create_featured_card(&featured))}
div class="grid" {
@for column_entries in utilities::group_by_nth(&entries, 3) {
div {
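The column layout above calls utilities::group_by_nth(&entries, 3), whose body is not part of this diff. A rough, hypothetical sketch of what such a helper could look like (round-robin distribution into n columns; not the commit's actual implementation):

fn group_by_nth<T: Clone>(items: &[T], n: usize) -> Vec<Vec<T>> {
    // Distribute items round-robin so column k holds items k, k+n, k+2n, ...
    let mut columns: Vec<Vec<T>> = vec![Vec::new(); n];
    for (i, item) in items.iter().enumerate() {
        columns[i % n].push(item.clone());
    }
    columns
}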

View File

@ -7,24 +7,50 @@ use std::error::Error;
use std::fs::write;
use std::fs::DirBuilder;
use std::path::Path;
use utilities::read_feed;
mod index_generator;
mod utilities;
mod web_fetchers;
use rayon::prelude::*;
fn main() -> Result<(), Box<dyn Error>> {
simple_logger::init_with_level(log::Level::Info).unwrap();
let featured = read_feed("featured_feeds.txt").first().unwrap().clone();
let entries = read_feed("feeds.txt");
let mut featured = utilities::read_feed("featured_feeds.txt");
log::info!(
"Featured article: {}",
entries[0].links.first().unwrap().href.as_str()
);
featured = featured
.iter_mut()
.map(|post| {
post.score += 60 * 24 * 2;
post.clone()
})
.collect::<Vec<_>>();
let index = index_generator::generate_index(entries, featured);
let mut entries = utilities::read_feed("feeds.txt");
entries.extend(featured);
entries.retain(|entry| entry.score.is_positive());
entries.par_iter_mut().for_each(|entry| {
if entry.image_url.is_some() {
entry.score += 300;
} else {
match web_fetchers::fetch_social_image(entry.link.clone()) {
Ok(social_image_url) => {
entry.image_url = Some(social_image_url);
}
Err(error) => {
log::info!("{}: {}", error, entry.link.clone());
entry.score += -600;
}
}
}
});
entries.retain(|entry| entry.score.is_positive());
entries.sort();
let index = index_generator::generate_index(entries);
let output_path = Path::new("output/index.html");
DirBuilder::new()
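For scale: the score assigned in utilities.rs is (date - (Utc::now() - Duration::days(14))).num_minutes(), so a post published right now starts near 14 * 24 * 60 = 20160 and a two-week-old post sits near zero. The adjustments above use the same unit: the featured boost of 60 * 24 * 2 = 2880 makes a featured post look about two days newer, the +300 image bonus is worth five hours, and the -600 penalty for a missing social image costs ten hours before retain(|entry| entry.score.is_positive()) drops anything that has fallen behind.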

View File

@ -1,11 +1,95 @@
use crate::web_fetchers;
use chrono::{DateTime, Duration, Utc};
use feed_rs::model::Entry;
use rayon::prelude::*;
use scraper::{Html, Selector};
use std::cmp::Reverse;
use std::cmp::Ordering;
use std::fs;
pub fn read_feed(path: &str) -> Vec<Entry> {
#[derive(Clone, PartialEq, Eq)]
pub struct Post {
pub title: String,
pub link: String,
pub date: DateTime<Utc>,
pub lang: String,
pub image_url: Option<String>,
pub truncated_description: String,
pub main_url: String,
pub score: i64,
}
impl Ord for Post {
fn cmp(&self, other: &Self) -> Ordering {
self.score.partial_cmp(&other.score).unwrap().reverse()
}
}
impl PartialOrd for Post {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl Post {
fn from_entry(entry: &feed_rs::model::Entry) -> Self {
let title = entry
.title
.as_ref()
.map_or_else(|| "".to_string(), |t| t.content.clone());
let link = entry.links.first().unwrap();
let date = get_entry_date(entry);
let lang = link.clone().href_lang.unwrap_or("en".to_string());
if lang != "en" {
log::warn!("Non english! {} {}", lang, link.href);
}
let image_url = entry
.media
.first()
.and_then(|m| m.content.first())
.and_then(|c| c.url.as_ref().map(|u| u.to_string()));
let description = entry.content.as_ref().map_or_else(
|| {
entry
.summary
.as_ref()
.map_or_else(|| "".to_string(), |summary| summary.content.clone())
},
|content| {
content
.body
.as_ref()
.map_or_else(|| "".to_string(), |body| body.clone())
},
);
let cleaned_description = strip_html_tags(&description);
let truncated_description = truncate_description(&cleaned_description, 500);
let main_url = get_root_url(link.href.as_str());
let score = (date - (Utc::now() - Duration::days(14))).num_minutes();
Post {
title,
link: link.href.clone(),
date,
lang,
image_url,
truncated_description,
main_url,
score,
}
}
}
pub fn read_feed(path: &str) -> Vec<Post> {
let binding = fs::read_to_string(path).unwrap();
let feed_urls: Vec<&str> = binding.lines().collect();
@ -27,9 +111,12 @@ pub fn read_feed(path: &str) -> Vec<Entry> {
}
entries.retain(validate_entry_date);
entries.sort_by_key(|entry| Reverse(get_entry_date(entry)));
entries
.par_iter()
.map(Post::from_entry)
.filter(|entry| entry.date < chrono::Utc::now())
.collect::<Vec<_>>()
}
fn validate_entry_date(entry: &Entry) -> bool {
@ -42,17 +129,17 @@ fn validate_entry_date(entry: &Entry) -> bool {
}
}
pub fn get_entry_date(entry: &Entry) -> chrono::DateTime<chrono::Utc> {
pub fn get_entry_date(entry: &Entry) -> DateTime<Utc> {
entry.published.unwrap_or(entry.updated.unwrap_or_default())
}
pub fn get_root_url(input_url: &str) -> &str {
pub fn get_root_url(input_url: &str) -> String {
let mut url = input_url;
url = url.strip_prefix("https://").unwrap_or(url);
url = url.strip_prefix("http://").unwrap_or(url);
url.split_once('/').unwrap().0
url.split_once('/').unwrap().0.to_string()
}
pub fn truncate_description(description: &str, max_length: usize) -> String {
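A minimal sketch of how the reversed score comparison behaves, assuming it sits next to Post in utilities.rs; the field values are placeholders, not data from the commit:

fn dummy_post(score: i64) -> Post {
    // Placeholder fields purely for illustration.
    Post {
        title: String::new(),
        link: String::new(),
        date: Utc::now(),
        lang: "en".to_string(),
        image_url: None,
        truncated_description: String::new(),
        main_url: String::new(),
        score,
    }
}

#[test]
fn highest_score_sorts_first() {
    let mut posts = vec![dummy_post(10), dummy_post(250), dummy_post(-40)];
    posts.sort();
    // Ord reverses the score comparison, so an ascending sort puts the largest score first.
    assert_eq!(posts[0].score, 250);
    assert_eq!(posts[2].score, -40);
}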

View File

@ -17,7 +17,7 @@ pub fn fetch_feed(url: &str) -> Result<Vec<Entry>, Box<dyn Error>> {
Ok(feed.entries)
}
pub fn fetch_social_image(url: &str) -> Result<String, Box<dyn std::error::Error>> {
pub fn fetch_social_image(url: String) -> Result<String, Box<dyn std::error::Error>> {
let html = reqwest::blocking::get(url)?.text()?;
let document = Html::parse_document(&html);
let selector = Selector::parse("meta[property=\"og:image\"]").unwrap();
@ -25,8 +25,11 @@ pub fn fetch_social_image(url: &str) -> Result<String, Box<dyn std::error::Error
let image_url = document
.select(&selector)
.next()
.and_then(|element| element.value().attr("content"))
.unwrap_or("");
.and_then(|element| element.value().attr("content"));
Ok(image_url.to_string())
if let Some(url) = image_url {
Ok(url.to_string())
} else {
Err("No social image found".into())
}
}
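With the new signature, the caller hands over the URL String and gets an Err instead of an empty string when no og:image meta tag is found. A hypothetical wrapper (not part of the commit) showing how a caller might consume it, mirroring the match in main.rs:

fn social_image_or_log(link: String) -> Option<String> {
    // Keep the image URL on success; log and fall back to None otherwise.
    match web_fetchers::fetch_social_image(link.clone()) {
        Ok(image_url) => Some(image_url),
        Err(error) => {
            log::info!("{}: {}", error, link);
            None
        }
    }
}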

View File

@ -1,39 +1,15 @@
use feed_rs::parser;
use reqwest::blocking::get;
use std::collections::HashSet;
use std::fs;
// Function to read URLs from a file
fn read_feed() -> Vec<String> {
let binding = fs::read_to_string("feeds.txt").unwrap();
fn read_feed(path: &str) -> Vec<String> {
let binding = fs::read_to_string(path).unwrap();
binding.lines().map(|s| s.to_owned()).collect()
}
// Function to fetch and parse a feed, returning true if successful
fn fetch_and_parse_feed(url: &str) -> bool {
let content = match get(url) {
Ok(response) => response.text().unwrap_or_default(),
Err(_) => return false,
};
parser::parse(content.as_bytes()).is_ok()
}
#[test]
fn test_that_urls_point_to_valid_feeds() {
let urls = read_feed();
for url in urls {
assert!(
fetch_and_parse_feed(&url),
"Feed at URL failed validation: {}",
url
);
}
}
#[test]
fn test_if_feeds_are_in_alphabetical_order() {
let mut urls = read_feed();
let mut urls = read_feed("feeds.txt");
if !urls.windows(2).all(|w| w[0] < w[1]) {
println!("Sorted feeds.txt:");
@ -46,3 +22,10 @@ fn test_if_feeds_are_in_alphabetical_order() {
panic!("feeds.txt was not sorted!")
}
}
#[test]
fn test_if_feeds_lists_have_overlapping_feed() {
let set1: HashSet<_> = read_feed("feeds.txt").into_iter().collect();
let set2: HashSet<_> = read_feed("featured_feeds.txt").into_iter().collect();
assert!(set1.is_disjoint(&set2));
}