// zinetest/src/main.rs

extern crate feed_rs;
extern crate maud;
extern crate reqwest;

use feed_rs::model::Entry;
use feed_rs::parser;
use maud::{html, Markup};
use reqwest::blocking::get;
use scraper::{Html, Selector};
use std::cmp::Reverse;
use std::error::Error;
use std::fs;
use std::fs::write;
use std::fs::DirBuilder;
use std::path::Path;

use rayon::prelude::*;

/// Downloads the feed at `url` and returns its parsed entries.
///
/// # Errors
///
/// Returns an error when the request fails, when the server answers with a
/// non-success HTTP status, when the body cannot be read as text, or when the
/// body is not a feed that `feed_rs` can parse.
fn fetch_feed(url: &str) -> Result<Vec<Entry>, Box<dyn Error>> {
    // `get` only fails on transport errors; reject 4xx/5xx explicitly so an
    // HTML error page is reported as an HTTP failure instead of a parse error.
    let content = get(url)?.error_for_status()?.text()?;
    let feed = parser::parse(content.as_bytes())?;
    println!("Feed {} returned {} items", url, feed.entries.len());
    Ok(feed.entries)
}

fn fetch_social_image(url: &str) -> Result<String, Box<dyn std::error::Error>> {
    let html = reqwest::blocking::get(url)?.text()?;
    let document = Html::parse_document(&html);
    let selector = Selector::parse("meta[property=\"og:image\"]").unwrap();

    let image_url = document
        .select(&selector)
        .next()
        .and_then(|element| element.value().attr("content"))
        .unwrap_or("");

    Ok(image_url.to_string())
}
fn create_html_card(entry: &Entry) -> Markup {
    let title = entry
        .title
        .as_ref()
        .map_or_else(|| "".to_string(), |t| t.content.clone());

    let link = entry.links.first().unwrap();
    let lang = link.clone().href_lang.unwrap_or("en".to_string());

    if lang != "en" {
        println!("Non english! {} {}", lang, link.href);
    }

    let mut image_url = entry
        .media
        .first()
        .and_then(|m| m.content.first())
        .and_then(|c| c.url.as_ref().map(|u| u.to_string()))
        .unwrap_or_default();

    // Fallback to fetching social image if direct extraction didn't work
    if image_url.is_empty() {
        println!(
            "Falling back to searching for a social image for {}",
            link.href
        );
        image_url = fetch_social_image(link.href.as_str()).unwrap_or_default();
    }

    let description = entry.content.as_ref().map_or_else(
        || {
            entry
                .summary
                .as_ref()
                .map_or_else(|| "".to_string(), |summary| summary.content.clone())
        },
        |content| {
            content
                .body
                .as_ref()
                .map_or_else(|| "".to_string(), |body| body.clone())
        },
    );

    let cleaned_description = strip_html_tags(&description);
    let truncated_description = truncate_description(&cleaned_description, 500);

    let main_url = get_root_url(link.href.as_str());

    html! {
        article {
            header {
                hgroup {
                    h2 { (title) }
                    a href=(format!("http://{}", main_url)) { (main_url) }
                }
            }
            body {
                @if !image_url.is_empty() {
                    img src=(image_url) alt="Entry image";
                    p;
                }
                p { (truncated_description) }
            }
            footer {
                a class="grid" href=(link.href) style="--pico-text-decoration: none;" {
                    button class="outline secondary" { "Read Post" }
                }
            }
        }
    }
}

/// Returns the host portion of `input_url`.
///
/// A leading `https://` and/or `http://` is removed, then everything from the
/// first `/`, `?` or `#` onwards is dropped. URLs without a path (for example
/// `https://example.com`) are returned as just the host instead of panicking.
fn get_root_url(input_url: &str) -> &str {
    let mut url = input_url;

    url = url.strip_prefix("https://").unwrap_or(url);
    url = url.strip_prefix("http://").unwrap_or(url);

    // The host ends at the first path, query or fragment delimiter; when none
    // is present the whole remainder is the host.
    match url.find(|c: char| matches!(c, '/' | '?' | '#')) {
        Some(end) => &url[..end],
        None => url,
    }
}

/// Trims surrounding whitespace from `description` and limits its length.
///
/// Text of at most `max_length` bytes is returned as is. Longer text is cut at
/// the last character boundary that does not exceed `max_length` bytes and
/// suffixed with `...`.
fn truncate_description(description: &str, max_length: usize) -> String {
    let text = description.trim();
    if text.len() <= max_length {
        return text.to_string();
    }

    // Step back from `max_length` until the cut no longer splits a multi-byte
    // character; index 0 is always a boundary, so the search cannot fail.
    let cut = (0..=max_length)
        .rev()
        .find(|&idx| text.is_char_boundary(idx))
        .unwrap_or(0);

    format!("{}...", &text[..cut])
}

fn strip_html_tags(html: &str) -> String {
    let document = Html::parse_document(html);
    // Use the wildcard selector to select all nodes and extract their text.
    let selector = Selector::parse("*").unwrap();
    let mut text_content = String::new();

    for element in document.select(&selector) {
        let text = element.text().collect::<Vec<_>>().join(" ");
        text_content.push_str(&text);
        text_content.push(' ');
    }

    text_content.trim().to_string()
}

/// Builds the page header: a navigation bar with the site title on one side
/// and, on the other, an "About" button plus a "Theme" dropdown.
///
/// The "About" button targets the element with id `about` and calls
/// `toggleModal(event)` on click (presumably provided by `modal.js` — confirm).
/// The dropdown entries carry `data-theme-switcher` values `auto`, `light`
/// and `dark`.
fn generate_header() -> Markup {
    html! {
        header {
            nav {
                ul {
                    li { h1 { "Anson's Aggregated Feed" }}
                }
                ul {
                    li { button data-target="about" onclick="toggleModal(event)" { "About" } }
                    li {
                        details class="dropdown" {
                            summary role="button" class="outline secondary" { "Theme" }
                            ul {
                                li { a href="#" data-theme-switcher="auto" { "Auto" }}
                                li { a href="#" data-theme-switcher="light" { "Light" }}
                                li { a href="#" data-theme-switcher="dark" { "Dark" }}
                            }
                        }
                    }
                }
            }
        }
    }
}

fn about_modal(entries: Vec<Entry>) -> Markup {
    // Get link for each entry, which is a blog post then,
    // convert it to a url to the main page of the blog
    let mut links = entries
        .iter()
        .map(|entry| entry.links.first().unwrap().href.as_str())
        .map(get_root_url)
        .collect::<std::collections::HashSet<&str>>()
        .into_iter()
        .collect::<Vec<&str>>();

    // Alphabetical to be fair to everytone :)
    links.sort();

    html! {
        dialog id="about" {
            article {
                header {
                    a href="#" aria-label="Close" rel="prev" {}
                    p { strong { "About" }}
                }
                p {
                    "When looking for a RSS reader I came across "
                    a href="https://news.russellsaw.io/" {"news.russellsaw.io"}
                    " I thought the idea of building my own personalised newspaper was cool. \
                    So, I decided to build a clone using my own subscribed RSS feeds."
                }
                p {
                    "This page updates daily at 8:11ish AM Mountain Time. The following blogs are"
                    " in the subscription list:"
                }
                ul {
                    @for link in links {
                        li {a href=(link) {(link)}}
                    }
                }
            }
        }
    }
}

/// Builds the page footer: a small line linking to the author's site and to
/// the project's source repository.
fn generate_footer() -> Markup {
    html! {
        footer class="container" {
            small {
                p {
                    a href="https://ansonbiggs.com" { "Anson Biggs" }
                    " - 2024 - "
                    a href="https://gitlab.com/MisterBiggs/zine" { "Source Code" }
                }
            }
        }
    }
}
/// Deals the items of `slice` round-robin into `n` groups.
///
/// Group `i` receives the items at indices `i`, `i + n`, `i + 2n`, ... in
/// their original order. Exactly `n` groups are returned (some may be empty);
/// `n == 0` yields no groups at all.
fn group_by_nth<T: Clone>(slice: &[T], n: usize) -> Vec<Vec<T>> {
    let mut groups = Vec::with_capacity(n);
    for offset in 0..n {
        // Start at `offset` and take every n-th item from there.
        let group: Vec<T> = slice.iter().skip(offset).step_by(n).cloned().collect();
        groups.push(group);
    }
    groups
}

/// Renders the complete index page for the given entries.
///
/// The document consists of the `head` (metadata, icon and stylesheets) and a
/// `main.container` holding the page header, a grid of three columns with the
/// entry cards distributed round-robin across them, the footer, the "About"
/// dialog and the two script tags.
fn generate_index(entries: Vec<Entry>) -> Markup {
    html! {
        (maud::DOCTYPE)
        html {
            head {
                title { "Anson's Zine" }
                meta charset="utf-8";
                meta name="viewport" content="width=device-width, initial-scale=1";
                // Every void element must be terminated with `;`, otherwise the
                // tokens of the next element are not emitted as their own tag.
                meta name="description" content="Aggregate newspaper of RSS feeds for Anson";
                link rel="icon" href="data:image/svg+xml,<svg xmlns=%22http://www.w3.org/2000/svg%22 viewBox=%220 0 100 100%22><text y=%22.9em%22 font-size=%2290%22>📰</text></svg>";
                link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@picocss/pico@2/css/pico.blue.min.css";
                link rel="stylesheet" href="style.css";
            }
            body { main class="container" {
                {(generate_header())}
                div class="grid" {
                    @for column_entries in group_by_nth(&entries, 3) {
                        div {
                            @for entry in column_entries {
                                {(create_html_card(&entry))}
                            }
                        }

                    }
                }
                {(generate_footer())}
                {(about_modal(entries))}
                script src="modal.js" {}
                script src="minimal-theme-switcher.js" {}
            }}
        }
    }
}

/// Builds `output/index.html` from the feeds listed in `feeds.txt`.
///
/// Each non-blank line of `feeds.txt` is fetched as a feed (in parallel).
/// Feeds that fail are reported on stderr and skipped. Entries without a
/// timestamp are dropped, the rest are sorted newest first, and the 30 most
/// recent ones are rendered.
///
/// # Errors
///
/// Returns an error (and therefore a non-zero exit status) when `feeds.txt`
/// cannot be read, the output directory cannot be created, or the page cannot
/// be written.
fn main() -> Result<(), Box<dyn Error>> {
    let feed_list = fs::read_to_string("feeds.txt")
        .map_err(|e| format!("Failed to read feeds.txt: {}", e))?;
    // Blank lines are not URLs; skipping them avoids pointless failed requests.
    let feed_urls: Vec<&str> = feed_list
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .collect();

    let raw_entries: Vec<Result<Vec<Entry>, String>> = feed_urls
        .into_par_iter()
        .map(|url| {
            fetch_feed(url).map_err(|e| format!("Failed to fetch or parse feed {}: {}", url, e))
        })
        .collect();

    // Flatten the entries and filter out the errors
    let mut entries: Vec<Entry> = Vec::new();
    for result in raw_entries {
        match result {
            Ok(feed_entries) => entries.extend(feed_entries),
            Err(e) => eprintln!("{}", e),
        }
    }

    // Remove any entries that don't have a timestamp, and then sort by timestamps
    // (newest first, preferring the publication date over the update date).
    entries.retain(|entry| entry.published.is_some() || entry.updated.is_some());
    entries.sort_by_key(|entry| Reverse(entry.published.or(entry.updated)));

    entries.truncate(30);

    let html_string = generate_index(entries).into_string();

    let output_path = Path::new("output/index.html");
    if let Some(output_dir) = output_path.parent() {
        DirBuilder::new()
            .recursive(true)
            .create(output_dir)
            .map_err(|e| format!("Failed to create {}: {}", output_dir.display(), e))?;
    }

    // Propagate write failures so a scheduled run does not report success
    // without having produced a page.
    write(output_path, html_string)
        .map_err(|e| format!("Failed to write to {}: {}", output_path.display(), e))?;
    println!("Successfully wrote to {}", output_path.display());

    Ok(())
}