use chrono::{DateTime, Utc};
use feed_rs::model::Entry;
use feed_rs::parser;
use maud::{html, Markup};
use rayon::prelude::*;
use reqwest::blocking::get;
use scraper::{Html, Selector};
use std::cmp::Reverse;
use std::collections::HashSet;
use std::error::Error;
use std::fs::{self, write, DirBuilder};
use std::path::Path;

fn fetch_feed(url: &str) -> Result<Vec<Entry>, Box<dyn Error>> {
    let content = get(url)?.text()?;
    let feed = parser::parse(content.as_bytes())?;
    println!("Feed {} returned {} items", url, feed.entries.len());
    Ok(feed.entries)
}

fn fetch_social_image(url: &str) -> Result<String, Box<dyn Error>> {
    let html = get(url)?.text()?;
    let document = Html::parse_document(&html);
    let selector = Selector::parse("meta[property=\"og:image\"]").unwrap();
    let image_url = document
        .select(&selector)
        .next()
        .and_then(|element| element.value().attr("content"))
        .unwrap_or("");
    Ok(image_url.to_string())
}

fn create_html_card(entry: &Entry) -> Markup {
    let title = entry
        .title
        .as_ref()
        .map_or_else(String::new, |t| t.content.clone());

    let link = entry.links.first().unwrap();
    let lang = link.href_lang.clone().unwrap_or_else(|| "en".to_string());
    if lang != "en" {
        println!("Non-English! {} {}", lang, link.href);
    }

    // Prefer an image declared in the feed's own media metadata.
    let mut image_url = entry
        .media
        .first()
        .and_then(|m| m.content.first())
        .and_then(|c| c.url.as_ref().map(|u| u.to_string()))
        .unwrap_or_default();

    // Fall back to scraping the post's og:image tag if the feed had none.
    if image_url.is_empty() {
        println!(
            "Falling back to searching for a social image for {}",
            link.href
        );
        image_url = fetch_social_image(link.href.as_str()).unwrap_or_default();
    }

    // Use the full content body when present, otherwise the summary.
    let description = entry.content.as_ref().map_or_else(
        || {
            entry
                .summary
                .as_ref()
                .map_or_else(String::new, |summary| summary.content.clone())
        },
        |content| content.body.clone().unwrap_or_default(),
    );

    let cleaned_description = strip_html_tags(&description);
    let truncated_description = truncate_description(&cleaned_description, 500);
    let main_url = get_root_url(link.href.as_str());

    html! {
        article {
            header {
                hgroup {
                    h2 { (title) }
                    a href=(format!("http://{}", main_url)) { (main_url) }
                }
            }
            body {
                @if !image_url.is_empty() {
                    img src=(image_url) alt="Entry image";
                    p;
                }
                p { (truncated_description) }
            }
            footer {
                a class="grid" href=(link.href) style="--pico-text-decoration: none;" {
                    button class="outline secondary" { "Read Post" }
                }
            }
        }
    }
}

fn get_root_url(input_url: &str) -> &str {
    let mut url = input_url;
    url = url.strip_prefix("https://").unwrap_or(url);
    url = url.strip_prefix("http://").unwrap_or(url);
    // A bare domain has no '/', so fall back to the whole string instead of panicking.
    url.split_once('/').map_or(url, |(domain, _)| domain)
}

fn truncate_description(description: &str, max_length: usize) -> String {
    let trimmed = description.trim();
    if trimmed.len() <= max_length {
        return trimmed.to_string();
    }
    // Walk back from max_length to the nearest char boundary so the slice
    // never splits a multi-byte character.
    let mut boundary = max_length;
    while !trimmed.is_char_boundary(boundary) {
        boundary -= 1;
    }
    format!("{}...", &trimmed[..boundary])
}

fn strip_html_tags(html: &str) -> String {
    let document = Html::parse_document(html);
    // Use the wildcard selector to select all nodes and extract their text.
    let selector = Selector::parse("*").unwrap();
    let mut text_content = String::new();
    for element in document.select(&selector) {
        let text = element.text().collect::<Vec<_>>().join(" ");
        text_content.push_str(&text);
        text_content.push(' ');
    }
    text_content.trim().to_string()
}
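// A minimal test sketch for the pure string helpers above. The expected
// values are assumptions about the intended behavior, not fixtures from the
// original project.
#[cfg(test)]
mod helper_tests {
    use super::*;

    #[test]
    fn root_url_strips_scheme_and_path() {
        assert_eq!(get_root_url("https://example.com/post/1"), "example.com");
        // A bare domain should come back unchanged rather than panicking.
        assert_eq!(get_root_url("http://example.com"), "example.com");
    }

    #[test]
    fn truncation_respects_char_boundaries() {
        // "é" is two bytes in UTF-8, so naive byte slicing would panic here.
        let text = "é".repeat(300);
        let truncated = truncate_description(&text, 5);
        assert!(truncated.ends_with("..."));
        assert!(truncated.len() <= 5 + 3);
    }
}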
fn generate_header() -> Markup {
    html! {
        header {
            nav {
                ul {
                    li { h1 { "Anson's Aggregated Feed" } }
                }
                ul {
                    li {
                        button data-target="about" onclick="toggleModal(event)" { "About" }
                    }
                    li {
                        details class="dropdown" {
                            summary role="button" class="outline secondary" { "Theme" }
                            ul {
                                li { a href="#" data-theme-switcher="auto" { "Auto" } }
                                li { a href="#" data-theme-switcher="light" { "Light" } }
                                li { a href="#" data-theme-switcher="dark" { "Dark" } }
                            }
                        }
                    }
                }
            }
        }
    }
}

fn about_modal(entries: Vec<Entry>) -> Markup {
    // Map each entry's post link back to the root URL of its blog,
    // deduplicating blogs that contributed more than one post.
    let mut links = entries
        .iter()
        .map(|entry| entry.links.first().unwrap().href.as_str())
        .map(get_root_url)
        .collect::<HashSet<_>>()
        .into_iter()
        .collect::<Vec<_>>();

    // Alphabetical to be fair to everyone :)
    links.sort();

    html! {
        dialog id="about" {
            article {
                header {
                    a href="#" aria-label="Close" rel="prev" {}
                    p { strong { "About" } }
                }
                p {
                    "When looking for an RSS reader I came across "
                    a href="https://news.russellsaw.io/" { "news.russellsaw.io" }
                    ". I thought the idea of building my own personalised newspaper was cool, \
                    so I decided to build a clone using my own subscribed RSS feeds."
                }
                p {
                    "This page updates daily at 8:11ish AM Mountain Time. The following blogs"
                    " are currently featured on the page:"
                }
                ul {
                    @for link in links {
                        li { a href=(link) { (link) } }
                    }
                }
                p {
                    "For the full list of feeds that are followed, see the raw list "
                    a href="https://gitlab.com/MisterBiggs/zine/-/blob/master/feeds.txt" { "here." }
                }
            }
        }
    }
}

fn generate_footer() -> Markup {
    let utc: DateTime<Utc> = Utc::now();
    let formatted_utc = utc.format("%Y-%m-%d %H:%M:%S").to_string();

    html! {
        footer class="container" {
            small {
                p {
                    a href="https://ansonbiggs.com" { "Anson Biggs" }
                    " - "
                    a href="https://gitlab.com/MisterBiggs/zine" { "Source Code" }
                    " - "
                    "Page generated at: "
                    em data-tooltip="8AM Mountain Time" { (formatted_utc) " UTC" }
                }
            }
        }
    }
}

fn group_by_nth<T: Clone>(slice: &[T], n: usize) -> Vec<Vec<T>> {
    // Deal the items round-robin into n groups: group i receives every
    // element whose index is congruent to i modulo n.
    (0..n)
        .map(|i| {
            slice
                .iter()
                .enumerate()
                .filter_map(|(index, value)| {
                    if index % n == i {
                        Some(value.clone())
                    } else {
                        None
                    }
                })
                .collect()
        })
        .collect()
}
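// A small sketch showing the round-robin split that the three-column grid
// below relies on; the sample data is made up for illustration.
#[cfg(test)]
mod layout_tests {
    use super::*;

    #[test]
    fn group_by_nth_deals_round_robin() {
        let items = vec![1, 2, 3, 4, 5, 6, 7];
        let columns = group_by_nth(&items, 3);
        // Indices 0, 3, 6 land in the first column, 1, 4 in the second, and so on.
        assert_eq!(columns, vec![vec![1, 4, 7], vec![2, 5], vec![3, 6]]);
    }
}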
fn generate_index(entries: Vec<Entry>) -> Markup {
    html! {
        (maud::DOCTYPE)
        html {
            head {
                title { "Anson's Zine" }
                meta charset="utf-8";
                meta name="viewport" content="width=device-width, initial-scale=1";
                meta name="description" content="Aggregate newspaper of RSS feeds for Anson";
                link rel="icon" href="data:image/svg+xml,📰";
                link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@picocss/pico@2/css/pico.blue.min.css";
                link rel="stylesheet" href="style.css";
            }
            body {
                main class="container" {
                    (generate_header())
                    div class="grid" {
                        @for column_entries in group_by_nth(&entries, 3) {
                            div {
                                @for entry in column_entries {
                                    (create_html_card(&entry))
                                }
                            }
                        }
                    }
                    (generate_footer())
                    (about_modal(entries))
                    script src="modal.js" {}
                    script src="minimal-theme-switcher.js" {}
                }
            }
        }
    }
}

fn main() -> Result<(), Box<dyn Error>> {
    let binding = fs::read_to_string("feeds.txt")?;
    let feed_urls: Vec<&str> = binding.lines().collect();

    // Fetch every feed in parallel; keep errors as strings so one bad feed
    // doesn't abort the whole run.
    let raw_entries: Vec<Result<Vec<Entry>, String>> = feed_urls
        .into_par_iter()
        .map(|url| {
            fetch_feed(url).map_err(|e| format!("Failed to fetch or parse feed {}: {}", url, e))
        })
        .collect();

    // Flatten the successful fetches and log the failures.
    let mut entries: Vec<Entry> = Vec::new();
    for entry in raw_entries {
        match entry {
            Ok(mut feed_entries) => entries.append(&mut feed_entries),
            Err(e) => println!("{}", e),
        }
    }

    // Remove any entries that don't have a timestamp, then sort newest first.
    entries.retain(|entry| entry.published.is_some() || entry.updated.is_some());
    entries
        .sort_by_key(|entry| Reverse(entry.published.unwrap_or(entry.updated.unwrap_or_default())));
    entries.truncate(30);

    let html_string = generate_index(entries).into_string();

    let output_path = Path::new("output/index.html");
    DirBuilder::new()
        .recursive(true)
        .create(output_path.parent().unwrap())?;

    match write(output_path, html_string) {
        Ok(_) => println!("Successfully wrote to {}", output_path.display()),
        Err(e) => eprintln!("Failed to write to {}: {}", output_path.display(), e),
    }

    Ok(())
}
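// A hedged smoke test: rendering the page with no entries should still emit
// a complete document without touching the network. The assertions lean on
// maud rendering DOCTYPE as "<!DOCTYPE html>", which is an assumption here.
#[cfg(test)]
mod page_tests {
    use super::*;

    #[test]
    fn empty_feed_still_renders_a_page() {
        let page = generate_index(Vec::new()).into_string();
        assert!(page.starts_with("<!DOCTYPE html>"));
        assert!(page.contains("Zine"));
    }
}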