diff --git a/featured_feeds.txt b/featured_feeds.txt
new file mode 100644
index 0000000..9380a0b
--- /dev/null
+++ b/featured_feeds.txt
@@ -0,0 +1,2 @@
+https://ciechanow.ski/atom.xml
+https://factorio.com/blog/rss
\ No newline at end of file
diff --git a/feeds.txt b/feeds.txt
index e7a8627..7d3bd3f 100644
--- a/feeds.txt
+++ b/feeds.txt
@@ -1,11 +1,13 @@
 https://0xd34df00d.me/rss.xml
 https://adamsilver.io/atom.xml
 https://alexturek.com/feed
+https://api.quantamagazine.org/feed/
 https://austinhenley.com/blog/feed.rss
 https://bcantrill.dtrace.org/feed/
 https://blog.andymatuschak.org/rss
 https://blog.benjojo.co.uk/rss.xml
 https://blog.codinghorror.com/rss/
+https://blog.frost.kiwi/feed.xml
 https://calebporzio.com/feed
 https://chrisnicholas.dev/rss.xml
 https://ciechanow.ski/atom.xml
@@ -14,10 +16,14 @@ https://decomposition.al/atom.xml
 https://fabiensanglard.net/rss.xml
 https://factorio.com/blog/rss
 https://fasterthanli.me/index.xml
+https://gaultier.github.io/blog/feed.xml
+https://granary.io/url?input=html&output=rss&url=https://jamesg.blog
+https://jatan.space/rss/
 https://jvns.ca/atom.xml
 https://kinduff.com/feed.xml
 https://magnuschatt.medium.com/feed
 https://mary.codes/rss.xml
+https://matklad.github.io/feed.xml
 https://mtlynch.io/posts/index.xml
 https://newsletter.danhon.com/rss
 https://notes.ansonbiggs.com/rss/
@@ -26,8 +32,14 @@ https://orbitalindex.com/feed.xml
 https://projects.ansonbiggs.com/index.xml
 https://simonwillison.net/atom/entries/
 https://stephango.com/feed.xml
+https://steveklabnik.com/feed.xml
 https://taylor.town/feed.xml
+https://unzip.dev/rss/
+https://vickiboykis.com/index.xml
 https://www.construction-physics.com/feed
 https://www.elidedbranches.com/feeds/posts/default
 https://www.jeffgeerling.com/blog.xml
-https://xeiaso.net/blog.rss
+https://www.makerstations.io/rss/
+https://www.micahlerner.com/feed.xml
+https://www.troyhunt.com/rss/
+https://xeiaso.net/blog.rss
\ No newline at end of file
diff --git a/output/style.css b/output/style.css
index e69de29..49754b2 100644
--- a/output/style.css
+++ b/output/style.css
@@ -0,0 +1,6 @@
+.featured-title {
+    display: flex;
+    flex-direction: column;
+    justify-content: center;
+    align-items: center;
+}
\ No newline at end of file
diff --git a/src/index_generator.rs b/src/index_generator.rs
new file mode 100644
index 0000000..e369bc6
--- /dev/null
+++ b/src/index_generator.rs
@@ -0,0 +1,291 @@
+extern crate chrono;
+extern crate feed_rs;
+extern crate maud;
+extern crate reqwest;
+
+use chrono::DateTime;
+use chrono::Utc;
+use feed_rs::model::Entry;
+use maud::{html, Markup};
+use std::env;
+
+use crate::utilities;
+use crate::web_fetchers;
+
+fn create_featured_card(entry: &Entry) -> Markup {
+    let title = entry
+        .title
+        .as_ref()
+        .map_or_else(|| "".to_string(), |t| t.content.clone());
+
+    let link = entry.links.first().unwrap();
+    let lang = link.clone().href_lang.unwrap_or("en".to_string());
+
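+    // A non-English href_lang is only logged for visibility; the entry is still rendered.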
+    if lang != "en" {
+        println!("Non-English! {} {}", lang, link.href);
+    }
+
+    let mut image_url = entry
+        .media
+        .first()
+        .and_then(|m| m.content.first())
+        .and_then(|c| c.url.as_ref().map(|u| u.to_string()))
+        .unwrap_or_default();
+
+    // Fallback to fetching social image if direct extraction didn't work
+    if image_url.is_empty() {
+        println!(
+            "Falling back to searching for a social image for {}",
+            link.href
+        );
+        image_url = web_fetchers::fetch_social_image(link.href.as_str()).unwrap_or_default();
+    }
+
+    let description = entry.content.as_ref().map_or_else(
+        || {
+            entry
+                .summary
+                .as_ref()
+                .map_or_else(|| "".to_string(), |summary| summary.content.clone())
+        },
+        |content| {
+            content
+                .body
+                .as_ref()
+                .map_or_else(|| "".to_string(), |body| body.clone())
+        },
+    );
+
+    let cleaned_description = utilities::strip_html_tags(&description);
+    let truncated_description = utilities::truncate_description(&cleaned_description, 500);
+
+    let main_url = utilities::get_root_url(link.href.as_str());
+    dbg!(main_url);
+
+    html! {
+        article {
+            header class="grid" {
+                img src=(image_url) alt="Entry image";
+                hgroup class="featured-title" {
+                    h2 { (title) }
+                    a href=(format!("http://{}", main_url)) { (main_url) }
+                }
+            }
+            body {
+                p { (truncated_description) }
+            }
+            footer {
+                a class="grid" href=(link.href) style="--pico-text-decoration: none;" {
+                    button class="outline primary" { "Read Post" }
+                }
+            }
+        }
+    }
+}
+
+fn create_post_card(entry: &Entry) -> Markup {
+    let title = entry
+        .title
+        .as_ref()
+        .map_or_else(|| "".to_string(), |t| t.content.clone());
+
+    let link = entry.links.first().unwrap();
+    let lang = link.clone().href_lang.unwrap_or("en".to_string());
+
+    if lang != "en" {
+        println!("Non-English! {} {}", lang, link.href);
+    }
+
+    let mut image_url = entry
+        .media
+        .first()
+        .and_then(|m| m.content.first())
+        .and_then(|c| c.url.as_ref().map(|u| u.to_string()))
+        .unwrap_or_default();
+
+    // Fallback to fetching social image if direct extraction didn't work
+    if image_url.is_empty() {
+        println!(
+            "Falling back to searching for a social image for {}",
+            link.href
+        );
+        image_url = web_fetchers::fetch_social_image(link.href.as_str()).unwrap_or_default();
+    }
+
+    let description = entry.content.as_ref().map_or_else(
+        || {
+            entry
+                .summary
+                .as_ref()
+                .map_or_else(|| "".to_string(), |summary| summary.content.clone())
+        },
+        |content| {
+            content
+                .body
+                .as_ref()
+                .map_or_else(|| "".to_string(), |body| body.clone())
+        },
+    );
+
+    let cleaned_description = utilities::strip_html_tags(&description);
+    let truncated_description = utilities::truncate_description(&cleaned_description, 500);
+
+    let main_url = utilities::get_root_url(link.href.as_str());
+
+    html! {
+        article {
+            header {
+                hgroup {
+                    h2 { (title) }
+                    a href=(format!("http://{}", main_url)) { (main_url) }
+                }
+            }
+            body {
+                @if !image_url.is_empty() {
+                    img src=(image_url) alt="Entry image";
+                    p;
+                }
+                p { (truncated_description) }
+            }
+            footer {
+                a class="grid" href=(link.href) style="--pico-text-decoration: none;" {
+                    button class="outline secondary" { "Read Post" }
+                }
+            }
+        }
+    }
+}
+
+fn generate_footer() -> Markup {
+    let utc: DateTime<Utc> = Utc::now();
+    let formatted_utc = utc.format("%Y-%m-%d %H:%M:%S").to_string();
+
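+    // The visible text is the actual render time in UTC; the tooltip notes the scheduled 8AM Mountain Time build.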
+    html! {
+        footer class="container" {
+            small {
+                p {
+                    a href="https://ansonbiggs.com" { "Anson Biggs" }
+                    " - "
+                    a href="https://gitlab.com/MisterBiggs/zine" { "Source Code" }
+                    " - "
+                    "Page generated at: " em data-tooltip="8AM Mountain Time" { (formatted_utc) " UTC" }
+                }
+            }
+        }
+    }
+}
+
+fn generate_header() -> Markup {
+    html! {
+        header {
+            nav {
+                ul {
+                    li { h1 { "Anson's Aggregated Feed" }}
+                }
+                ul {
+                    li { button data-target="about" onclick="toggleModal(event)" { "About" } }
+                    li {
+                        details class="dropdown" {
+                            summary role="button" class="outline secondary" { "Theme" }
+                            ul {
+                                li { a href="#" data-theme-switcher="auto" { "Auto" }}
+                                li { a href="#" data-theme-switcher="light" { "Light" }}
+                                li { a href="#" data-theme-switcher="dark" { "Dark" }}
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+fn about_modal(entries: Vec<Entry>) -> Markup {
+    // Get the link for each entry (a blog post), then reduce it
+    // to the root URL of the blog it came from
+    let mut links = entries
+        .iter()
+        .map(|entry| entry.links.first().unwrap().href.as_str())
+        .map(utilities::get_root_url)
+        .collect::<std::collections::HashSet<_>>()
+        .into_iter()
+        .collect::<Vec<_>>();
+
+    // Alphabetical to be fair to everyone :)
+    links.sort();
+
+    html! {
+        dialog id="about" {
+            article {
+                header {
+                    a href="#" aria-label="Close" rel="prev" {}
+                    p { strong { "About" }}
+                }
+                p {
+                    "When looking for an RSS reader I came across "
+                    a href="https://news.russellsaw.io/" {"news.russellsaw.io"}
+                    " and thought the idea of building my own personalised newspaper was cool. \
+                    So, I decided to build a clone using my own subscribed RSS feeds."
+                }
+                p {
+                    "This page updates daily at 8:11ish AM Mountain Time. The following blogs"
+                    " are currently featured on the page:"
+                }
+                ul {
+                    @for link in links {
+                        li {a href=(link) {(link)}}
+                    }
+                }
+                p {
+                    "For the full list of feeds that are followed, see the raw list "
+                    a href="https://gitlab.com/MisterBiggs/zine/-/blob/master/feeds.txt" { "here." }
+                }
+            }
+        }
+    }
+}
+
+pub fn generate_index(mut entries: Vec<Entry>) -> Markup {
+    let running_in_gitlab = env::var("CI").map(|val| val == "true").unwrap_or(false);
+
+    let featured = entries.remove(0);
+    if running_in_gitlab {
+        println!("Building for deployment");
+        entries.truncate(30);
+    } else {
+        println!("Building for development");
+        entries.truncate(6);
+    }
+
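+    // CI=true is set by the GitLab CI runner, so deployments render the full
+    // page while local development builds stay small and fast.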
+    html! {
+        (maud::DOCTYPE)
+        html {
+            head {
+                title { "Anson's Zine" }
+                meta charset="utf-8";
+                meta name="viewport" content="width=device-width, initial-scale=1";
+                meta name="description" content="Aggregate newspaper of RSS feeds for Anson";
+                link rel="icon" href="data:image/svg+xml,📰";
+                link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@picocss/pico@2/css/pico.blue.min.css";
+                link rel="stylesheet" href="style.css";
+            }
+            body { main class="container" {
+                {(generate_header())}
+                (create_featured_card(&featured))
+                div class="grid" {
+                    @for column_entries in utilities::group_by_nth(&entries, 3) {
+                        div {
+                            @for entry in column_entries {
+                                {(create_post_card(&entry))}
+                            }
+                        }
+                    }
+                }
+                {(generate_footer())}
+                {(about_modal(entries))}
+                script src="modal.js" {}
+                script src="minimal-theme-switcher.js" {}
+            }}
+        }
+    }
+}
diff --git a/src/main.rs b/src/main.rs
index be0d57f..83d3252 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -3,333 +3,29 @@ extern crate feed_rs;
 extern crate maud;
 extern crate reqwest;
 
-use chrono::DateTime;
-use chrono::Utc;
-use feed_rs::model::Entry;
-use feed_rs::parser;
-use maud::{html, Markup};
-use reqwest::blocking::get;
-use scraper::{Html, Selector};
-use std::cmp::Reverse;
-use std::env;
 use std::error::Error;
-use std::fs;
 use std::fs::write;
 use std::fs::DirBuilder;
 use std::path::Path;
+use utilities::read_feed;
 
-use rayon::prelude::*;
-
-fn fetch_feed(url: &str) -> Result<Vec<Entry>, Box<dyn Error>> {
-    let content = get(url)?.text()?;
-    let feed = parser::parse(content.as_bytes())?;
-    println!("\tFeed {} returned {} items", url, feed.entries.len());
-    Ok(feed.entries)
-}
-
-fn fetch_social_image(url: &str) -> Result<String, Box<dyn Error>> {
-    let html = reqwest::blocking::get(url)?.text()?;
-    let document = Html::parse_document(&html);
-    let selector = Selector::parse("meta[property=\"og:image\"]").unwrap();
-
-    let image_url = document
-        .select(&selector)
-        .next()
-        .and_then(|element| element.value().attr("content"))
-        .unwrap_or("");
-
-    Ok(image_url.to_string())
-}
-
-fn create_html_card(entry: &Entry) -> Markup {
-    let title = entry
-        .title
-        .as_ref()
-        .map_or_else(|| "".to_string(), |t| t.content.clone());
-
-    let link = entry.links.first().unwrap();
-    let lang = link.clone().href_lang.unwrap_or("en".to_string());
-
-    if lang != "en" {
-        println!("Non english! {} {}", lang, link.href);
-    }
-
-    let mut image_url = entry
-        .media
-        .first()
-        .and_then(|m| m.content.first())
-        .and_then(|c| c.url.as_ref().map(|u| u.to_string()))
-        .unwrap_or_default();
-
-    // Fallback to fetching social image if direct extraction didn't work
-    if image_url.is_empty() {
-        println!(
-            "Falling back to searching for a social image for {}",
-            link.href
-        );
-        image_url = fetch_social_image(link.href.as_str()).unwrap_or_default();
-    }
-
-    let description = entry.content.as_ref().map_or_else(
-        || {
-            entry
-                .summary
-                .as_ref()
-                .map_or_else(|| "".to_string(), |summary| summary.content.clone())
-        },
-        |content| {
-            content
-                .body
-                .as_ref()
-                .map_or_else(|| "".to_string(), |body| body.clone())
-        },
-    );
-
-    let cleaned_description = strip_html_tags(&description);
-    let truncated_description = truncate_description(&cleaned_description, 500);
-
-    let main_url = get_root_url(link.href.as_str());
-
-    html! {
-        article {
-            header {
-                hgroup {
-                    h2 { (title) }
-                    a href=(format!("http://{}", main_url)) { (main_url) }
-                }
-            }
-            body {
-                @if !image_url.is_empty() {
-                    img src=(image_url) alt="Entry image";
-                    p;
-                }
-                p { (truncated_description) }
-            }
-            footer {
-                a class="grid" href=(link.href) style="--pico-text-decoration: none;" {
-                    button class="outline secondary" { "Read Post" }
-                }
-            }
-        }
-    }
-}
-
-fn get_root_url(input_url: &str) -> &str {
-    let mut url = input_url;
-
-    url = url.strip_prefix("https://").unwrap_or(url);
-    url = url.strip_prefix("http://").unwrap_or(url);
-
-    url.split_once('/').unwrap().0
-}
-
-fn truncate_description(description: &str, max_length: usize) -> String {
-    let description_trimmed = description.trim();
-    if description_trimmed.len() > max_length {
-        let mut char_boundary = max_length;
-        for (idx, _) in description_trimmed.char_indices() {
-            if idx > max_length {
-                break;
-            }
-            char_boundary = idx;
-        }
-        format!("{}...", &description_trimmed[..char_boundary])
-    } else {
-        description_trimmed.to_string()
-    }
-}
-
-fn strip_html_tags(html: &str) -> String {
-    let document = Html::parse_document(html);
-    // Use the wildcard selector to select all nodes and extract their text.
-    let selector = Selector::parse("*").unwrap();
-    let mut text_content = String::new();
-
-    for element in document.select(&selector) {
-        let text = element.text().collect::<Vec<_>>().join(" ");
-        text_content.push_str(&text);
-        text_content.push(' ');
-    }
-
-    text_content.trim().to_string()
-}
-
-fn generate_header() -> Markup {
-    html! {
-        header {
-            nav {
-                ul {
-                    li { h1 { "Anson's Aggregated Feed" }}
-                }
-                ul {
-                    li { button data-target="about" onclick="toggleModal(event)" { "About" } }
-                    li {
-                        details class="dropdown" {
-                            summary role="button" class="outline secondary" { "Theme" }
-                            ul {
-                                li { a href="#" data-theme-switcher="auto" { "Auto" }}
-                                li { a href="#" data-theme-switcher="light" { "Light" }}
-                                li { a href="#" data-theme-switcher="dark" { "Dark" }}
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    }
-}
-
-fn about_modal(entries: Vec<Entry>) -> Markup {
-    // Get link for each entry, which is a blog post then,
-    // convert it to a url to the main page of the blog
-    let mut links = entries
-        .iter()
-        .map(|entry| entry.links.first().unwrap().href.as_str())
-        .map(get_root_url)
-        .collect::<std::collections::HashSet<_>>()
-        .into_iter()
-        .collect::<Vec<_>>();
-
-    // Alphabetical to be fair to everytone :)
-    links.sort();
-
-    html! {
-        dialog id="about" {
-            article {
-                header {
-                    a href="#" aria-label="Close" rel="prev" {}
-                    p { strong { "About" }}
-                }
-                p {
-                    "When looking for a RSS reader I came across "
-                    a href="https://news.russellsaw.io/" {"news.russellsaw.io"}
-                    " I thought the idea of building my own personalised newspaper was cool. \
-                    So, I decided to build a clone using my own subscribed RSS feeds."
-                }
-                p {
-                    "This page updates daily at 8:11ish AM Mountain Time. The following blogs are"
-                    " featured on the page currently:"
-                }
-                ul {
-                    @for link in links {
-                        li {a href=(link) {(link)}}
-                    }
-                }
-                p {
-                    "For the full list of feeds that are followed see the raw list "
-                    a href="https://gitlab.com/MisterBiggs/zine/-/blob/master/feeds.txt" { "here." }
-                }
-            }
-        }
-    }
-}
-
-fn generate_footer() -> Markup {
-    let utc: DateTime<Utc> = Utc::now();
-    let formatted_utc = utc.format("%Y-%m-%d %H:%M:%S").to_string();
-
-    html! {
-        footer class="container" {
-            small {
-                p {
-                    a href="https://ansonbiggs.com" { "Anson Biggs" }
-                    " - "
-                    a href="https://gitlab.com/MisterBiggs/zine" { "Source Code" }
-                    " - "
-                    "Page generated at: " em data-tooltip="8AM Mountain Time" { (formatted_utc) " UTC" }
-                }
-            }
-        }
-    }
-}
-
-fn group_by_nth<T: Clone>(slice: &[T], n: usize) -> Vec<Vec<T>> {
-    (0..n)
-        .map(|i| {
-            slice
-                .iter()
-                .enumerate()
-                .filter_map(|(index, value)| {
-                    if index % n == i {
-                        Some(value.clone())
-                    } else {
-                        None
-                    }
-                })
-                .collect()
-        })
-        .collect()
-}
-
-fn generate_index(mut entries: Vec<Entry>) -> Markup {
-    let running_in_gitlab = env::var("CI").map(|val| val == "true").unwrap_or(false);
-
-    if running_in_gitlab {
-        println!("Building for deployment");
-        entries.truncate(30);
-    } else {
-        println!("Building for development");
-        entries.truncate(10);
-    }
-
-    html! {
-        (maud::DOCTYPE)
-        html {
-            head {
-                title { "Anson's Zine" }
-                meta charset="utf-8";
-                meta name="viewport" content="width=device-width, initial-scale=1";
-                meta name="description" content="Aggregate newspaper of RSS feeds for Anson";
-                link rel="icon" href="data:image/svg+xml,📰";
-                link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@picocss/pico@2/css/pico.blue.min.css";
-                link rel="stylesheet" href="style.css";
-            }
-            body { main class="container" {
-                {(generate_header())}
-                div class="grid" {
-                    @for column_entries in group_by_nth(&entries, 3) {
-                        div {
-                            @for entry in column_entries {
-                                {(create_html_card(&entry))}
-                            }
-                        }
-                    }
-                }
-                {(generate_footer())}
-                {(about_modal(entries))}
-                script src="modal.js" {}
-                script src="minimal-theme-switcher.js" {}
-            }}
-        }
-    }
-}
+
+mod index_generator;
+mod utilities;
+mod web_fetchers;
 
 fn main() -> Result<(), Box<dyn Error>> {
-    let binding = fs::read_to_string("feeds.txt").unwrap();
-    let feed_urls: Vec<&str> = binding.lines().collect();
+    let featured = read_feed("featured_feeds.txt").first().unwrap().clone();
+    let mut entries = read_feed("feeds.txt");
 
-    println!("Fetching feeds:");
-    let raw_entries: Vec<Result<Vec<Entry>, String>> = feed_urls
-        .into_par_iter()
-        .map(|url| {
-            fetch_feed(url).map_err(|e| format!("Failed to fetch or parse feed {}: {}", url, e))
-        })
-        .collect();
+    // Move featured article to the front
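+    // (the featured feeds are also listed in feeds.txt, so drop the duplicate
+    // before re-inserting the entry at index 0)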
+    entries.retain(|entry| entry != &featured);
+    entries.insert(0, featured);
+    println!(
+        "Featured article: {}",
+        entries[0].links.first().unwrap().href.as_str()
+    );
 
-    let mut entries: Vec<Entry> = Vec::new();
-    for entry in raw_entries {
-        match entry {
-            Ok(mut feed_entries) => entries.append(&mut feed_entries),
-            Err(e) => println!("{}", e),
-        }
-    }
-
-    // Remove any entries that don't have a timestamp, and then sort by timestamps
-    entries.retain(|entry| entry.published.is_some() || entry.updated.is_some());
-    entries
-        .sort_by_key(|entry| Reverse(entry.published.unwrap_or(entry.updated.unwrap_or_default())));
-
-    let html_string = generate_index(entries).into_string();
+    let index = index_generator::generate_index(entries);
 
     let output_path = Path::new("output/index.html");
     DirBuilder::new()
@@ -337,7 +33,7 @@ fn main() -> Result<(), Box<dyn Error>> {
         .create(output_path.parent().unwrap())
         .unwrap();
 
-    match write(output_path, html_string) {
+    match write(output_path, index.into_string()) {
         Ok(_) => println!("Successfully wrote to {}", output_path.display()),
         Err(e) => eprintln!("Failed to write to {}: {}", output_path.display(), e),
     }
diff --git a/src/utilities.rs b/src/utilities.rs
new file mode 100644
index 0000000..acc35ca
--- /dev/null
+++ b/src/utilities.rs
@@ -0,0 +1,93 @@
+use crate::web_fetchers;
+use feed_rs::model::Entry;
+use rayon::prelude::*;
+use scraper::{Html, Selector};
+use std::cmp::Reverse;
+use std::fs;
+
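+/// Read feed URLs (one per line) from `path`, fetch them in parallel,
+/// and return every entry sorted newest-first.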
+pub fn read_feed(path: &str) -> Vec<Entry> {
+    let binding = fs::read_to_string(path).unwrap();
+    let feed_urls: Vec<&str> = binding.lines().collect();
+
+    println!("Fetching feeds:");
+    let raw_entries: Vec<Result<Vec<Entry>, String>> = feed_urls
+        .into_par_iter()
+        .map(|url| {
+            web_fetchers::fetch_feed(url)
+                .map_err(|e| format!("Failed to fetch or parse feed {}: {}", url, e))
+        })
+        .collect();
+
+    let mut entries: Vec<Entry> = Vec::new();
+    for entry in raw_entries {
+        match entry {
+            Ok(mut feed_entries) => entries.append(&mut feed_entries),
+            Err(e) => println!("{}", e),
+        }
+    }
+
+    // Remove any entries that don't have a timestamp, and then sort by timestamps
+    entries.retain(|entry| entry.published.is_some() || entry.updated.is_some());
+    entries
+        .sort_by_key(|entry| Reverse(entry.published.unwrap_or(entry.updated.unwrap_or_default())));
+
+    entries
+}
+
+pub fn get_root_url(input_url: &str) -> &str {
+    let mut url = input_url;
+
+    url = url.strip_prefix("https://").unwrap_or(url);
+    url = url.strip_prefix("http://").unwrap_or(url);
+
+    url.split_once('/').unwrap().0
+}
+
+pub fn truncate_description(description: &str, max_length: usize) -> String {
+    let description_trimmed = description.trim();
+    if description_trimmed.len() > max_length {
+        let mut char_boundary = max_length;
+        for (idx, _) in description_trimmed.char_indices() {
+            if idx > max_length {
+                break;
+            }
+            char_boundary = idx;
+        }
+        format!("{}...", &description_trimmed[..char_boundary])
+    } else {
+        description_trimmed.to_string()
+    }
+}
+
+pub fn strip_html_tags(html: &str) -> String {
+    let document = Html::parse_document(html);
+    // Use the wildcard selector to select all nodes and extract their text.
+    let selector = Selector::parse("*").unwrap();
+    let mut text_content = String::new();
+
+    for element in document.select(&selector) {
+        let text = element.text().collect::<Vec<_>>().join(" ");
+        text_content.push_str(&text);
+        text_content.push(' ');
+    }
+
+    text_content.trim().to_string()
+}
+
+pub fn group_by_nth<T: Clone>(slice: &[T], n: usize) -> Vec<Vec<T>> {
+    (0..n)
+        .map(|i| {
+            slice
+                .iter()
+                .enumerate()
+                .filter_map(|(index, value)| {
+                    if index % n == i {
+                        Some(value.clone())
+                    } else {
+                        None
+                    }
+                })
+                .collect()
+        })
+        .collect()
+}
diff --git a/src/web_fetchers.rs b/src/web_fetchers.rs
new file mode 100644
index 0000000..e96b620
--- /dev/null
+++ b/src/web_fetchers.rs
@@ -0,0 +1,28 @@
+use feed_rs::model::Entry;
+use feed_rs::parser;
+
+use reqwest::blocking::get;
+use scraper::{Html, Selector};
+
+use std::error::Error;
+
+pub fn fetch_feed(url: &str) -> Result<Vec<Entry>, Box<dyn Error>> {
+    let content = get(url)?.text()?;
+    let feed = parser::parse(content.as_bytes())?;
+    println!("\tFeed {} returned {} items", url, feed.entries.len());
+    Ok(feed.entries)
+}
+
+pub fn fetch_social_image(url: &str) -> Result<String, Box<dyn Error>> {
+    let html = reqwest::blocking::get(url)?.text()?;
+    let document = Html::parse_document(&html);
+    let selector = Selector::parse("meta[property=\"og:image\"]").unwrap();
+
+    let image_url = document
+        .select(&selector)
+        .next()
+        .and_then(|element| element.value().attr("content"))
+        .unwrap_or("");
+
+    Ok(image_url.to_string())
+}