From e5ff6551e35e9781ca74f0208a4e5453395bd9e7 Mon Sep 17 00:00:00 2001 From: Anson Biggs Date: Fri, 12 Apr 2024 06:21:56 +0000 Subject: [PATCH] Add RSS feed --- .gitignore | 3 +- Cargo.lock | 131 +++++++++++++++++++++++++++++++++++++++++- Cargo.toml | 3 +- README.md | 14 ++++- src/main.rs | 25 +++++--- src/site_generator.rs | 32 ++++++++++- 6 files changed, 195 insertions(+), 13 deletions(-) diff --git a/.gitignore b/.gitignore index daaa467..4f5b728 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /target -/output/index.html \ No newline at end of file +/output/index.html +/output/feed.xml \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 09806a4..eb3bcbc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -28,6 +28,7 @@ dependencies = [ "maud", "rayon", "reqwest", + "rss", "scraper", "simple_logger", ] @@ -81,6 +82,19 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" +[[package]] +name = "atom_syndication" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "571832dcff775e26562e8e6930cd483de5587301d40d3a3b85d532b6383e15a7" +dependencies = [ + "chrono", + "derive_builder", + "diligent-date-parser", + "never", + "quick-xml 0.30.0", +] + [[package]] name = "autocfg" version = "1.2.0" @@ -270,6 +284,41 @@ dependencies = [ "syn 2.0.58", ] +[[package]] +name = "darling" +version = "0.14.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b750cb3417fd1b327431a470f388520309479ab0bf5e323505daf0290cd3850" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.14.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "109c1ca6e6b7f82cc233a97004ea8ed7ca123a9af07a8230878fcfda9b158bf0" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 1.0.109", +] + +[[package]] +name = "darling_macro" +version = "0.14.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4aab4dbc9f7611d8b55048a3a16d2d010c2c8334e46304b40ac1cc14bf3b48e" +dependencies = [ + "darling_core", + "quote", + "syn 1.0.109", +] + [[package]] name = "deranged" version = "0.3.11" @@ -279,6 +328,37 @@ dependencies = [ "powerfmt", ] +[[package]] +name = "derive_builder" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d67778784b508018359cbc8696edb3db78160bab2c2a28ba7f56ef6932997f8" +dependencies = [ + "derive_builder_macro", +] + +[[package]] +name = "derive_builder_core" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c11bdc11a0c47bc7d37d582b5285da6849c96681023680b906673c5707af7b0f" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "derive_builder_macro" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebcda35c7a396850a55ffeac740804b40ffec779b98fffbb1738f4033f0ee79e" +dependencies = [ + "derive_builder_core", + "syn 1.0.109", +] + [[package]] name = "derive_more" version = "0.99.17" @@ -290,6 +370,15 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "diligent-date-parser" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6cf7fe294274a222363f84bcb63cdea762979a0443b4cf1f4f8fd17c86b1182" +dependencies = [ + "chrono", +] + [[package]] name = "dirs" version = "1.0.5" @@ -368,7 +457,7 @@ dependencies = [ "chrono", "lazy_static", "mime", - "quick-xml", + "quick-xml 0.31.0", "regex", "serde", "serde_json", @@ -677,6 +766,12 @@ dependencies = [ "cc", ] +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "0.5.0" @@ -844,6 +939,12 @@ dependencies = [ "tempfile", ] +[[package]] +name = "never" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c96aba5aa877601bb3f6dd6a63a969e1f82e60646e81e71b14496995e9853c91" + [[package]] name = "new_debug_unreachable" version = "1.0.6" @@ -1140,6 +1241,16 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "quick-xml" +version = "0.30.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eff6510e86862b57b210fd8cbe8ed3f0d7d600b9c2863cd4549a2e033c66e956" +dependencies = [ + "encoding_rs", + "memchr", +] + [[package]] name = "quick-xml" version = "0.31.0" @@ -1307,6 +1418,18 @@ dependencies = [ "winreg", ] +[[package]] +name = "rss" +version = "2.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7b2c77eb4450d7d5f98df52c381cd6c4e19b75dad9209a9530b85a44510219a" +dependencies = [ + "atom_syndication", + "derive_builder", + "never", + "quick-xml 0.30.0", +] + [[package]] name = "rust-argon2" version = "0.8.3" @@ -1559,6 +1682,12 @@ dependencies = [ "quote", ] +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + [[package]] name = "syn" version = "1.0.109" diff --git a/Cargo.toml b/Cargo.toml index 22f0945..95fbc80 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,8 +7,6 @@ authors = ["Anson Biggs"] homepage = "https://zine.ansonbiggs.com" license = "MIT" -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - [dependencies] feed-rs = "1.4" reqwest = { version = "0.12", features = ["blocking"] } @@ -18,6 +16,7 @@ scraper = "0.19" rayon = "1.8" simple_logger = "4.3" log = "0.4" +rss = "2.0" [dev-dependencies] clippy = "0.0.302" diff --git a/README.md b/README.md index 805b9d4..ccdfeb2 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,18 @@ Anson's Aggregated Feed is a Rust application designed to aggregate content from multiple RSS feeds, creating a personalized news page. It fetches articles from specified RSS feed URLs, generates HTML cards for each entry, and outputs a single, styled HTML page. This project showcases the use of Rust for web content aggregation and manipulation, leveraging several powerful crates for parsing RSS feeds, handling dates and times, making HTTP requests, and rendering HTML. +## Algorithm + +Long term I would like some sort of algorithm that is a small improvement over just sorting the feed by posting date. Right now the only aspects taken into account are: + +- Whether the feed is in the featured list +- Publish date of the post +- Bonus points if the feed includes an image, negative points if there isn't even a social image. + ## Featured Feeds -These are feeds that are extremely high quality and don't post on a regular schedule. There are blogs (factorio for example) that I think are consistently high quality but I want to save this space for very special posts. \ No newline at end of file +These are feeds that are extremely high quality and don't post on a regular schedule. There are blogs (factorio for example) that I think are consistently high quality but I want to save this space for very special posts. + +## RSS Feed + +The site also aggregates everything that is on the homepage into an rss feed. Available at https://zine.ansonbiggs.com/feed.xml diff --git a/src/main.rs b/src/main.rs index deedea2..5e5209c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -50,17 +50,28 @@ fn main() -> Result<(), Box> { entries.retain(|entry| entry.score.is_positive()); entries.sort(); - let index = site_generator::generate_index(entries); - - let output_path = Path::new("output/index.html"); + let index = site_generator::generate_index(entries.clone()); + let index_path = Path::new("output/index.html"); DirBuilder::new() .recursive(true) - .create(output_path.parent().unwrap()) + .create(index_path.parent().unwrap()) .unwrap(); - match write(output_path, index.into_string()) { - Ok(_) => log::info!("Successfully wrote to {}", output_path.display()), - Err(e) => log::error!("Failed to write to {}: {}", output_path.display(), e), + match write(index_path, index.into_string()) { + Ok(_) => log::info!("Successfully wrote to {}", index_path.display()), + Err(e) => log::error!("Failed to write to {}: {}", index_path.display(), e), + } + + let feed = site_generator::generate_rss(entries.clone()); + let feed_path = Path::new("output/feed.xml"); + DirBuilder::new() + .recursive(true) + .create(feed_path.parent().unwrap()) + .unwrap(); + + match write(feed_path, feed) { + Ok(_) => log::info!("Successfully wrote to {}", feed_path.display()), + Err(e) => log::error!("Failed to write to {}: {}", feed_path.display(), e), } Ok(()) diff --git a/src/site_generator.rs b/src/site_generator.rs index 16d2de9..04e33cb 100644 --- a/src/site_generator.rs +++ b/src/site_generator.rs @@ -7,6 +7,7 @@ use chrono::DateTime; use chrono::Utc; use maud::{html, Markup}; +use rss::{ChannelBuilder, Item, ItemBuilder}; use crate::utilities; @@ -69,14 +70,17 @@ fn generate_footer() -> Markup { p { a href="https://ansonbiggs.com" { "Anson Biggs" } " - " + a href=("/feed.xml") target="_blank" rel="noopener noreferrer" { "RSS Feed" } + " - " a href="https://gitlab.com/MisterBiggs/zine" { "Source Code" } " - " - "Page generated at: " em data-tooltip="Usually daily 8AM Mountain Time" { (formatted_utc) " UTC" } + "Page generated at: " em data-tooltip="Automatic builds daily 8AM Mountain Time" { (formatted_utc) " UTC" } } } } } } + fn generate_header() -> Markup { html! { header { @@ -86,6 +90,9 @@ fn generate_header() -> Markup { } ul { li { button data-target="about" onclick="toggleModal(event)" { "About" } } + li { + + } li { details class="dropdown" { summary role="button" class="outline secondary" { "Theme" } @@ -191,3 +198,26 @@ pub fn generate_index(mut entries: Vec) -> Markup { } } } + +pub fn generate_rss(posts: Vec) -> String { + let items: Vec = posts + .iter() + .map(|post| { + ItemBuilder::default() + .title(post.title.clone()) + .link(post.link.clone()) + .pub_date(post.date.to_rfc2822()) + .description(post.truncated_description.clone()) + .build() + }) + .collect(); + + let channel = ChannelBuilder::default() + .title("Anson's Aggregate RSS Feed") + .link("https://zine.ansonbiggs.com/feed") + .description("All the feeds I like, aggregated into one place.") + .items(items) + .build(); + + channel.to_string() +}