diff --git a/Cargo.lock b/Cargo.lock index eb3bcbc..c2e251e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -21,6 +21,7 @@ checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" name = "aggregate_rss_zine" version = "0.3.0" dependencies = [ + "anyhow", "chrono", "clippy", "feed-rs", @@ -70,6 +71,12 @@ dependencies = [ "libc", ] +[[package]] +name = "anyhow" +version = "1.0.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f538837af36e6f6a9be0faa67f9a314f8119e4e4b5867c6ab40ed60360142519" + [[package]] name = "arrayref" version = "0.3.7" diff --git a/Cargo.toml b/Cargo.toml index 95fbc80..d891d49 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ rayon = "1.8" simple_logger = "4.3" log = "0.4" rss = "2.0" +anyhow = "1.0" [dev-dependencies] clippy = "0.0.302" diff --git a/src/utilities.rs b/src/utilities.rs index 81842f8..fa3d46e 100644 --- a/src/utilities.rs +++ b/src/utilities.rs @@ -1,12 +1,12 @@ use crate::web_fetchers; -use chrono::{DateTime, Duration, Utc}; +use chrono::{DateTime, Utc}; use feed_rs::model::Entry; use rayon::prelude::*; use scraper::{Html, Selector}; +use anyhow::Result; use std::cmp::Ordering; use std::fs; - #[derive(Clone, PartialEq, Eq)] pub struct Post { pub title: String, @@ -32,20 +32,23 @@ impl PartialOrd for Post { } impl Post { - fn from_entry(entry: &feed_rs::model::Entry) -> Self { + fn from_entry(entry: &feed_rs::model::Entry) -> Result<Self> { let title = entry .title .as_ref() .map_or_else(|| "".to_string(), |t| t.content.clone()); - let link = entry.links.first().unwrap(); + let link = entry .links .first() .ok_or_else(|| anyhow::anyhow!("No links for post {:?}", entry))?; let date = get_entry_date(entry); let lang = link.clone().href_lang.unwrap_or("en".to_string()); if lang != "en" { - log::warn!("Non english! {} {}", lang, link.href); + log::warn!("Not English! {} {}", lang, link.href); } let image_url = entry @@ -74,9 +77,9 @@ impl Post { let main_url = get_root_url(link.href.as_str()); - let score = (date - (Utc::now() - Duration::days(14))).num_minutes(); + let score = (date - (chrono::Utc::now() - chrono::Duration::days(14))).num_minutes(); - Post { + Ok(Post { title, link: link.href.clone(), date, @@ -85,7 +88,7 @@ truncated_description, main_url, score, - } + }) } } @@ -114,8 +117,14 @@ pub fn read_feed(path: &str) -> Vec<Post> { entries .par_iter() - .map(Post::from_entry) - .filter(|entry| entry.date < chrono::Utc::now()) + .map(|entry| { Post::from_entry(entry).map_err(|e| { log::warn!("Failed to process entry: {}", e); e }) }) + .filter_map(Result::ok) + .filter(|post| post.date < chrono::Utc::now()) .collect::<Vec<Post>>() } @@ -160,7 +169,6 @@ pub fn truncate_description(description: &str, max_length: usize) -> String { pub fn strip_html_tags(html: &str) -> String { let document = Html::parse_document(html); - // Use the wildcard selector to select all nodes and extract their text. let selector = Selector::parse("*").unwrap(); let mut text_content = String::new();