mirror of https://gitlab.com/Anson-Projects/zine.git synced 2025-06-16 13:36:40 +00:00

make code more robust to errors

Anson Biggs 2024-04-13 20:30:17 +00:00
parent 0bbe9b2805
commit a02b877e5c
3 changed files with 27 additions and 11 deletions

Cargo.lock (generated), 7 lines changed:

```diff
@@ -21,6 +21,7 @@ checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
 name = "aggregate_rss_zine"
 version = "0.3.0"
 dependencies = [
+ "anyhow",
  "chrono",
  "clippy",
  "feed-rs",
@@ -70,6 +71,12 @@ dependencies = [
  "libc",
 ]

+[[package]]
+name = "anyhow"
+version = "1.0.82"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f538837af36e6f6a9be0faa67f9a314f8119e4e4b5867c6ab40ed60360142519"
+
 [[package]]
 name = "arrayref"
 version = "0.3.7"
```

Cargo.toml, 1 line changed:

```diff
@@ -17,6 +17,7 @@ rayon = "1.8"
 simple_logger = "4.3"
 log = "0.4"
 rss = "2.0"
+anyhow = "1.0"

 [dev-dependencies]
 clippy = "0.0.302"
```

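The new `anyhow` dependency is what enables the refactor below: `anyhow::Result<T>` is an alias for `Result<T, anyhow::Error>`, and any error type implementing `std::error::Error` converts into it automatically through `?`. A minimal sketch of the idiom, using a hypothetical `read_config` helper that is not part of this repo:

```rust
use anyhow::{Context, Result};
use std::fs;

// Hypothetical helper (not from this repo): a std::io::Error from
// read_to_string converts into anyhow::Error via `?`, and
// with_context() attaches a human-readable message to the failure.
fn read_config(path: &str) -> Result<String> {
    fs::read_to_string(path).with_context(|| format!("failed to read {path}"))
}

fn main() -> Result<()> {
    let config = read_config("config.toml")?;
    println!("read {} bytes", config.len());
    Ok(())
}
```
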
Rust source (the `Post` module):

@ -1,12 +1,12 @@
use crate::web_fetchers; use crate::web_fetchers;
use chrono::{DateTime, Duration, Utc}; use chrono::{DateTime, Utc};
use feed_rs::model::Entry; use feed_rs::model::Entry;
use rayon::prelude::*; use rayon::prelude::*;
use scraper::{Html, Selector}; use scraper::{Html, Selector};
use anyhow::Result;
use std::cmp::Ordering; use std::cmp::Ordering;
use std::fs; use std::fs;
#[derive(Clone, PartialEq, Eq)] #[derive(Clone, PartialEq, Eq)]
pub struct Post { pub struct Post {
pub title: String, pub title: String,
@ -32,20 +32,23 @@ impl PartialOrd for Post {
} }
impl Post { impl Post {
fn from_entry(entry: &feed_rs::model::Entry) -> Self { fn from_entry(entry: &feed_rs::model::Entry) -> Result<Post> {
let title = entry let title = entry
.title .title
.as_ref() .as_ref()
.map_or_else(|| "".to_string(), |t| t.content.clone()); .map_or_else(|| "".to_string(), |t| t.content.clone());
let link = entry.links.first().unwrap(); let link = entry
.links
.first()
.ok_or_else(|| anyhow::anyhow!("No links for post {:?}", entry))?;
let date = get_entry_date(entry); let date = get_entry_date(entry);
let lang = link.clone().href_lang.unwrap_or("en".to_string()); let lang = link.clone().href_lang.unwrap_or("en".to_string());
if lang != "en" { if lang != "en" {
log::warn!("Non english! {} {}", lang, link.href); log::warn!("Not English! {} {}", lang, link.href);
} }
let image_url = entry let image_url = entry
@ -74,9 +77,9 @@ impl Post {
let main_url = get_root_url(link.href.as_str()); let main_url = get_root_url(link.href.as_str());
let score = (date - (Utc::now() - Duration::days(14))).num_minutes(); let score = (date - (chrono::Utc::now() - chrono::Duration::days(14))).num_minutes();
Post { Ok(Post {
title, title,
link: link.href.clone(), link: link.href.clone(),
date, date,
@ -85,7 +88,7 @@ impl Post {
truncated_description, truncated_description,
main_url, main_url,
score, score,
} })
} }
} }
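
The substantive change: `from_entry` now returns `Result<Post>` instead of calling `.unwrap()` on `entry.links.first()`, so a feed entry with no links becomes a reportable error rather than a panic that takes down the whole run. A self-contained sketch of the same `ok_or_else` + `?` pattern, using simplified stand-in types rather than the real `feed_rs` models:

```rust
use anyhow::{anyhow, Result};

// Simplified stand-in for a feed entry; the real code uses feed_rs::model::Entry.
struct Entry {
    links: Vec<String>,
}

struct Post {
    link: String,
}

impl Post {
    // Instead of `links.first().unwrap()`, which panics on a malformed
    // entry, `ok_or_else` turns the missing link into an error that the
    // `?` operator propagates to the caller.
    fn from_entry(entry: &Entry) -> Result<Post> {
        let link = entry
            .links
            .first()
            .ok_or_else(|| anyhow!("no links for post"))?;
        Ok(Post { link: link.clone() })
    }
}

fn main() {
    let good = Entry { links: vec!["https://example.com".to_string()] };
    let bad = Entry { links: vec![] };

    match Post::from_entry(&good) {
        Ok(post) => println!("got post: {}", post.link),
        Err(e) => eprintln!("skipping entry: {e}"),
    }
    // The caller decides what to do with a failure; here we just log it.
    if let Err(e) = Post::from_entry(&bad) {
        eprintln!("skipping entry: {e}");
    }
}
```
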
```diff
@@ -114,8 +117,14 @@ pub fn read_feed(path: &str) -> Vec<Post> {
     entries
         .par_iter()
-        .map(Post::from_entry)
-        .filter(|entry| entry.date < chrono::Utc::now())
+        .map(|entry| {
+            Post::from_entry(entry).map_err(|e| {
+                log::warn!("Failed to process entry: {}", e);
+                e
+            })
+        })
+        .filter_map(Result::ok)
+        .filter(|post| post.date < chrono::Utc::now())
         .collect::<Vec<_>>()
 }
```
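
This hunk pairs with the new `Result` return type: `map_err` logs each failure and hands the error back unchanged, then `filter_map(Result::ok)` drops the failed entries so the surviving posts flow on. A condensed sketch of that log-and-skip pipeline, parsing strings instead of feed entries:

```rust
use rayon::prelude::*;

// Hypothetical fallible parser standing in for Post::from_entry.
fn parse(s: &str) -> Result<i32, std::num::ParseIntError> {
    s.parse::<i32>()
}

fn main() {
    let inputs = vec!["1", "two", "3"];

    let numbers: Vec<i32> = inputs
        .par_iter()
        .map(|s| {
            // map_err lets us log the failure without consuming it;
            // the Result continues down the pipeline.
            parse(s).map_err(|e| {
                eprintln!("failed to process {s:?}: {e}");
                e
            })
        })
        // Keep only the successful conversions.
        .filter_map(Result::ok)
        .collect();

    assert_eq!(numbers, vec![1, 3]);
}
```

Because `map_err` returns the `Result` it was given, the logging is purely a side effect; the decision to discard happens in one place, at `filter_map`.
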
```diff
@@ -160,7 +169,6 @@ pub fn truncate_description(description: &str, max_length: usize) -> String {
 pub fn strip_html_tags(html: &str) -> String {
     let document = Html::parse_document(html);
-    // Use the wildcard selector to select all nodes and extract their text.
     let selector = Selector::parse("*").unwrap();
     let mut text_content = String::new();
```
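
The removed line was only a comment describing the wildcard-selector idiom, which survives in the code. The diff excerpt ends mid-function; a sketch of one plausible completion (an assumption, since the rest of the real function is not shown here):

```rust
use scraper::{Html, Selector};

// Sketch of the same idea: parse the HTML, select the root element with
// the wildcard selector, and concatenate its descendant text nodes.
fn strip_html_tags(html: &str) -> String {
    let document = Html::parse_document(html);
    // "*" is always a valid CSS selector, so unwrap() cannot fail here.
    let selector = Selector::parse("*").unwrap();
    let mut text_content = String::new();
    // Taking only the first match (the document root) avoids counting
    // the same text once per nested element.
    if let Some(root) = document.select(&selector).next() {
        for text in root.text() {
            text_content.push_str(text);
        }
    }
    text_content.trim().to_string()
}

fn main() {
    let html = "<p>Hello <b>world</b></p>";
    assert_eq!(strip_html_tags(html), "Hello world");
}
```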