1
0
mirror of https://gitlab.com/Anson-Projects/zine.git synced 2025-06-16 05:26:40 +00:00

Make code more robust to errors: feed entries without links are now logged and skipped instead of causing a panic

This commit is contained in:
Anson Biggs 2024-04-13 20:30:17 +00:00
parent 0bbe9b2805
commit a02b877e5c
3 changed files with 27 additions and 11 deletions

7
Cargo.lock generated
View File

@ -21,6 +21,7 @@ checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
name = "aggregate_rss_zine"
version = "0.3.0"
dependencies = [
"anyhow",
"chrono",
"clippy",
"feed-rs",
@ -70,6 +71,12 @@ dependencies = [
"libc",
]
[[package]]
name = "anyhow"
version = "1.0.82"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f538837af36e6f6a9be0faa67f9a314f8119e4e4b5867c6ab40ed60360142519"
[[package]]
name = "arrayref"
version = "0.3.7"

View File

@ -17,6 +17,7 @@ rayon = "1.8"
simple_logger = "4.3"
log = "0.4"
rss = "2.0"
anyhow = "1.0"
[dev-dependencies]
clippy = "0.0.302"

View File

@ -1,12 +1,12 @@
use crate::web_fetchers;
use chrono::{DateTime, Duration, Utc};
use chrono::{DateTime, Utc};
use feed_rs::model::Entry;
use rayon::prelude::*;
use scraper::{Html, Selector};
use anyhow::Result;
use std::cmp::Ordering;
use std::fs;
#[derive(Clone, PartialEq, Eq)]
pub struct Post {
pub title: String,
@ -32,20 +32,23 @@ impl PartialOrd for Post {
}
impl Post {
fn from_entry(entry: &feed_rs::model::Entry) -> Self {
fn from_entry(entry: &feed_rs::model::Entry) -> Result<Post> {
let title = entry
.title
.as_ref()
.map_or_else(|| "".to_string(), |t| t.content.clone());
let link = entry.links.first().unwrap();
let link = entry
.links
.first()
.ok_or_else(|| anyhow::anyhow!("No links for post {:?}", entry))?;
let date = get_entry_date(entry);
let lang = link.clone().href_lang.unwrap_or("en".to_string());
if lang != "en" {
log::warn!("Non english! {} {}", lang, link.href);
log::warn!("Not English! {} {}", lang, link.href);
}
let image_url = entry
@ -74,9 +77,9 @@ impl Post {
let main_url = get_root_url(link.href.as_str());
let score = (date - (Utc::now() - Duration::days(14))).num_minutes();
let score = (date - (chrono::Utc::now() - chrono::Duration::days(14))).num_minutes();
Post {
Ok(Post {
title,
link: link.href.clone(),
date,
@ -85,7 +88,7 @@ impl Post {
truncated_description,
main_url,
score,
}
})
}
}
@ -114,8 +117,14 @@ pub fn read_feed(path: &str) -> Vec<Post> {
entries
.par_iter()
.map(Post::from_entry)
.filter(|entry| entry.date < chrono::Utc::now())
.map(|entry| {
Post::from_entry(entry).map_err(|e| {
log::warn!("Failed to process entry: {}", e);
e
})
})
.filter_map(Result::ok)
.filter(|post| post.date < chrono::Utc::now())
.collect::<Vec<_>>()
}
@ -160,7 +169,6 @@ pub fn truncate_description(description: &str, max_length: usize) -> String {
pub fn strip_html_tags(html: &str) -> String {
let document = Html::parse_document(html);
// Use the wildcard selector to select all nodes and extract their text.
let selector = Selector::parse("*").unwrap();
let mut text_content = String::new();