mirror of
https://gitlab.com/Anson-Projects/zine.git
synced 2025-06-15 21:26:38 +00:00
make code more robust to errors
This commit is contained in:
parent
0bbe9b2805
commit
a02b877e5c
7
Cargo.lock
generated
7
Cargo.lock
generated
@ -21,6 +21,7 @@ checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
|
||||
name = "aggregate_rss_zine"
|
||||
version = "0.3.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"chrono",
|
||||
"clippy",
|
||||
"feed-rs",
|
||||
@ -70,6 +71,12 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.82"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f538837af36e6f6a9be0faa67f9a314f8119e4e4b5867c6ab40ed60360142519"
|
||||
|
||||
[[package]]
|
||||
name = "arrayref"
|
||||
version = "0.3.7"
|
||||
|
@ -17,6 +17,7 @@ rayon = "1.8"
|
||||
simple_logger = "4.3"
|
||||
log = "0.4"
|
||||
rss = "2.0"
|
||||
anyhow = "1.0"
|
||||
|
||||
[dev-dependencies]
|
||||
clippy = "0.0.302"
|
||||
|
@ -1,12 +1,12 @@
|
||||
use crate::web_fetchers;
|
||||
use chrono::{DateTime, Duration, Utc};
|
||||
use chrono::{DateTime, Utc};
|
||||
use feed_rs::model::Entry;
|
||||
use rayon::prelude::*;
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
use anyhow::Result;
|
||||
use std::cmp::Ordering;
|
||||
use std::fs;
|
||||
|
||||
#[derive(Clone, PartialEq, Eq)]
|
||||
pub struct Post {
|
||||
pub title: String,
|
||||
@ -32,20 +32,23 @@ impl PartialOrd for Post {
|
||||
}
|
||||
|
||||
impl Post {
|
||||
fn from_entry(entry: &feed_rs::model::Entry) -> Self {
|
||||
fn from_entry(entry: &feed_rs::model::Entry) -> Result<Post> {
|
||||
let title = entry
|
||||
.title
|
||||
.as_ref()
|
||||
.map_or_else(|| "".to_string(), |t| t.content.clone());
|
||||
|
||||
let link = entry.links.first().unwrap();
|
||||
let link = entry
|
||||
.links
|
||||
.first()
|
||||
.ok_or_else(|| anyhow::anyhow!("No links for post {:?}", entry))?;
|
||||
|
||||
let date = get_entry_date(entry);
|
||||
|
||||
let lang = link.clone().href_lang.unwrap_or("en".to_string());
|
||||
|
||||
if lang != "en" {
|
||||
log::warn!("Non english! {} {}", lang, link.href);
|
||||
log::warn!("Not English! {} {}", lang, link.href);
|
||||
}
|
||||
|
||||
let image_url = entry
|
||||
@ -74,9 +77,9 @@ impl Post {
|
||||
|
||||
let main_url = get_root_url(link.href.as_str());
|
||||
|
||||
let score = (date - (Utc::now() - Duration::days(14))).num_minutes();
|
||||
let score = (date - (chrono::Utc::now() - chrono::Duration::days(14))).num_minutes();
|
||||
|
||||
Post {
|
||||
Ok(Post {
|
||||
title,
|
||||
link: link.href.clone(),
|
||||
date,
|
||||
@ -85,7 +88,7 @@ impl Post {
|
||||
truncated_description,
|
||||
main_url,
|
||||
score,
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@ -114,8 +117,14 @@ pub fn read_feed(path: &str) -> Vec<Post> {
|
||||
|
||||
entries
|
||||
.par_iter()
|
||||
.map(Post::from_entry)
|
||||
.filter(|entry| entry.date < chrono::Utc::now())
|
||||
.map(|entry| {
|
||||
Post::from_entry(entry).map_err(|e| {
|
||||
log::warn!("Failed to process entry: {}", e);
|
||||
e
|
||||
})
|
||||
})
|
||||
.filter_map(Result::ok)
|
||||
.filter(|post| post.date < chrono::Utc::now())
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
@ -160,7 +169,6 @@ pub fn truncate_description(description: &str, max_length: usize) -> String {
|
||||
|
||||
pub fn strip_html_tags(html: &str) -> String {
|
||||
let document = Html::parse_document(html);
|
||||
// Use the wildcard selector to select all nodes and extract their text.
|
||||
let selector = Selector::parse("*").unwrap();
|
||||
let mut text_content = String::new();
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user