mirror of
https://gitlab.com/Anson-Projects/zine.git
synced 2025-06-16 05:26:40 +00:00
make code more robust to errors
This commit is contained in:
parent
0bbe9b2805
commit
a02b877e5c
7
Cargo.lock
generated
7
Cargo.lock
generated
@ -21,6 +21,7 @@ checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
|
|||||||
name = "aggregate_rss_zine"
|
name = "aggregate_rss_zine"
|
||||||
version = "0.3.0"
|
version = "0.3.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"anyhow",
|
||||||
"chrono",
|
"chrono",
|
||||||
"clippy",
|
"clippy",
|
||||||
"feed-rs",
|
"feed-rs",
|
||||||
@ -70,6 +71,12 @@ dependencies = [
|
|||||||
"libc",
|
"libc",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anyhow"
|
||||||
|
version = "1.0.82"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f538837af36e6f6a9be0faa67f9a314f8119e4e4b5867c6ab40ed60360142519"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "arrayref"
|
name = "arrayref"
|
||||||
version = "0.3.7"
|
version = "0.3.7"
|
||||||
|
@ -17,6 +17,7 @@ rayon = "1.8"
|
|||||||
simple_logger = "4.3"
|
simple_logger = "4.3"
|
||||||
log = "0.4"
|
log = "0.4"
|
||||||
rss = "2.0"
|
rss = "2.0"
|
||||||
|
anyhow = "1.0"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
clippy = "0.0.302"
|
clippy = "0.0.302"
|
||||||
|
@ -1,12 +1,12 @@
|
|||||||
use crate::web_fetchers;
|
use crate::web_fetchers;
|
||||||
use chrono::{DateTime, Duration, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
use feed_rs::model::Entry;
|
use feed_rs::model::Entry;
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
use scraper::{Html, Selector};
|
use scraper::{Html, Selector};
|
||||||
|
|
||||||
|
use anyhow::Result;
|
||||||
use std::cmp::Ordering;
|
use std::cmp::Ordering;
|
||||||
use std::fs;
|
use std::fs;
|
||||||
|
|
||||||
#[derive(Clone, PartialEq, Eq)]
|
#[derive(Clone, PartialEq, Eq)]
|
||||||
pub struct Post {
|
pub struct Post {
|
||||||
pub title: String,
|
pub title: String,
|
||||||
@ -32,20 +32,23 @@ impl PartialOrd for Post {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Post {
|
impl Post {
|
||||||
fn from_entry(entry: &feed_rs::model::Entry) -> Self {
|
fn from_entry(entry: &feed_rs::model::Entry) -> Result<Post> {
|
||||||
let title = entry
|
let title = entry
|
||||||
.title
|
.title
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.map_or_else(|| "".to_string(), |t| t.content.clone());
|
.map_or_else(|| "".to_string(), |t| t.content.clone());
|
||||||
|
|
||||||
let link = entry.links.first().unwrap();
|
let link = entry
|
||||||
|
.links
|
||||||
|
.first()
|
||||||
|
.ok_or_else(|| anyhow::anyhow!("No links for post {:?}", entry))?;
|
||||||
|
|
||||||
let date = get_entry_date(entry);
|
let date = get_entry_date(entry);
|
||||||
|
|
||||||
let lang = link.clone().href_lang.unwrap_or("en".to_string());
|
let lang = link.clone().href_lang.unwrap_or("en".to_string());
|
||||||
|
|
||||||
if lang != "en" {
|
if lang != "en" {
|
||||||
log::warn!("Non english! {} {}", lang, link.href);
|
log::warn!("Not English! {} {}", lang, link.href);
|
||||||
}
|
}
|
||||||
|
|
||||||
let image_url = entry
|
let image_url = entry
|
||||||
@ -74,9 +77,9 @@ impl Post {
|
|||||||
|
|
||||||
let main_url = get_root_url(link.href.as_str());
|
let main_url = get_root_url(link.href.as_str());
|
||||||
|
|
||||||
let score = (date - (Utc::now() - Duration::days(14))).num_minutes();
|
let score = (date - (chrono::Utc::now() - chrono::Duration::days(14))).num_minutes();
|
||||||
|
|
||||||
Post {
|
Ok(Post {
|
||||||
title,
|
title,
|
||||||
link: link.href.clone(),
|
link: link.href.clone(),
|
||||||
date,
|
date,
|
||||||
@ -85,7 +88,7 @@ impl Post {
|
|||||||
truncated_description,
|
truncated_description,
|
||||||
main_url,
|
main_url,
|
||||||
score,
|
score,
|
||||||
}
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -114,8 +117,14 @@ pub fn read_feed(path: &str) -> Vec<Post> {
|
|||||||
|
|
||||||
entries
|
entries
|
||||||
.par_iter()
|
.par_iter()
|
||||||
.map(Post::from_entry)
|
.map(|entry| {
|
||||||
.filter(|entry| entry.date < chrono::Utc::now())
|
Post::from_entry(entry).map_err(|e| {
|
||||||
|
log::warn!("Failed to process entry: {}", e);
|
||||||
|
e
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.filter_map(Result::ok)
|
||||||
|
.filter(|post| post.date < chrono::Utc::now())
|
||||||
.collect::<Vec<_>>()
|
.collect::<Vec<_>>()
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -160,7 +169,6 @@ pub fn truncate_description(description: &str, max_length: usize) -> String {
|
|||||||
|
|
||||||
pub fn strip_html_tags(html: &str) -> String {
|
pub fn strip_html_tags(html: &str) -> String {
|
||||||
let document = Html::parse_document(html);
|
let document = Html::parse_document(html);
|
||||||
// Use the wildcard selector to select all nodes and extract their text.
|
|
||||||
let selector = Selector::parse("*").unwrap();
|
let selector = Selector::parse("*").unwrap();
|
||||||
let mut text_content = String::new();
|
let mut text_content = String::new();
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user