1
0
mirror of https://gitlab.com/Anson-Projects/zine.git synced 2025-07-27 00:31:23 +00:00

Clean up code

This commit is contained in:
2024-12-10 21:32:01 -07:00
parent 3323b8a989
commit b54d1d339d
5 changed files with 164 additions and 192 deletions

View File

@@ -17,44 +17,42 @@ use rayon::prelude::*;
fn main() -> Result<(), Box<dyn Error>> {
simple_logger::init_with_level(log::Level::Info).unwrap();
let main_post_count = 24;
let archive_size = 100;
let all_posts = utilities::read_feed("feeds.txt");
let mut all_posts = utilities::read_feed("feeds.txt");
// Give featured a small boost in points
let featured = utilities::read_feed("featured_feeds.txt")
.iter_mut()
.map(|post| {
post.score += post.score / 2;
post.clone()
})
.collect::<Vec<_>>();
all_posts.extend(featured);
let mut posts = all_posts.clone();
posts.retain(|post| post.score.is_positive());
posts.par_iter_mut().for_each(utilities::validate);
posts.sort();
// Move the post with an image_url to the head of the list which is the featured post
if let Some(pos) = posts.iter().position(|post| post.image_url.is_some()) {
let post_with_image = posts.remove(pos);
posts.insert(0, post_with_image);
}
// Keep only the first occurrence of each main_url
{
let mut seen_urls = HashSet::new();
posts.retain(|post| seen_urls.insert(post.main_url.clone()));
}
let mut featured = utilities::read_feed("featured_feeds.txt");
// Give featured a small boost in points
featured = featured
.iter_mut()
.map(|post| {
post.score = (post.score as f64 * 1.5) as i64;
post.clone()
})
.collect::<Vec<_>>();
posts.extend(featured);
posts.par_iter_mut().for_each(utilities::find_image);
posts.par_iter_mut().for_each(utilities::validate);
posts.sort();
// Move the post with an image_url to the head of the list
if let Some(pos) = posts.iter().position(|post| post.image_url.is_some()) {
let post_with_image = posts.remove(pos);
posts.insert(0, post_with_image);
}
utilities::retain_most_recent_based_on_main_url(&mut posts);
posts.truncate(24);
posts.truncate(main_post_count);
let mut old_posts = all_posts;
@@ -62,7 +60,6 @@ fn main() -> Result<(), Box<dyn Error>> {
old_posts.shuffle(&mut thread_rng());
let mut archive_posts: Vec<utilities::Post> = Vec::new();
let archive_size = 100;
while (archive_posts.len() < archive_size) && (old_posts.len() > 50) {
let iter_size = archive_size - archive_posts.len();

View File

@@ -100,7 +100,7 @@ fn generate_header() -> Markup {
li { a href="#" data-theme-switcher="auto" { "Auto" }}
li { a href="#" data-theme-switcher="light" { "Light" }}
li { a href="#" data-theme-switcher="dark" { "Dark" }}
}
}
}

View File

@@ -3,7 +3,6 @@ use chrono::{DateTime, Utc};
use feed_rs::model::Entry;
use rayon::prelude::*;
use scraper::{Html, Selector};
use std::collections::HashSet;
use anyhow::Result;
use html_escape::decode_html_entities;
@@ -216,6 +215,44 @@ pub fn group_by_nth<T: Clone>(slice: &[T], n: usize) -> Vec<Vec<T>> {
.collect()
}
/// Runs a post through a fixed sequence of quality checks, mutating it in place.
///
/// Checks are evaluated in order. The disqualifying ones set `post.score` to `0`,
/// print a diagnostic line to stdout and return immediately, so later checks
/// (including the image lookup) are skipped:
///
/// 1. Empty `title` — disqualified.
/// 2. Non-empty `lang` other than `"en"` — disqualified.
/// 3. Empty `truncated_description` — score is halved, processing continues.
/// 4. `link` rejected by `is_valid_url` — disqualified.
/// 5. `main_url`, prefixed with `http://`, rejected by `is_valid_url` — disqualified.
///
/// Posts that survive are handed to `find_image`, after which an `image_url`
/// ending in `favicon.ico` is discarded.
pub fn validate(post: &mut Post) {
    if post.title.is_empty() {
        println!("{} has no title", post.link.as_str());
        post.score = 0;
        return;
    }

    // An unspecified language is given the benefit of the doubt.
    let language_ok = post.lang.is_empty() || post.lang == "en";
    if !language_ok {
        println!("{} is not english", post.link.as_str());
        post.score = 0;
        return;
    }

    // A missing description is penalised rather than disqualifying.
    if post.truncated_description.is_empty() {
        post.score /= 2;
    }

    let link_ok = is_valid_url(post.link.as_str());
    if !link_ok {
        println!("post link {} is not valid", post.link.as_str());
        post.score = 0;
        return;
    }

    // `main_url` is checked with an explicit scheme prepended.
    let main_with_scheme = format!("http://{}", post.main_url.as_str());
    if !is_valid_url(&main_with_scheme) {
        println!("main_url {} is not valid", post.main_url.as_str());
        post.score = 0;
        return;
    }

    find_image(post);

    // A favicon is not treated as a usable post image.
    let only_favicon = matches!(&post.image_url, Some(url) if url.ends_with("favicon.ico"));
    if only_favicon {
        post.image_url = None;
    }
}
pub fn find_image(post: &mut Post) {
if let Some(image_url) = &post.image_url {
match web_fetchers::is_valid_image_url(image_url) {
@@ -238,44 +275,3 @@ pub fn find_image(post: &mut Post) {
}
}
}
/// Validates a post in place, zeroing or reducing its score when it fails a check.
///
/// The checks run in order; every check except the description one is terminal
/// (the score becomes `0`, a message is printed to stdout, and the function returns):
///
/// * `title` must not be empty.
/// * `lang`, when present, must be `"en"`.
/// * An empty `truncated_description` halves the score but does not stop validation.
/// * `link` must be accepted by `is_valid_url`.
/// * `main_url` with `http://` prepended must be accepted by `is_valid_url`.
///
/// Finally, an `image_url` that ends with `favicon.ico` is cleared.
pub fn validate(post: &mut Post) {
    if post.title.is_empty() {
        post.score = 0;
        println!("{} has no title", post.link.as_str());
        return;
    }

    if !post.lang.is_empty() && post.lang != "en" {
        post.score = 0;
        println!("{} is not english", post.link.as_str());
        return;
    }

    if post.truncated_description.is_empty() {
        // Integer halving truncates toward zero exactly like the previous
        // `(score as f64 * 0.5) as i64`, but without the precision loss the
        // float round trip introduces for magnitudes above 2^53.
        post.score /= 2;
    }

    if !is_valid_url(post.link.as_str()) {
        post.score = 0;
        println!("post link {} is not valid", post.link.as_str());
        return;
    }

    // `main_url` is validated with an explicit scheme prepended.
    let main_url_with_scheme = format!("http://{}", post.main_url.as_str());
    if !is_valid_url(&main_url_with_scheme) {
        post.score = 0;
        println!("main_url {} is not valid", post.main_url.as_str());
        return;
    }

    // Drop favicons: they are not kept as a post image.
    if matches!(&post.image_url, Some(url) if url.ends_with("favicon.ico")) {
        post.image_url = None;
    }
}
/// Removes posts whose `main_url` has already been seen earlier in `posts`.
///
/// The first post for each `main_url` (in the vector's current order) is kept and
/// every later post with the same `main_url` is dropped; relative order of the
/// survivors is unchanged.
///
/// NOTE(review): despite the name, nothing here inspects dates — the kept entry is
/// the "most recent" one only if the caller has ordered `posts` accordingly.
pub fn retain_most_recent_based_on_main_url(posts: &mut Vec<Post>) {
    let mut known_main_urls: HashSet<_> = HashSet::new();
    posts.retain(|candidate| {
        let main_url = candidate.main_url.clone();
        // `insert` returns `true` only when the value was not present yet,
        // which is exactly the "keep this post" condition.
        known_main_urls.insert(main_url)
    });
}