From 7008f06d45693e035c74be5e4516cd40ebc6f265 Mon Sep 17 00:00:00 2001 From: Anson Biggs Date: Wed, 24 Apr 2024 05:07:40 +0000 Subject: [PATCH] more algorithm --- src/main.rs | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/main.rs b/src/main.rs index 5e5209c..b625e3f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -11,6 +11,7 @@ mod site_generator; mod utilities; mod web_fetchers; use rayon::prelude::*; +use std::collections::HashMap; fn main() -> Result<(), Box> { simple_logger::init_with_level(log::Level::Info).unwrap(); @@ -33,7 +34,7 @@ fn main() -> Result<(), Box> { entries.par_iter_mut().for_each(|entry| { if entry.image_url.is_some() { - entry.score += 300; + entry.score += 1440; } else { match web_fetchers::fetch_social_image(entry.link.clone()) { Ok(social_image_url) => { @@ -41,15 +42,30 @@ fn main() -> Result<(), Box> { } Err(error) => { log::info!("{}: {}", error, entry.link.clone()); - entry.score += -600; + entry.score += -1440; } } } }); entries.retain(|entry| entry.score.is_positive()); + + // Count occurrences of main urls + let url_counts = entries.iter().fold(HashMap::new(), |mut acc, post| { + *acc.entry(post.main_url.clone()).or_insert(0) += 1; + acc + }); + + // Punish blogs that post really often + entries.iter_mut().for_each(|entry| { + entry.score = (entry.score / url_counts.get(&entry.main_url).unwrap()) as i64; + }); + entries.sort(); + // Remove bottom 10% from list + entries.truncate(entries.len() - (entries.len() as f64 * 0.1).ceil() as usize); + let index = site_generator::generate_index(entries.clone()); let index_path = Path::new("output/index.html"); DirBuilder::new()