From ec6da65e5e397a822d5769cb41b6cd32d9b9674c Mon Sep 17 00:00:00 2001 From: Anson Biggs Date: Sun, 28 Apr 2024 18:37:22 +0000 Subject: [PATCH] Algorithm Tweaks --- src/main.rs | 57 +++++++++++++----------------------------------- src/utilities.rs | 28 +++++++++++++++++++++++- 2 files changed, 42 insertions(+), 43 deletions(-) diff --git a/src/main.rs b/src/main.rs index 49db38c..6dbfef4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -16,50 +16,8 @@ use std::collections::HashMap; fn main() -> Result<(), Box> { simple_logger::init_with_level(log::Level::Info).unwrap(); - let mut featured = utilities::read_feed("featured_feeds.txt"); - - featured = featured - .iter_mut() - .map(|post| { - post.score += 60 * 24 * 2; - post.clone() - }) - .collect::>(); - let mut entries = utilities::read_feed("feeds.txt"); - entries.extend(featured); - - entries.retain(|entry| entry.score.is_positive()); - - entries.par_iter_mut().for_each(|entry| { - if let Some(image_url) = &entry.image_url { - match web_fetchers::is_valid_image_url(image_url) { - Ok(true) => { - entry.score += 1440; - } - _ => { - entry.image_url = None; - entry.score += -1440; - } - } - } else { - match web_fetchers::fetch_social_image(entry.link.clone()) { - Ok(social_image_url) => { - if web_fetchers::is_valid_image_url(&social_image_url).unwrap_or(false) { - entry.image_url = Some(social_image_url); - entry.score += 1440; - } else { - entry.score += -1440; - } - } - Err(error) => { - log::info!("{}: {}", error, entry.link.clone()); - entry.score += -1440; - } - } - } - }); entries.retain(|entry| entry.score.is_positive()); // Count occurences of main urls @@ -73,6 +31,21 @@ fn main() -> Result<(), Box> { entry.score = (entry.score / url_counts.get(&entry.main_url).unwrap()) as i64; }); + let mut featured = utilities::read_feed("featured_feeds.txt"); + + featured = featured + .iter_mut() + .map(|post| { + post.score = post.score.saturating_add(post.score / 2); /* 1.5x boost in integer math; `1.5 as i64` truncates to 1, which made `*=` a no-op */ + post.clone() + }) + .collect::>(); + + entries.extend(featured); + + 
entries.par_iter_mut().for_each(utilities::find_image); + entries.retain(|entry| entry.score.is_positive()); + entries.sort(); // Remove bottom 10% from list diff --git a/src/utilities.rs b/src/utilities.rs index fa3d46e..10ba060 100644 --- a/src/utilities.rs +++ b/src/utilities.rs @@ -77,7 +77,11 @@ impl Post { let main_url = get_root_url(link.href.as_str()); - let score = (date - (chrono::Utc::now() - chrono::Duration::days(14))).num_minutes(); + let mut score = (date - (chrono::Utc::now() - chrono::Duration::days(21))).num_minutes(); + + if score > 0 { + score = score.saturating_pow(2); // Squaring helps keep newer stuff at the top; saturates so a bogus far-future feed date cannot overflow i64 + } Ok(Post { title, @@ -198,3 +202,25 @@ pub fn group_by_nth(slice: &[T], n: usize) -> Vec> { }) .collect() } + +pub fn find_image(entry: &mut Post) { + if let Some(image_url) = &entry.image_url { + match web_fetchers::is_valid_image_url(image_url) { + Ok(true) => {} + _ => { + entry.image_url = None; + } + } + } else { + match web_fetchers::fetch_social_image(entry.link.clone()) { + Ok(social_image_url) => { + if web_fetchers::is_valid_image_url(&social_image_url).unwrap_or(false) { + entry.image_url = Some(social_image_url); + } + } + Err(error) => { + log::warn!("{}: {}", error, entry.link); + } + } + } +}