From 2b72ffadeef923c100a2a3e514a81c705f4f1e57 Mon Sep 17 00:00:00 2001
From: Anson Biggs
Date: Fri, 31 May 2024 23:34:24 -0600
Subject: [PATCH] Update Main Feeds Algorithm to Only Keep First Occurrence of
 Each main_url

---
 src/main.rs | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/src/main.rs b/src/main.rs
index e55e008..fbad2aa 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -5,6 +5,7 @@ extern crate reqwest;
 
 use rand::seq::SliceRandom;
 use rand::thread_rng;
+use std::collections::HashSet;
 use std::error::Error;
 use std::fs::write;
 use std::fs::DirBuilder;
@@ -13,7 +14,6 @@ mod site_generator;
 mod utilities;
 mod web_fetchers;
 use rayon::prelude::*;
-use std::collections::HashMap;
 
 fn main() -> Result<(), Box<dyn Error>> {
     simple_logger::init_with_level(log::Level::Info).unwrap();
@@ -23,15 +23,11 @@ fn main() -> Result<(), Box<dyn Error>> {
     let mut posts = all_posts.clone();
     posts.retain(|post| post.score.is_positive());
 
-    // Count occurences of main urls to punish blogs that post really frequently
-    // which also filters out blogs that make tiny updates and change the published date
-    let url_counts = posts.iter().fold(HashMap::new(), |mut acc, post| {
-        *acc.entry(post.main_url.clone()).or_insert(0) += 1;
-        acc
-    });
-    posts.iter_mut().for_each(|post| {
-        post.score = (post.score / url_counts.get(&post.main_url).unwrap()) as i64;
-    });
+    // Keep only the first occurrence of each main_url
+    {
+        let mut seen_urls = HashSet::new();
+        posts.retain(|post| seen_urls.insert(post.main_url.clone()));
+    }
 
     let mut featured = utilities::read_feed("featured_feeds.txt");
     // Give featured a small boost in points
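
Note on the replacement idiom: `HashSet::insert` returns false when the value
is already present, so feeding its result straight into `Vec::retain` keeps
only the first post seen for each main_url and drops the rest. Below is a
minimal standalone sketch of the pattern; the `Post` struct is a hypothetical
stand-in for the real type in src/main.rs, cut down to the one field the
dedup touches:

    use std::collections::HashSet;

    // Hypothetical stand-in for the Post type used in src/main.rs.
    #[derive(Debug)]
    struct Post {
        main_url: String,
    }

    fn main() {
        let mut posts = vec![
            Post { main_url: "https://a.example/feed".into() },
            Post { main_url: "https://b.example/feed".into() },
            Post { main_url: "https://a.example/feed".into() }, // duplicate
        ];

        // insert() returns true only the first time a URL is seen,
        // so retain() keeps exactly one post per main_url.
        let mut seen_urls = HashSet::new();
        posts.retain(|post| seen_urls.insert(post.main_url.clone()));

        assert_eq!(posts.len(), 2);
        println!("{:?}", posts);
    }

Because `retain` walks the vector in order, "first occurrence" means first in
whatever order `posts` holds at that point, so any ranking applied before the
dedup determines which post survives.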