1
0
mirror of https://gitlab.com/Anson-Projects/zine.git synced 2025-06-15 13:16:39 +00:00

Update Main Feeds Algorithm to Only Keep First Occurrence of Each main_url

This commit is contained in:
Anson Biggs 2024-05-31 23:34:24 -06:00
parent 015a1a7f19
commit 2b72ffadee

View File

@ -5,6 +5,7 @@ extern crate reqwest;
use rand::seq::SliceRandom;
use rand::thread_rng;
use std::collections::HashSet;
use std::error::Error;
use std::fs::write;
use std::fs::DirBuilder;
@ -13,7 +14,6 @@ mod site_generator;
mod utilities;
mod web_fetchers;
use rayon::prelude::*;
use std::collections::HashMap;
fn main() -> Result<(), Box<dyn Error>> {
simple_logger::init_with_level(log::Level::Info).unwrap();
@ -23,15 +23,11 @@ fn main() -> Result<(), Box<dyn Error>> {
let mut posts = all_posts.clone();
posts.retain(|post| post.score.is_positive());
// Count occurrences of main URLs to punish blogs that post really frequently
// which also filters out blogs that make tiny updates and change the published date
let url_counts = posts.iter().fold(HashMap::new(), |mut acc, post| {
*acc.entry(post.main_url.clone()).or_insert(0) += 1;
acc
});
posts.iter_mut().for_each(|post| {
post.score = (post.score / url_counts.get(&post.main_url).unwrap()) as i64;
});
// Keep only the first occurrence of each main_url
{
let mut seen_urls = HashSet::new();
posts.retain(|post| seen_urls.insert(post.main_url.clone()));
}
let mut featured = utilities::read_feed("featured_feeds.txt");
// Give featured a small boost in points