mirror of
https://gitlab.com/Anson-Projects/zine.git
synced 2025-06-15 13:16:39 +00:00
Update Main Feeds Algorithm to Only Keep First Occurrence of Each main_url
This commit is contained in:
parent
015a1a7f19
commit
2b72ffadee
16
src/main.rs
16
src/main.rs
@ -5,6 +5,7 @@ extern crate reqwest;
|
||||
|
||||
use rand::seq::SliceRandom;
|
||||
use rand::thread_rng;
|
||||
use std::collections::HashSet;
|
||||
use std::error::Error;
|
||||
use std::fs::write;
|
||||
use std::fs::DirBuilder;
|
||||
@ -13,7 +14,6 @@ mod site_generator;
|
||||
mod utilities;
|
||||
mod web_fetchers;
|
||||
use rayon::prelude::*;
|
||||
use std::collections::HashMap;
|
||||
|
||||
fn main() -> Result<(), Box<dyn Error>> {
|
||||
simple_logger::init_with_level(log::Level::Info).unwrap();
|
||||
@ -23,15 +23,11 @@ fn main() -> Result<(), Box<dyn Error>> {
|
||||
let mut posts = all_posts.clone();
|
||||
posts.retain(|post| post.score.is_positive());
|
||||
|
||||
// Count occurrences of main urls to punish blogs that post really frequently
|
||||
// which also filters out blogs that make tiny updates and change the published date
|
||||
let url_counts = posts.iter().fold(HashMap::new(), |mut acc, post| {
|
||||
*acc.entry(post.main_url.clone()).or_insert(0) += 1;
|
||||
acc
|
||||
});
|
||||
posts.iter_mut().for_each(|post| {
|
||||
post.score = (post.score / url_counts.get(&post.main_url).unwrap()) as i64;
|
||||
});
|
||||
// Keep only the first occurrence of each main_url
|
||||
{
|
||||
let mut seen_urls = HashSet::new();
|
||||
posts.retain(|post| seen_urls.insert(post.main_url.clone()));
|
||||
}
|
||||
|
||||
let mut featured = utilities::read_feed("featured_feeds.txt");
|
||||
// Give featured a small boost in points
|
||||
|
Loading…
x
Reference in New Issue
Block a user