1
0
mirror of https://gitlab.com/Anson-Projects/zine.git synced 2025-06-15 13:16:39 +00:00

Keep only first instance of each blog

This commit is contained in:
Anson Biggs 2024-05-20 22:12:24 -06:00
parent fc66f7bf8c
commit 174e622e3d
2 changed files with 9 additions and 0 deletions

View File

@ -56,6 +56,8 @@ fn main() -> Result<(), Box<dyn Error>> {
posts.insert(0, post_with_image);
}
utilities::retain_first_main_url(&mut posts);
posts.truncate(16);
let mut old_posts = all_posts;

View File

@ -3,10 +3,12 @@ use chrono::{DateTime, Utc};
use feed_rs::model::Entry;
use rayon::prelude::*;
use scraper::{Html, Selector};
use std::collections::HashSet;
use anyhow::Result;
use std::cmp::Ordering;
use std::fs;
#[derive(Clone, PartialEq, Eq)]
pub struct Post {
pub title: String,
@ -246,3 +248,8 @@ pub fn validate(post: &mut Post) {
println!("{} is not valid", post.link.as_str());
};
}
/// Removes, in place, every post whose `main_url` already appeared earlier in `posts`.
///
/// Only the first post for each distinct `main_url` survives; the remaining
/// posts keep their original relative order.
pub fn retain_first_main_url(posts: &mut Vec<Post>) {
    let mut encountered = HashSet::new();
    posts.retain(|candidate| {
        // The set needs an owned key because it outlives this per-element borrow.
        let url = candidate.main_url.clone();
        // `insert` yields `true` only the first time a value is added, which is
        // exactly the "keep this post" condition.
        encountered.insert(url)
    });
}