1
0
mirror of https://gitlab.com/Anson-Projects/zine.git synced 2025-06-15 21:26:38 +00:00

Keep only first instance of each blog

This commit is contained in:
Anson Biggs 2024-05-20 22:12:24 -06:00
parent fc66f7bf8c
commit 174e622e3d
2 changed files with 9 additions and 0 deletions

View File

@ -56,6 +56,8 @@ fn main() -> Result<(), Box<dyn Error>> {
posts.insert(0, post_with_image); posts.insert(0, post_with_image);
} }
utilities::retain_first_main_url(&mut posts);
posts.truncate(16); posts.truncate(16);
let mut old_posts = all_posts; let mut old_posts = all_posts;

View File

@ -3,10 +3,12 @@ use chrono::{DateTime, Utc};
use feed_rs::model::Entry; use feed_rs::model::Entry;
use rayon::prelude::*; use rayon::prelude::*;
use scraper::{Html, Selector}; use scraper::{Html, Selector};
use std::collections::HashSet;
use anyhow::Result; use anyhow::Result;
use std::cmp::Ordering; use std::cmp::Ordering;
use std::fs; use std::fs;
#[derive(Clone, PartialEq, Eq)] #[derive(Clone, PartialEq, Eq)]
pub struct Post { pub struct Post {
pub title: String, pub title: String,
@ -246,3 +248,8 @@ pub fn validate(post: &mut Post) {
println!("{} is not valid", post.link.as_str()); println!("{} is not valid", post.link.as_str());
}; };
} }
/// Removes, in place, every post whose `main_url` already belongs to a post
/// located earlier in `posts`.
///
/// The vector is walked front to back, so for each distinct `main_url` only
/// the first post carrying it survives; the surviving posts keep their
/// original relative order.
pub fn retain_first_main_url(posts: &mut Vec<Post>) {
    let mut known_main_urls = HashSet::new();

    posts.retain(|candidate| {
        let main_url = candidate.main_url.clone();
        // `HashSet::insert` returns `true` only when the value was not yet in
        // the set, i.e. this is the first post seen for that URL.
        let is_first_occurrence = known_main_urls.insert(main_url);
        is_first_occurrence
    });
}