From 174e622e3da151b9a3374d445e8002f8ad54aee0 Mon Sep 17 00:00:00 2001 From: Anson Biggs Date: Mon, 20 May 2024 22:12:24 -0600 Subject: [PATCH] Keep only first instance of each blog --- src/main.rs | 2 ++ src/utilities.rs | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/src/main.rs b/src/main.rs index 768cc78..e55e008 100644 --- a/src/main.rs +++ b/src/main.rs @@ -56,6 +56,8 @@ fn main() -> Result<(), Box> { posts.insert(0, post_with_image); } + utilities::retain_first_main_url(&mut posts); + posts.truncate(16); let mut old_posts = all_posts; diff --git a/src/utilities.rs b/src/utilities.rs index 60e7810..405bd2c 100644 --- a/src/utilities.rs +++ b/src/utilities.rs @@ -3,10 +3,12 @@ use chrono::{DateTime, Utc}; use feed_rs::model::Entry; use rayon::prelude::*; use scraper::{Html, Selector}; +use std::collections::HashSet; use anyhow::Result; use std::cmp::Ordering; use std::fs; + #[derive(Clone, PartialEq, Eq)] pub struct Post { pub title: String, @@ -246,3 +248,8 @@ pub fn validate(post: &mut Post) { println!("{} is not valid", post.link.as_str()); }; } + +pub fn retain_first_main_url(posts: &mut Vec<Post>) { + let mut seen_urls = HashSet::new(); + posts.retain(|post| seen_urls.insert(post.main_url.clone())); +}