1
0
mirror of https://gitlab.com/Anson-Projects/zine.git synced 2025-06-16 13:36:40 +00:00

Algorithm Tweaks

This commit is contained in:
Anson Biggs 2024-04-28 18:37:22 +00:00
parent 28ebb01d1b
commit ec6da65e5e
2 changed files with 42 additions and 43 deletions

View File

@ -16,50 +16,8 @@ use std::collections::HashMap;
fn main() -> Result<(), Box<dyn Error>> { fn main() -> Result<(), Box<dyn Error>> {
simple_logger::init_with_level(log::Level::Info).unwrap(); simple_logger::init_with_level(log::Level::Info).unwrap();
let mut featured = utilities::read_feed("featured_feeds.txt");
featured = featured
.iter_mut()
.map(|post| {
post.score += 60 * 24 * 2;
post.clone()
})
.collect::<Vec<_>>();
let mut entries = utilities::read_feed("feeds.txt"); let mut entries = utilities::read_feed("feeds.txt");
entries.extend(featured);
entries.retain(|entry| entry.score.is_positive());
entries.par_iter_mut().for_each(|entry| {
if let Some(image_url) = &entry.image_url {
match web_fetchers::is_valid_image_url(image_url) {
Ok(true) => {
entry.score += 1440;
}
_ => {
entry.image_url = None;
entry.score += -1440;
}
}
} else {
match web_fetchers::fetch_social_image(entry.link.clone()) {
Ok(social_image_url) => {
if web_fetchers::is_valid_image_url(&social_image_url).unwrap_or(false) {
entry.image_url = Some(social_image_url);
entry.score += 1440;
} else {
entry.score += -1440;
}
}
Err(error) => {
log::info!("{}: {}", error, entry.link.clone());
entry.score += -1440;
}
}
}
});
entries.retain(|entry| entry.score.is_positive()); entries.retain(|entry| entry.score.is_positive());
// Count occurrences of main urls // Count occurrences of main urls
@ -73,6 +31,21 @@ fn main() -> Result<(), Box<dyn Error>> {
entry.score = (entry.score / url_counts.get(&entry.main_url).unwrap()) as i64; entry.score = (entry.score / url_counts.get(&entry.main_url).unwrap()) as i64;
}); });
// Load the featured feeds and give each post a 1.5x score boost before merging
// them into the main entry list.
let mut featured = utilities::read_feed("featured_feeds.txt");
for post in featured.iter_mut() {
    // `1.5 as i64` truncates to 1, which made the old `*= 1.5 as i64` a no-op.
    // Scale by 3/2 in integer arithmetic instead; saturate so a very large
    // score cannot overflow during the multiplication.
    post.score = post.score.saturating_mul(3) / 2;
}
entries.extend(featured);

// Look up / validate an image for every entry in parallel.
entries.par_iter_mut().for_each(utilities::find_image);

// Keep only entries whose score is still positive.
entries.retain(|entry| entry.score.is_positive());
entries.sort(); entries.sort();
// Remove bottom 10% from list // Remove bottom 10% from list

View File

@ -77,7 +77,11 @@ impl Post {
let main_url = get_root_url(link.href.as_str()); let main_url = get_root_url(link.href.as_str());
let score = (date - (chrono::Utc::now() - chrono::Duration::days(14))).num_minutes(); let mut score = (date - (chrono::Utc::now() - chrono::Duration::days(21))).num_minutes();
if score > 0 {
score = score.pow(2); // I think a pow will help keep newer stuff at the top
}
Ok(Post { Ok(Post {
title, title,
@ -198,3 +202,25 @@ pub fn group_by_nth<T: Clone>(slice: &[T], n: usize) -> Vec<Vec<T>> {
}) })
.collect() .collect()
} }
pub fn find_image(entry: &mut Post) {
if let Some(image_url) = &entry.image_url {
match web_fetchers::is_valid_image_url(image_url) {
Ok(true) => {}
_ => {
entry.image_url = None;
}
}
} else {
match web_fetchers::fetch_social_image(entry.link.clone()) {
Ok(social_image_url) => {
if web_fetchers::is_valid_image_url(&social_image_url).unwrap_or(false) {
entry.image_url = Some(social_image_url);
}
}
Err(error) => {
log::warn!("{}: {}", error, entry.link.clone());
}
}
}
}