1
0
mirror of https://gitlab.com/Anson-Projects/zine.git synced 2025-07-27 08:41:25 +00:00

Add Archive Posts

This commit is contained in:
2024-05-15 04:44:27 +00:00
parent 61d8acbb9d
commit 4c82106817
9 changed files with 172 additions and 85 deletions

View File

@@ -3,6 +3,8 @@ extern crate feed_rs;
extern crate maud;
extern crate reqwest;
use rand::seq::SliceRandom;
use rand::thread_rng;
use std::error::Error;
use std::fs::write;
use std::fs::DirBuilder;
@@ -16,54 +18,70 @@ use std::collections::HashMap;
fn main() -> Result<(), Box<dyn Error>> {
simple_logger::init_with_level(log::Level::Info).unwrap();
let mut entries = utilities::read_feed("feeds.txt");
let all_posts = utilities::read_feed("feeds.txt");
entries.retain(|entry| entry.score.is_positive());
let mut posts = all_posts.clone();
posts.retain(|post| post.score.is_positive());
// Count occurrences of main urls
let url_counts = entries.iter().fold(HashMap::new(), |mut acc, post| {
// Count occurrences of main urls to punish blogs that post really frequently
// which also filters out blogs that make tiny updates and change the published date
let url_counts = posts.iter().fold(HashMap::new(), |mut acc, post| {
*acc.entry(post.main_url.clone()).or_insert(0) += 1;
acc
});
// Punish blogs that post really often
entries.iter_mut().for_each(|entry| {
entry.score = (entry.score / url_counts.get(&entry.main_url).unwrap()) as i64;
posts.iter_mut().for_each(|post| {
post.score = (post.score / url_counts.get(&post.main_url).unwrap()) as i64;
});
let mut featured = utilities::read_feed("featured_feeds.txt");
// Give featured a small boost in points
featured = featured
.iter_mut()
.map(|post| {
post.score *= 1.5 as i64;
post.score = (post.score as f64 * 1.5) as i64;
post.clone()
})
.collect::<Vec<_>>();
entries.extend(featured);
posts.extend(featured);
entries.par_iter_mut().for_each(utilities::find_image);
entries.retain(|entry| entry.score.is_positive());
posts.par_iter_mut().for_each(utilities::find_image);
posts.par_iter_mut().for_each(utilities::validate);
entries.sort();
posts.sort();
// Remove bottom 10% from list
entries.truncate(entries.len() - (entries.len() as f64 * 0.1).ceil() as usize);
// Make sure first entry has an image since it is used as the featured post
let mut max_iter = 0;
while entries.first().unwrap().image_url.is_none() {
entries[0].score += -100;
entries.sort();
max_iter += 1;
if max_iter > 10000 {
break;
}
// Move the post with an image_url to the head of the list
if let Some(pos) = posts.iter().position(|post| post.image_url.is_some()) {
let post_with_image = posts.remove(pos);
posts.insert(0, post_with_image);
}
let index = site_generator::generate_index(entries.clone());
posts.truncate(16);
let mut old_posts = all_posts;
old_posts.retain(|p| !posts.contains(p));
old_posts.shuffle(&mut thread_rng());
let mut archive_posts: Vec<utilities::Post> = Vec::new();
let archive_size = 100;
while (archive_posts.len() < archive_size) && (old_posts.len() > 50) {
let iter_size = archive_size - archive_posts.len();
let mut extracted = old_posts
.drain(0..=(iter_size + 50))
.collect::<Vec<utilities::Post>>();
extracted.par_iter_mut().for_each(utilities::validate);
extracted.retain(|post| post.score != 0);
archive_posts.extend(extracted);
}
archive_posts.truncate(archive_size);
let index = site_generator::generate_index(posts.clone(), archive_posts.clone());
let index_path = Path::new("output/index.html");
DirBuilder::new()
.recursive(true)
@@ -75,7 +93,7 @@ fn main() -> Result<(), Box<dyn Error>> {
Err(e) => log::error!("Failed to write to {}: {}", index_path.display(), e),
}
let feed = site_generator::generate_rss(entries.clone());
let feed = site_generator::generate_rss(posts.clone());
let feed_path = Path::new("output/feed.xml");
DirBuilder::new()
.recursive(true)