mirror of https://gitlab.com/Anson-Projects/zine.git synced 2025-06-15 21:26:38 +00:00

undo rewrite of main

Anson Biggs 2024-07-27 10:53:11 -06:00
parent 2725245393
commit dbbdc8911f


@@ -5,6 +5,7 @@ extern crate reqwest;
 use rand::seq::SliceRandom;
 use rand::thread_rng;
+use std::collections::HashSet;
 use std::error::Error;
 use std::fs::write;
 use std::fs::DirBuilder;
@@ -15,100 +16,92 @@ mod web_fetchers;
 use rayon::prelude::*;
 
 fn main() -> Result<(), Box<dyn Error>> {
-    setup_logging()?;
-    let posts = load_and_process_posts()?;
-    let archive_posts = create_archive_posts(&posts)?;
-    generate_and_write_output(posts, archive_posts)?;
-    Ok(())
-}
-
-fn setup_logging() -> Result<(), Box<dyn Error>> {
-    simple_logger::init_with_level(log::Level::Info)?;
-    Ok(())
-}
-
-fn load_and_process_posts() -> Result<Vec<utilities::Post>, Box<dyn Error>> {
-    let mut posts = grab_posts()?;
-    process_posts(&mut posts)?;
-    Ok(posts)
-}
-
-fn grab_posts() -> Result<Vec<utilities::Post>, Box<dyn Error>> {
-    let mut posts = utilities::read_feed("feeds.txt");
+    simple_logger::init_with_level(log::Level::Info).unwrap();
+
+    let all_posts = utilities::read_feed("feeds.txt");
+
+    let mut posts = all_posts.clone();
+
     posts.retain(|post| post.score.is_positive());
-    utilities::retain_most_recent_based_on_main_url(&mut posts);
+
+    // Keep only the first occurence of each main_url
+    {
+        let mut seen_urls = HashSet::new();
+        posts.retain(|post| seen_urls.insert(post.main_url.clone()));
+    }
 
     let mut featured = utilities::read_feed("featured_feeds.txt");
-    featured.iter_mut().for_each(|post| {
-        post.score = (post.score as f64 * 1.5) as i64;
-    });
+    // Give featured a small boost in points
+    featured = featured
+        .iter_mut()
+        .map(|post| {
+            post.score = (post.score as f64 * 1.5) as i64;
+            post.clone()
+        })
+        .collect::<Vec<_>>();
 
     posts.extend(featured);
-    Ok(posts)
-}
-
-fn process_posts(posts: &mut Vec<utilities::Post>) -> Result<(), Box<dyn Error>> {
-    posts.par_iter_mut().for_each(|post| {
-        utilities::find_image(post);
-        utilities::validate(post);
-    });
+
+    posts.par_iter_mut().for_each(utilities::find_image);
+    posts.par_iter_mut().for_each(utilities::validate);
 
     posts.sort();
 
+    // Move the post with an image_url to the head of the list
     if let Some(pos) = posts.iter().position(|post| post.image_url.is_some()) {
         let post_with_image = posts.remove(pos);
         posts.insert(0, post_with_image);
     }
-    utilities::retain_most_recent_based_on_main_url(&mut posts);
+
     posts.truncate(16);
-    Ok(())
-}
-
-fn create_archive_posts(posts: &[utilities::Post]) -> Result<Vec<utilities::Post>, Box<dyn Error>> {
-    const ARCHIVE_SIZE: usize = 100;
-
-    let mut old_posts: Vec<_> = utilities::read_feed("feeds.txt")
-        .into_iter()
-        .filter(|p| !posts.contains(p))
-        .collect();
+
+    let mut old_posts = all_posts;
+
+    old_posts.retain(|p| !posts.contains(p));
 
     old_posts.shuffle(&mut thread_rng());
 
-    let mut archive_posts = Vec::new();
-
-    while archive_posts.len() < ARCHIVE_SIZE && !old_posts.is_empty() {
-        let chunk_size = std::cmp::min(ARCHIVE_SIZE - archive_posts.len() + 50, old_posts.len());
-        let mut chunk: Vec<_> = old_posts.drain(..chunk_size).collect();
-
-        chunk.par_iter_mut().for_each(utilities::validate);
-        chunk.retain(|post| post.score != 0);
-        archive_posts.extend(chunk);
+    let mut archive_posts: Vec<utilities::Post> = Vec::new();
+    let archive_size = 100;
+
+    while (archive_posts.len() < archive_size) && (old_posts.len() > 50) {
+        let iter_size = archive_size - archive_posts.len();
+
+        let mut extracted = old_posts
+            .drain(0..=(iter_size + 50))
+            .collect::<Vec<utilities::Post>>();
+        extracted.par_iter_mut().for_each(utilities::validate);
+        extracted.retain(|post| post.score != 0);
+        archive_posts.extend(extracted);
     }
-
-    archive_posts.truncate(ARCHIVE_SIZE);
-    Ok(archive_posts)
-}
-
-fn generate_and_write_output(
-    posts: Vec<utilities::Post>,
-    archive_posts: Vec<utilities::Post>,
-) -> Result<(), Box<dyn Error>> {
-    let index = site_generator::generate_index(posts.clone(), archive_posts);
-    write_to_file("output/index.html", index.into_string())?;
-
-    let feed = site_generator::generate_rss(posts);
-    write_to_file("output/feed.xml", feed)?;
-    Ok(())
-}
-
-fn write_to_file<P: AsRef<Path>>(path: P, content: String) -> Result<(), Box<dyn Error>> {
-    let path = path.as_ref();
+    archive_posts.truncate(archive_size);
+
+    let index = site_generator::generate_index(posts.clone(), archive_posts.clone());
+    let index_path = Path::new("output/index.html");
+
     DirBuilder::new()
         .recursive(true)
-        .create(path.parent().unwrap())?;
-    write(path, content)?;
-    log::info!("Successfully wrote to {}", path.display());
+        .create(index_path.parent().unwrap())
+        .unwrap();
+
+    match write(index_path, index.into_string()) {
+        Ok(_) => log::info!("Successfully wrote to {}", index_path.display()),
+        Err(e) => log::error!("Failed to write to {}: {}", index_path.display(), e),
+    }
+
+    let feed = site_generator::generate_rss(posts.clone());
+    let feed_path = Path::new("output/feed.xml");
+
+    DirBuilder::new()
+        .recursive(true)
+        .create(feed_path.parent().unwrap())
+        .unwrap();
+
+    match write(feed_path, feed) {
+        Ok(_) => log::info!("Successfully wrote to {}", feed_path.display()),
+        Err(e) => log::error!("Failed to write to {}: {}", feed_path.display(), e),
+    }
+
     Ok(())
 }