diff --git a/src/main.rs b/src/main.rs
index 352c69d..2066a48 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -5,6 +5,7 @@ extern crate reqwest;
 use rand::seq::SliceRandom;
 use rand::thread_rng;
+use std::collections::HashSet;
 use std::error::Error;
 use std::fs::write;
 use std::fs::DirBuilder;
 use std::path::Path;
@@ -15,100 +16,92 @@ mod web_fetchers;
 use rayon::prelude::*;
 
 fn main() -> Result<(), Box<dyn Error>> {
-    setup_logging()?;
+    simple_logger::init_with_level(log::Level::Info).unwrap();
 
-    let posts = load_and_process_posts()?;
-    let archive_posts = create_archive_posts(&posts)?;
+    let all_posts = utilities::read_feed("feeds.txt");
 
-    generate_and_write_output(posts, archive_posts)?;
-
-    Ok(())
-}
-
-fn setup_logging() -> Result<(), Box<dyn Error>> {
-    simple_logger::init_with_level(log::Level::Info)?;
-    Ok(())
-}
-
-fn load_and_process_posts() -> Result<Vec<utilities::Post>, Box<dyn Error>> {
-    let mut posts = grab_posts()?;
-    process_posts(&mut posts)?;
-    Ok(posts)
-}
-
-fn grab_posts() -> Result<Vec<utilities::Post>, Box<dyn Error>> {
-    let mut posts = utilities::read_feed("feeds.txt");
+    let mut posts = all_posts.clone();
     posts.retain(|post| post.score.is_positive());
-    utilities::retain_most_recent_based_on_main_url(&mut posts);
+
+    // Keep only the first occurrence of each main_url
+    {
+        let mut seen_urls = HashSet::new();
+        posts.retain(|post| seen_urls.insert(post.main_url.clone()));
+    }
 
     let mut featured = utilities::read_feed("featured_feeds.txt");
-    featured.iter_mut().for_each(|post| {
-        post.score = (post.score as f64 * 1.5) as i64;
-    });
+    // Give featured a small boost in points
+    featured = featured
+        .iter_mut()
+        .map(|post| {
+            post.score = (post.score as f64 * 1.5) as i64;
+            post.clone()
+        })
+        .collect::<Vec<utilities::Post>>();
+
     posts.extend(featured);
 
-    Ok(posts)
-}
-
-fn process_posts(posts: &mut Vec<utilities::Post>) -> Result<(), Box<dyn Error>> {
-    posts.par_iter_mut().for_each(|post| {
-        utilities::find_image(post);
-        utilities::validate(post);
-    });
+    posts.par_iter_mut().for_each(utilities::find_image);
+    posts.par_iter_mut().for_each(utilities::validate);
 
     posts.sort();
 
+    // Move the post with an image_url to the head of the list
     if let Some(pos) = posts.iter().position(|post| post.image_url.is_some()) {
         let post_with_image = posts.remove(pos);
        posts.insert(0, post_with_image);
     }
 
+    utilities::retain_most_recent_based_on_main_url(&mut posts);
+
     posts.truncate(16);
-    Ok(())
-}
 
-fn create_archive_posts(posts: &[utilities::Post]) -> Result<Vec<utilities::Post>, Box<dyn Error>> {
-    const ARCHIVE_SIZE: usize = 100;
+    let mut old_posts = all_posts;
 
-    let mut old_posts: Vec<_> = utilities::read_feed("feeds.txt")
-        .into_iter()
-        .filter(|p| !posts.contains(p))
-        .collect();
+    old_posts.retain(|p| !posts.contains(p));
 
     old_posts.shuffle(&mut thread_rng());
 
-    let mut archive_posts = Vec::new();
-    while archive_posts.len() < ARCHIVE_SIZE && !old_posts.is_empty() {
-        let chunk_size = std::cmp::min(ARCHIVE_SIZE - archive_posts.len() + 50, old_posts.len());
-        let mut chunk: Vec<_> = old_posts.drain(..chunk_size).collect();
+    let mut archive_posts: Vec<utilities::Post> = Vec::new();
+    let archive_size = 100;
 
-        chunk.par_iter_mut().for_each(utilities::validate);
-        chunk.retain(|post| post.score != 0);
-        archive_posts.extend(chunk);
+    while (archive_posts.len() < archive_size) && (old_posts.len() > 50) {
+        let iter_size = archive_size - archive_posts.len();
+        let take = (iter_size + 51).min(old_posts.len());
+        let mut extracted = old_posts
+            .drain(0..take)
+            .collect::<Vec<utilities::Post>>();
+
+        extracted.par_iter_mut().for_each(utilities::validate);
+        extracted.retain(|post| post.score != 0);
+
+        archive_posts.extend(extracted);
     }
-    archive_posts.truncate(ARCHIVE_SIZE);
-    Ok(archive_posts)
-}
+    archive_posts.truncate(archive_size);
 
-fn generate_and_write_output(
-    posts: Vec<utilities::Post>,
-    archive_posts: Vec<utilities::Post>,
-) -> Result<(), Box<dyn Error>> {
-    let index = site_generator::generate_index(posts.clone(), archive_posts);
-    write_to_file("output/index.html", index.into_string())?;
-
-    let feed = site_generator::generate_rss(posts);
-    write_to_file("output/feed.xml", feed)?;
-
-    Ok(())
-}
-
-fn write_to_file<P: AsRef<Path>>(path: P, content: String) -> Result<(), Box<dyn Error>> {
-    let path = path.as_ref();
+    let index = site_generator::generate_index(posts.clone(), archive_posts.clone());
+    let index_path = Path::new("output/index.html");
     DirBuilder::new()
         .recursive(true)
-        .create(path.parent().unwrap())?;
-    write(path, content)?;
-    log::info!("Successfully wrote to {}", path.display());
+        .create(index_path.parent().unwrap())
+        .unwrap();
+
+    match write(index_path, index.into_string()) {
+        Ok(_) => log::info!("Successfully wrote to {}", index_path.display()),
+        Err(e) => log::error!("Failed to write to {}: {}", index_path.display(), e),
+    }
+
+    let feed = site_generator::generate_rss(posts.clone());
+    let feed_path = Path::new("output/feed.xml");
+    DirBuilder::new()
+        .recursive(true)
+        .create(feed_path.parent().unwrap())
+        .unwrap();
+
+    match write(feed_path, feed) {
+        Ok(_) => log::info!("Successfully wrote to {}", feed_path.display()),
+        Err(e) => log::error!("Failed to write to {}: {}", feed_path.display(), e),
+    }
+
     Ok(())
 }