diff --git a/src/main.rs b/src/main.rs index c2d27d5..49db38c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -33,12 +33,25 @@ fn main() -> Result<(), Box> { entries.retain(|entry| entry.score.is_positive()); entries.par_iter_mut().for_each(|entry| { - if entry.image_url.is_some() { - entry.score += 1440; + if let Some(image_url) = &entry.image_url { + match web_fetchers::is_valid_image_url(image_url) { + Ok(true) => { + entry.score += 1440; + } + _ => { + entry.image_url = None; + entry.score += -1440; + } + } } else { match web_fetchers::fetch_social_image(entry.link.clone()) { Ok(social_image_url) => { - entry.image_url = Some(social_image_url); + if web_fetchers::is_valid_image_url(&social_image_url).unwrap_or(false) { + entry.image_url = Some(social_image_url); + entry.score += 1440; + } else { + entry.score += -1440; + } } Err(error) => { log::info!("{}: {}", error, entry.link.clone()); @@ -47,7 +60,6 @@ fn main() -> Result<(), Box> { } } }); - entries.retain(|entry| entry.score.is_positive()); // Count occurences of main urls @@ -66,6 +78,18 @@ fn main() -> Result<(), Box> { // Remove bottom 10% from list entries.truncate(entries.len() - (entries.len() as f64 * 0.1).ceil() as usize); + // Make sure first entry has an image since it is used as the featured post + let mut max_iter = 0; + while entries.first().unwrap().image_url.is_none() { + entries[0].score += -100; + entries.sort(); + + max_iter += 1; + if max_iter > 10000 { + break; + } + } + let index = site_generator::generate_index(entries.clone()); let index_path = Path::new("output/index.html"); DirBuilder::new() diff --git a/src/web_fetchers.rs b/src/web_fetchers.rs index 03e3d85..a7cf0ff 100644 --- a/src/web_fetchers.rs +++ b/src/web_fetchers.rs @@ -33,3 +33,16 @@ pub fn fetch_social_image(url: String) -> Result Result> { + let client = reqwest::blocking::Client::new(); + let response = client.head(url).send()?; + + let status = response.status(); + let content_type = response.headers().get(reqwest::header::CONTENT_TYPE); + + Ok(status.is_success() + && content_type.map_or(false, |ct| { + ct.to_str().map_or(false, |s| s.starts_with("image/")) + })) +}