use rayon::prelude::*; use scraper::{Html, Selector}; use std::collections::HashSet; use std::fs; use std::sync::Mutex; use std::time::Duration; fn read_generated_site() -> String { fs::read_to_string("output/index.html").expect( "Failed to read output/index.html - run `cargo run` first to generate the site", ) } fn extract_links(html: &str) -> Vec { let document = Html::parse_document(html); let selector = Selector::parse("a[href]").unwrap(); document .select(&selector) .filter_map(|el| el.value().attr("href")) .map(String::from) .collect() } fn extract_images(html: &str) -> Vec { let document = Html::parse_document(html); let selector = Selector::parse("img[src]").unwrap(); document .select(&selector) .filter_map(|el| el.value().attr("src")) .map(String::from) .collect() } #[test] fn test_site_was_generated() { assert!( fs::metadata("output/index.html").is_ok(), "output/index.html does not exist - run `cargo run` first" ); assert!( fs::metadata("output/feed.xml").is_ok(), "output/feed.xml does not exist - run `cargo run` first" ); } #[test] fn test_all_links_are_valid_urls() { let html = read_generated_site(); let links = extract_links(&html); assert!(!links.is_empty(), "No links found in generated site"); let mut invalid_links = Vec::new(); for link in &links { // Skip anchor links, mailto, and relative paths (which are valid) if link.starts_with('#') || link.starts_with("mailto:") || link.starts_with('/') || link.starts_with("./") { continue; } if url::Url::parse(link).is_err() { invalid_links.push(link.clone()); } } if !invalid_links.is_empty() { for link in &invalid_links { eprintln!("Invalid URL: {}", link); } panic!( "Found {} invalid URLs in generated site", invalid_links.len() ); } } #[test] fn test_no_duplicate_links_in_main_content() { let html = read_generated_site(); let links = extract_links(&html); // Filter to only article links (external http/https links) let article_links: Vec<&String> = links .iter() .filter(|l| l.starts_with("http://") || l.starts_with("https://")) .collect(); let unique: HashSet<&String> = article_links.iter().cloned().collect(); // Some duplication is expected (e.g., link appears in title and "read more") // but we shouldn't have excessive duplication let duplication_ratio = article_links.len() as f64 / unique.len() as f64; assert!( duplication_ratio < 3.0, "Too much link duplication: {} total links, {} unique (ratio: {:.2})", article_links.len(), unique.len(), duplication_ratio ); } #[test] fn test_no_broken_image_urls() { let html = read_generated_site(); let images = extract_images(&html); let mut invalid_images = Vec::new(); for img in &images { // Skip data URLs and relative paths (which are valid) if img.starts_with("data:") || img.starts_with("/") || img.starts_with("./") { continue; } if url::Url::parse(img).is_err() { invalid_images.push(img.clone()); } } if !invalid_images.is_empty() { for img in &invalid_images { eprintln!("Invalid image URL: {}", img); } panic!( "Found {} invalid image URLs in generated site", invalid_images.len() ); } } #[test] fn test_feed_xml_is_valid() { let feed_content = fs::read_to_string("output/feed.xml").expect("Failed to read output/feed.xml"); // Basic XML structure checks assert!( feed_content.contains("") || feed_content.contains(""), "Feed has no items/entries" ); } #[test] fn test_html_has_required_meta_tags() { let html = read_generated_site(); let document = Html::parse_document(&html); // Check for title let title_selector = Selector::parse("title").unwrap(); assert!( document.select(&title_selector).next().is_some(), "Missing tag" ); // Check for viewport meta (mobile responsiveness) let viewport_selector = Selector::parse("meta[name='viewport']").unwrap(); assert!( document.select(&viewport_selector).next().is_some(), "Missing viewport meta tag" ); } #[test] fn test_all_links_are_reachable() { let html = read_generated_site(); let links = extract_links(&html); // Get unique external links let external_links: Vec<String> = links .into_iter() .filter(|l| l.starts_with("http://") || l.starts_with("https://")) .collect::<HashSet<_>>() .into_iter() .collect(); let total_links = external_links.len(); println!("Checking {} unique external links...", total_links); let broken_links: Mutex<Vec<(String, String)>> = Mutex::new(Vec::new()); // Check all links in parallel external_links.par_iter().for_each(|link| { let client = reqwest::blocking::Client::builder() .timeout(Duration::from_secs(15)) .user_agent("Zine-Link-Checker/1.0") .build() .expect("Failed to create HTTP client"); let is_broken = match client.head(link).send() { Ok(response) => { let status = response.status(); // Accept 2xx, 3xx, and 405 (method not allowed - some servers don't allow HEAD) if !status.is_success() && !status.is_redirection() && status.as_u16() != 405 { // Try GET as fallback (some servers reject HEAD) match client.get(link).send() { Ok(get_response) => { let get_status = get_response.status(); if !get_status.is_success() && !get_status.is_redirection() { Some(get_status.to_string()) } else { None } } Err(e) => Some(e.to_string()), } } else { None } } Err(e) => Some(e.to_string()), }; if let Some(error) = is_broken { broken_links.lock().unwrap().push((link.clone(), error)); } }); let broken = broken_links.into_inner().unwrap(); if !broken.is_empty() { eprintln!("\nBroken links found:"); for (link, error) in &broken { eprintln!(" {} - {}", link, error); } panic!( "Found {} broken links out of {} total", broken.len(), total_links ); } println!("All {} links are reachable!", total_links); } #[test] fn test_no_empty_titles_or_descriptions() { let html = read_generated_site(); let document = Html::parse_document(&html); // Check article titles aren't empty let title_selector = Selector::parse("article h2, article h3, .post-title").unwrap(); let titles: Vec<String> = document .select(&title_selector) .map(|el| el.text().collect::<String>().trim().to_string()) .collect(); let empty_titles: Vec<&String> = titles.iter().filter(|t| t.is_empty()).collect(); assert!( empty_titles.is_empty(), "Found {} empty titles in the generated site", empty_titles.len() ); }