use rayon::prelude::*;
use scraper::{Html, Selector};
use std::collections::HashSet;
use std::fs;
use std::sync::Mutex;
use std::time::Duration;
/// Loads the generated landing page, `output/index.html`, into a `String`.
///
/// # Panics
///
/// Panics when the file cannot be read; the message reminds the developer to
/// run `cargo run` first so that the site exists.
fn read_generated_site() -> String {
    let index_path = "output/index.html";
    fs::read_to_string(index_path)
        .expect("Failed to read output/index.html - run `cargo run` first to generate the site")
}
fn extract_links(html: &str) -> Vec {
let document = Html::parse_document(html);
let selector = Selector::parse("a[href]").unwrap();
document
.select(&selector)
.filter_map(|el| el.value().attr("href"))
.map(String::from)
.collect()
}
fn extract_images(html: &str) -> Vec {
let document = Html::parse_document(html);
let selector = Selector::parse("img[src]").unwrap();
document
.select(&selector)
.filter_map(|el| el.value().attr("src"))
.map(String::from)
.collect()
}
/// Verifies that the generator produced both `output/index.html` and
/// `output/feed.xml`.
///
/// Only existence is checked (via `fs::metadata`); the contents are not read.
#[test]
fn test_site_was_generated() {
    let index_exists = fs::metadata("output/index.html").is_ok();
    assert!(index_exists, "output/index.html does not exist - run `cargo run` first");

    let feed_exists = fs::metadata("output/feed.xml").is_ok();
    assert!(feed_exists, "output/feed.xml does not exist - run `cargo run` first");
}
/// Checks that every link on the generated page is accepted by `url::Url::parse`.
///
/// Links beginning with `#`, `mailto:`, `/` or `./` are exempt and never handed
/// to the parser. The test also fails when the page contains no links at all.
/// Each rejected link is printed to stderr before the test panics with the count.
#[test]
fn test_all_links_are_valid_urls() {
    let page = read_generated_site();
    let hrefs = extract_links(&page);
    assert!(!hrefs.is_empty(), "No links found in generated site");

    // Fragment, mailto and site-relative links are treated as valid without parsing.
    let is_exempt = |href: &str| {
        href.starts_with('#')
            || href.starts_with("mailto:")
            || href.starts_with('/')
            || href.starts_with("./")
    };

    let unparsable: Vec<&String> = hrefs
        .iter()
        .filter(|href| !is_exempt(href.as_str()))
        .filter(|href| url::Url::parse(href.as_str()).is_err())
        .collect();

    if unparsable.is_empty() {
        return;
    }

    for href in &unparsable {
        eprintln!("Invalid URL: {}", href);
    }
    panic!(
        "Found {} invalid URLs in generated site",
        unparsable.len()
    );
}
/// Guards against excessive repetition of external links on the generated page.
///
/// Only `http://` / `https://` links are considered. Some repetition is
/// expected (for example the same URL behind a title and a "read more" link),
/// so the test fails only when the ratio of total to unique links reaches 3.0.
/// It also fails, with a dedicated message, when the page has no external
/// links at all.
#[test]
fn test_no_duplicate_links_in_main_content() {
    let html = read_generated_site();
    let links = extract_links(&html);

    // Filter to only article links (external http/https links)
    let article_links: Vec<&String> = links
        .iter()
        .filter(|l| l.starts_with("http://") || l.starts_with("https://"))
        .collect();

    // With zero links the ratio below would be 0.0 / 0.0 = NaN, and `NaN < 3.0`
    // is false, which would report "too much duplication" for an empty page.
    // Fail here instead, with a message that names the real problem.
    assert!(
        !article_links.is_empty(),
        "No external links found in generated site"
    );

    let unique: HashSet<&String> = article_links.iter().copied().collect();

    // `unique` is non-empty here, so the division is well defined.
    let duplication_ratio = article_links.len() as f64 / unique.len() as f64;
    assert!(
        duplication_ratio < 3.0,
        "Too much link duplication: {} total links, {} unique (ratio: {:.2})",
        article_links.len(),
        unique.len(),
        duplication_ratio
    );
}
/// Checks that every image source on the generated page is accepted by
/// `url::Url::parse`.
///
/// Sources beginning with `data:`, `/` or `./` are exempt and never handed to
/// the parser. Each rejected source is printed to stderr before the test
/// panics with the count. A page without images passes.
#[test]
fn test_no_broken_image_urls() {
    let page = read_generated_site();
    let sources = extract_images(&page);

    let mut rejected: Vec<&str> = Vec::new();
    for src in sources.iter().map(String::as_str) {
        // Data URLs and site-relative paths are treated as valid without parsing.
        let exempt = ["data:", "/", "./"]
            .iter()
            .any(|prefix| src.starts_with(*prefix));
        if !exempt && url::Url::parse(src).is_err() {
            rejected.push(src);
        }
    }

    if rejected.is_empty() {
        return;
    }

    for src in &rejected {
        eprintln!("Invalid image URL: {}", src);
    }
    panic!(
        "Found {} invalid image URLs in generated site",
        rejected.len()
    );
}
/// Checks that `output/feed.xml` contains at least one feed entry.
///
/// This is a plain substring check, not an XML parse: the feed passes if it
/// contains an opening `<item>` (RSS) or `<entry>` (Atom) tag, with or without
/// attributes.
///
/// # Panics
///
/// Panics if the feed file cannot be read or contains neither element.
#[test]
fn test_feed_xml_is_valid() {
    let feed_content =
        fs::read_to_string("output/feed.xml").expect("Failed to read output/feed.xml");

    // Matches `<tag>` as well as `<tag attr=...>`. The needle must never be empty:
    // `str::contains("")` is true for every string and would make the check vacuous.
    let has_element = |tag: &str| {
        feed_content.contains(&format!("<{}>", tag))
            || feed_content.contains(&format!("<{} ", tag))
    };

    assert!(
        has_element("item") || has_element("entry"),
        "Feed has no items/entries"
    );
}
/// Checks that the generated page has a `<title>` element and a
/// `<meta name='viewport'>` tag.
///
/// Only presence is checked; the content of either element is not inspected.
#[test]
fn test_html_has_required_meta_tags() {
    let html = read_generated_site();
    let document = Html::parse_document(&html);

    // Check for title
    let title_selector = Selector::parse("title").unwrap();
    assert!(
        document.select(&title_selector).next().is_some(),
        "Missing <title> tag"
    );

    // Check for viewport meta (mobile responsiveness)
    let viewport_selector = Selector::parse("meta[name='viewport']").unwrap();
    assert!(
        document.select(&viewport_selector).next().is_some(),
        "Missing viewport meta tag"
    );
}
/// Performs a live reachability check of every unique external link on the
/// generated page.
///
/// Each `http://` / `https://` link is probed with a `HEAD` request. A 2xx,
/// 3xx or 405 response counts as reachable. Any other `HEAD` status triggers a
/// `GET` retry, which must answer 2xx or 3xx. A transport error on either
/// request marks the link as broken. All broken links are printed to stderr
/// before the test panics with the count.
///
/// This test issues real HTTP requests, in parallel, with a 15 second timeout
/// per request.
#[test]
fn test_all_links_are_reachable() {
    let html = read_generated_site();
    let links = extract_links(&html);

    // Round-trip through a `HashSet` so that each URL is requested only once.
    let external_links: Vec<String> = links
        .into_iter()
        .filter(|l| l.starts_with("http://") || l.starts_with("https://"))
        .collect::<HashSet<_>>()
        .into_iter()
        .collect();
    let total_links = external_links.len();
    println!("Checking {} unique external links...", total_links);

    // Build the client once and share it between the worker threads instead of
    // constructing a fresh one (and a fresh connection pool) for every link.
    let client = reqwest::blocking::Client::builder()
        .timeout(Duration::from_secs(15))
        .user_agent("Zine-Link-Checker/1.0")
        .build()
        .expect("Failed to create HTTP client");

    let is_reachable =
        |status: reqwest::StatusCode| status.is_success() || status.is_redirection();

    // (link, reason) pairs, filled in by the parallel workers below.
    let broken_links: Mutex<Vec<(String, String)>> = Mutex::new(Vec::new());

    // Check all links in parallel
    external_links.par_iter().for_each(|link| {
        let failure = match client.head(link.as_str()).send() {
            Err(e) => Some(e.to_string()),
            // 405 (method not allowed) is accepted as-is: some servers don't allow HEAD.
            Ok(head) if is_reachable(head.status()) || head.status().as_u16() == 405 => None,
            // Any other HEAD status: retry with GET, since some servers reject HEAD.
            Ok(_) => match client.get(link.as_str()).send() {
                Err(e) => Some(e.to_string()),
                Ok(get) if is_reachable(get.status()) => None,
                Ok(get) => Some(get.status().to_string()),
            },
        };
        if let Some(error) = failure {
            broken_links.lock().unwrap().push((link.clone(), error));
        }
    });

    let mut broken = broken_links.into_inner().unwrap();
    // Workers finish in arbitrary order; sort so the report is stable between runs.
    broken.sort();

    if !broken.is_empty() {
        eprintln!("\nBroken links found:");
        for (link, error) in &broken {
            eprintln!(" {} - {}", link, error);
        }
        panic!(
            "Found {} broken links out of {} total",
            broken.len(),
            total_links
        );
    }
    println!("All {} links are reachable!", total_links);
}
/// Checks that no article title on the generated page is blank.
///
/// Titles are the elements matched by `article h2, article h3, .post-title`.
/// A title counts as empty when its text is empty or whitespace only.
/// Despite the test's name, only titles are inspected here; descriptions are
/// not checked.
#[test]
fn test_no_empty_titles_or_descriptions() {
    let html = read_generated_site();
    let document = Html::parse_document(&html);

    // Check article titles aren't empty
    let title_selector = Selector::parse("article h2, article h3, .post-title").unwrap();
    let empty_titles = document
        .select(&title_selector)
        // The concatenated text trims to "" exactly when every text chunk is
        // whitespace only, so no intermediate `String` is needed.
        .filter(|el| el.text().all(|chunk| chunk.trim().is_empty()))
        .count();

    assert!(
        empty_titles == 0,
        "Found {} empty titles in the generated site",
        empty_titles
    );
}