mirror of https://gitlab.com/Anson-Projects/zine.git synced 2025-06-16 05:26:40 +00:00

Use Custom Struct

Anson Biggs 2024-04-12 05:45:43 +00:00
parent 841af6aa41
commit f2ff3e3640
9 changed files with 630 additions and 400 deletions


@@ -1,4 +1,6 @@
 stages:
+  - build
+  - lint
   - test
   - build_site
   - deploy
@@ -13,14 +15,26 @@ cache:
     - target/
     - cargo/
 
+build:
+  image: rust:latest
+  stage: build
+  script:
+    - cargo build
+
+lint:
+  image: rust:latest
+  stage: lint
+  script:
+    - rustup component add clippy
+    - cargo clippy --all-targets -- -D warnings
+
 test:
   image: rust:latest
   stage: test
   script:
     - cargo test --verbose
   rules:
     - if: "$CI_COMMIT_BRANCH != $CI_DEFAULT_BRANCH"
 
 build_site:
   image: rust:latest
@@ -44,8 +58,6 @@ pages:
   rules:
     - if: "$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH"
 
 workflow:
   name: $CI_COMMIT_REF_NAME
   rules:
@@ -54,4 +66,4 @@ workflow:
         CI_COMMIT_REF_NAME: $SCHEDULED_PIPELINE_NAME
     - if: $SCHEDULED_BUILD_PIPELINE != 'true'
       variables:
         CI_COMMIT_REF_NAME: $DEFAULT_PIPELINE_NAME

Cargo.lock (generated, 623 changed lines): file diff suppressed because it is too large.


@@ -1,7 +1,7 @@
 [package]
 name = "aggregate_rss_zine"
 description = "Aggregate feed of RSS feeds I enjoy in the form of a newspaper."
-version = "0.2.0"
+version = "0.3.0"
 edition = "2021"
 authors = ["Anson Biggs"]
 homepage = "https://zine.ansonbiggs.com"
@@ -10,11 +10,14 @@ license = "MIT"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
-feed-rs = "1.4.0"
+feed-rs = "1.4"
-reqwest = { version = "0.11.24", features = ["blocking"] }
+reqwest = { version = "0.12", features = ["blocking"] }
-maud = "0.26.0"
+maud = "0.26"
-chrono = "0.4.33"
+chrono = "0.4"
-scraper = "0.19.0"
+scraper = "0.19"
-rayon = "1.8.1"
+rayon = "1.8"
-simple_logger = "4.3.3"
+simple_logger = "4.3"
 log = "0.4"
+
+[dev-dependencies]
+clippy = "0.0.302"


@@ -11,10 +11,10 @@ https://blog.andymatuschak.org/rss
 https://blog.benjojo.co.uk/rss.xml
 https://blog.codinghorror.com/rss/
 https://blog.frost.kiwi/feed.xml
+https://c.pgdm.ch/atom.xml
 https://calebporzio.com/feed
 https://chrisnicholas.dev/rss.xml
 https://christianselig.com/index.xml
-https://ciechanow.ski/atom.xml
 https://danluu.com/atom.xml
 https://darekkay.com/atom.xml
 https://decomposition.al/atom.xml
@@ -47,6 +47,7 @@ https://steveklabnik.com/feed.xml
 https://taylor.town/feed.xml
 https://vickiboykis.com/index.xml
 https://vitalik.eth.limo/feed.xml
+https://www.bitsaboutmoney.com/archive/rss/
 https://www.construction-physics.com/feed
 https://www.elidedbranches.com/feeds/posts/default
 https://www.jeffgeerling.com/blog.xml


@@ -5,76 +5,28 @@ extern crate reqwest;
 use chrono::DateTime;
 use chrono::Utc;
-use feed_rs::model::Entry;
 use maud::{html, Markup};
-use std::env;
 
 use crate::utilities;
-use crate::web_fetchers;
 
-fn create_featured_card(entry: &Entry) -> Markup {
-    let title = entry
-        .title
-        .as_ref()
-        .map_or_else(|| "".to_string(), |t| t.content.clone());
-    let link = entry.links.first().unwrap();
-    let lang = link.clone().href_lang.unwrap_or("en".to_string());
-    if lang != "en" {
-        log::warn!("Non english! {} {}", lang, link.href);
-    }
-
-    let mut image_url = entry
-        .media
-        .first()
-        .and_then(|m| m.content.first())
-        .and_then(|c| c.url.as_ref().map(|u| u.to_string()))
-        .unwrap_or_default();
-
-    // Fallback to fetching social image if direct extraction didn't work
-    if image_url.is_empty() {
-        log::info!(
-            "Falling back to searching for a social image for {}",
-            link.href
-        );
-        image_url = web_fetchers::fetch_social_image(link.href.as_str()).unwrap_or_default();
-    }
-
-    let description = entry.content.as_ref().map_or_else(
-        || {
-            entry
-                .summary
-                .as_ref()
-                .map_or_else(|| "".to_string(), |summary| summary.content.clone())
-        },
-        |content| {
-            content
-                .body
-                .as_ref()
-                .map_or_else(|| "".to_string(), |body| body.clone())
-        },
-    );
-    let cleaned_description = utilities::strip_html_tags(&description);
-    let truncated_description = utilities::truncate_description(&cleaned_description, 500);
-
-    let main_url = utilities::get_root_url(link.href.as_str());
-
+fn create_featured_card(entry: &utilities::Post) -> Markup {
     html! {
         article class="featured" {
             header {
-                img src=(image_url) alt="Entry image";
+                @if entry.image_url.is_some() {
+                    img src=(entry.image_url.as_ref().unwrap()) alt="Featured image";
+                }
                 hgroup {
-                    h2 { (title) }
-                    a href=(format!("http://{}", main_url)) { (main_url) }
+                    h2 { (entry.title) }
+                    a href=(format!("http://{}", entry.main_url)) { (entry.main_url) }
                 }
             }
             body {
-                p { (truncated_description) }
+                p { (entry.truncated_description) }
             }
             footer {
-                a class="grid" href=(link.href) style="--pico-text-decoration: none;" {
+                a class="grid" href=(entry.link) style="--pico-text-decoration: none;" {
                     button class="outline primary" { "Read Featured Post" }
                 }
            }
@@ -82,75 +34,24 @@ fn create_featured_card(entry: &Entry) -> Markup {
     }
 }
 
-fn create_post_card(entry: &Entry) -> Markup {
-    let title = entry
-        .title
-        .as_ref()
-        .map_or_else(|| "".to_string(), |t| t.content.clone());
-    let link = entry.links.first().unwrap();
-    let lang = link.clone().href_lang.unwrap_or("en".to_string());
-    if lang != "en" {
-        log::warn!("Non english! {} {}", lang, link.href);
-    }
-
-    let mut image_url = entry
-        .media
-        .first()
-        .and_then(|m| m.content.first())
-        .and_then(|c| c.url.as_ref().map(|u| u.to_string()))
-        .unwrap_or_default();
-
-    // Fallback to fetching social image if direct extraction didn't work
-    if image_url.is_empty() {
-        log::info!(
-            "Falling back to searching for a social image for {}",
-            link.href
-        );
-        image_url = web_fetchers::fetch_social_image(link.href.as_str()).unwrap_or_default();
-    }
-
-    if image_url.is_empty() {
-        log::warn!("No image could be gathered for {}", link.href);
-    }
-
-    let description = entry.content.as_ref().map_or_else(
-        || {
-            entry
-                .summary
-                .as_ref()
-                .map_or_else(|| "".to_string(), |summary| summary.content.clone())
-        },
-        |content| {
-            content
-                .body
-                .as_ref()
-                .map_or_else(|| "".to_string(), |body| body.clone())
-        },
-    );
-    let cleaned_description = utilities::strip_html_tags(&description);
-    let truncated_description = utilities::truncate_description(&cleaned_description, 500);
-
-    let main_url = utilities::get_root_url(link.href.as_str());
-
+fn create_post_card(entry: &utilities::Post) -> Markup {
     html! {
         article {
             header {
                 hgroup {
-                    h2 { (title) }
-                    a href=(format!("http://{}", main_url)) { (main_url) }
+                    h2 { (entry.title) }
+                    a href=(format!("http://{}", entry.main_url)) { (entry.main_url) }
                 }
             }
             body {
-                @if !image_url.is_empty() {
-                    img src=(image_url) alt="Entry image";
+                @if entry.image_url.is_some() {
+                    img src=(entry.image_url.as_ref().unwrap()) alt="Entry image";
                     p;
                 }
-                p { (truncated_description) }
+                p { (entry.truncated_description) }
             }
             footer {
-                a class="grid" href=(link.href) style="--pico-text-decoration: none;" {
+                a class="grid" href=(entry.link) style="--pico-text-decoration: none;" {
                     button class="outline secondary" { "Read Post" }
                 }
             }
@@ -201,16 +102,15 @@ fn generate_header() -> Markup {
     }
 }
 
-fn about_modal(entries: Vec<Entry>) -> Markup {
+fn about_modal(entries: Vec<utilities::Post>) -> Markup {
     // Get link for each entry, which is a blog post then,
     // convert it to a url to the main page of the blog
     let mut links = entries
         .iter()
-        .map(|entry| entry.links.first().unwrap().href.as_str())
-        .map(utilities::get_root_url)
-        .collect::<std::collections::HashSet<&str>>()
+        .map(|entry| entry.main_url.as_str())
+        .collect::<std::collections::HashSet<_>>()
         .into_iter()
-        .collect::<Vec<&str>>();
+        .collect::<Vec<_>>();
 
     // Alphabetical to be fair to everytone :)
     links.sort();
@@ -234,7 +134,7 @@ fn about_modal(entries: Vec<Entry>) -> Markup {
             }
             ul {
                 @for link in links {
-                    li {a href=("http://".to_owned() + link) {(link)}}
+                    li {a href=(format!("{}{}","http://".to_owned() , link)) {(link)}}
                 }
             }
             p {
@@ -246,24 +146,14 @@ fn about_modal(entries: Vec<Entry>) -> Markup {
     }
 }
 
-pub fn generate_index(mut entries: Vec<Entry>, featured: Entry) -> Markup {
-    let running_in_gitlab = env::var("CI").map(|val| val == "true").unwrap_or(false);
-
-    if running_in_gitlab {
-        log::info!("Building for deployment");
-        entries.truncate(30);
-    } else {
-        log::info!("Building for development");
-        entries.truncate(6);
-    }
-
-    let featured_card: maud::PreEscaped<String>;
-    if (utilities::get_entry_date(&featured)) > (chrono::Utc::now() - chrono::Duration::days(3)) {
-        featured_card = create_featured_card(&featured);
-        entries.retain(|entry| entry != &featured);
-    } else {
-        featured_card = html! {};
-    }
+pub fn generate_index(mut entries: Vec<utilities::Post>) -> Markup {
+    let featured = entries.first().unwrap().clone();
+    entries.remove(0);
+
+    log::info!(
+        "Featured article: {}, img: {:?}",
+        featured.link,
+        featured.image_url
+    );
 
     html! {
         (maud::DOCTYPE)
@@ -282,7 +172,7 @@ pub fn generate_index(mut entries: Vec<Entry>, featured: Entry) -> Markup {
         }
         body { main class="container" {
             {(generate_header())}
-            (featured_card)
+            {(create_featured_card(&featured))}
             div class="grid" {
                 @for column_entries in utilities::group_by_nth(&entries, 3) {
                     div {
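A note on the @if blocks introduced above: maud's html! macro supports runtime conditionals, which is how the Option-valued image_url on the new Post struct can gate the img tag. A minimal standalone sketch of the same pattern (the image_url value below is invented for illustration, not taken from the commit):

    use maud::html;

    fn main() {
        let image_url: Option<String> = Some("https://example.com/cover.png".to_string());

        let markup = html! {
            article {
                @if image_url.is_some() {
                    img src=(image_url.as_ref().unwrap()) alt="Entry image";
                }
                p { "Hello from maud" }
            }
        };

        // html! produces a Markup value that renders to a plain String.
        assert!(markup.into_string().contains("img"));
    }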


@@ -7,24 +7,50 @@ use std::error::Error;
 use std::fs::write;
 use std::fs::DirBuilder;
 use std::path::Path;
-use utilities::read_feed;
 
 mod index_generator;
 mod utilities;
 mod web_fetchers;
 
+use rayon::prelude::*;
+
 fn main() -> Result<(), Box<dyn Error>> {
     simple_logger::init_with_level(log::Level::Info).unwrap();
 
-    let featured = read_feed("featured_feeds.txt").first().unwrap().clone();
-    let entries = read_feed("feeds.txt");
-
-    log::info!(
-        "Featured article: {}",
-        entries[0].links.first().unwrap().href.as_str()
-    );
-
-    let index = index_generator::generate_index(entries, featured);
+    let mut featured = utilities::read_feed("featured_feeds.txt");
+
+    featured = featured
+        .iter_mut()
+        .map(|post| {
+            post.score += 60 * 24 * 2;
+            post.clone()
+        })
+        .collect::<Vec<_>>();
+
+    let mut entries = utilities::read_feed("feeds.txt");
+    entries.extend(featured);
+
+    entries.retain(|entry| entry.score.is_positive());
+
+    entries.par_iter_mut().for_each(|entry| {
+        if entry.image_url.is_some() {
+            entry.score += 300;
+        } else {
+            match web_fetchers::fetch_social_image(entry.link.clone()) {
+                Ok(social_image_url) => {
+                    entry.image_url = Some(social_image_url);
+                }
+                Err(error) => {
+                    log::info!("{}: {}", error, entry.link.clone());
+                    entry.score += -600;
+                }
+            }
+        }
+    });
+
+    entries.retain(|entry| entry.score.is_positive());
+    entries.sort();
+
+    let index = index_generator::generate_index(entries);
 
     let output_path = Path::new("output/index.html");
     DirBuilder::new()
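For readers following the new scoring pass in main(): the base score assigned in utilities::Post::from_entry (see the utilities diff below) is the number of minutes between the post's date and the start of a 14-day window, so recency typically dominates, and the adjustments applied here (60 * 24 * 2 for featured feeds, +300 for a post that already has an image, -600 when no social image can be fetched) nudge posts up or down before non-positive scores are dropped. A rough sketch of that arithmetic, using a hypothetical post published 3 days ago:

    use chrono::{Duration, Utc};

    fn main() {
        let now = Utc::now();

        // Base score: minutes between the post date and the start of a
        // 14-day window, mirroring Post::from_entry (post age is hypothetical).
        let published = now - Duration::days(3);
        let mut score = (published - (now - Duration::days(14))).num_minutes();
        assert_eq!(score, 11 * 24 * 60); // 15,840 minutes of headroom

        score += 60 * 24 * 2; // featured-feed boost applied in main()
        score += 300;         // bonus for a post that already has an image
        assert!(score.is_positive());
    }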


@@ -1,11 +1,95 @@
 use crate::web_fetchers;
+use chrono::{DateTime, Duration, Utc};
 use feed_rs::model::Entry;
 use rayon::prelude::*;
 use scraper::{Html, Selector};
-use std::cmp::Reverse;
+use std::cmp::Ordering;
 use std::fs;
 
-pub fn read_feed(path: &str) -> Vec<Entry> {
+#[derive(Clone, PartialEq, Eq)]
+pub struct Post {
+    pub title: String,
+    pub link: String,
+    pub date: DateTime<Utc>,
+    pub lang: String,
+    pub image_url: Option<String>,
+    pub truncated_description: String,
+    pub main_url: String,
+    pub score: i64,
+}
+
+impl Ord for Post {
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.score.partial_cmp(&other.score).unwrap().reverse()
+    }
+}
+
+impl PartialOrd for Post {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Post {
+    fn from_entry(entry: &feed_rs::model::Entry) -> Self {
+        let title = entry
+            .title
+            .as_ref()
+            .map_or_else(|| "".to_string(), |t| t.content.clone());
+
+        let link = entry.links.first().unwrap();
+        let date = get_entry_date(entry);
+        let lang = link.clone().href_lang.unwrap_or("en".to_string());
+
+        if lang != "en" {
+            log::warn!("Non english! {} {}", lang, link.href);
+        }
+
+        let image_url = entry
+            .media
+            .first()
+            .and_then(|m| m.content.first())
+            .and_then(|c| c.url.as_ref().map(|u| u.to_string()));
+
+        let description = entry.content.as_ref().map_or_else(
+            || {
+                entry
+                    .summary
+                    .as_ref()
+                    .map_or_else(|| "".to_string(), |summary| summary.content.clone())
+            },
+            |content| {
+                content
+                    .body
+                    .as_ref()
+                    .map_or_else(|| "".to_string(), |body| body.clone())
+            },
+        );
+        let cleaned_description = strip_html_tags(&description);
+        let truncated_description = truncate_description(&cleaned_description, 500);
+
+        let main_url = get_root_url(link.href.as_str());
+
+        let score = (date - (Utc::now() - Duration::days(14))).num_minutes();
+
+        Post {
+            title,
+            link: link.href.clone(),
+            date,
+            lang,
+            image_url,
+            truncated_description,
+            main_url,
+            score,
+        }
+    }
+}
+
+pub fn read_feed(path: &str) -> Vec<Post> {
     let binding = fs::read_to_string(path).unwrap();
     let feed_urls: Vec<&str> = binding.lines().collect();
@@ -27,9 +111,12 @@ pub fn read_feed(path: &str) -> Vec<Entry> {
     }
 
     entries.retain(validate_entry_date);
-    entries.sort_by_key(|entry| Reverse(get_entry_date(entry)));
 
     entries
+        .par_iter()
+        .map(Post::from_entry)
+        .filter(|entry| entry.date < chrono::Utc::now())
+        .collect::<Vec<_>>()
 }
 
 fn validate_entry_date(entry: &Entry) -> bool {
@@ -42,17 +129,17 @@ fn validate_entry_date(entry: &Entry) -> bool {
     }
 }
 
-pub fn get_entry_date(entry: &Entry) -> chrono::DateTime<chrono::Utc> {
+pub fn get_entry_date(entry: &Entry) -> DateTime<Utc> {
     entry.published.unwrap_or(entry.updated.unwrap_or_default())
 }
 
-pub fn get_root_url(input_url: &str) -> &str {
+pub fn get_root_url(input_url: &str) -> String {
     let mut url = input_url;
     url = url.strip_prefix("https://").unwrap_or(url);
     url = url.strip_prefix("http://").unwrap_or(url);
-    url.split_once('/').unwrap().0
+    url.split_once('/').unwrap().0.to_string()
 }
 
 pub fn truncate_description(description: &str, max_length: usize) -> String {
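A note on the Ord implementation above: cmp compares the integer score and then calls .reverse(), so the plain entries.sort() in main() puts the highest-scoring Post first. A minimal sketch of the same pattern on a stripped-down struct (the Item type and the score values are hypothetical, not part of this commit):

    use std::cmp::Ordering;

    #[derive(Debug, Clone, PartialEq, Eq)]
    struct Item {
        score: i64,
    }

    impl Ord for Item {
        fn cmp(&self, other: &Self) -> Ordering {
            // Compare by score, then reverse so sort() yields descending order.
            self.score.cmp(&other.score).reverse()
        }
    }

    impl PartialOrd for Item {
        fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
            Some(self.cmp(other))
        }
    }

    fn main() {
        let mut items = vec![Item { score: 10 }, Item { score: 300 }, Item { score: -5 }];
        items.sort();

        // Highest score sorts to the front: 300, 10, -5.
        assert_eq!(items[0].score, 300);
    }

For integer scores, i64::cmp gives the same result as the partial_cmp(...).unwrap() used in the diff; the sketch simply avoids the unwrap.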


@@ -17,7 +17,7 @@ pub fn fetch_feed(url: &str) -> Result<Vec<Entry>, Box<dyn Error>> {
     Ok(feed.entries)
 }
 
-pub fn fetch_social_image(url: &str) -> Result<String, Box<dyn std::error::Error>> {
+pub fn fetch_social_image(url: String) -> Result<String, Box<dyn std::error::Error>> {
     let html = reqwest::blocking::get(url)?.text()?;
     let document = Html::parse_document(&html);
     let selector = Selector::parse("meta[property=\"og:image\"]").unwrap();
@@ -25,8 +25,11 @@ pub fn fetch_social_image(url: &str) -> Result<String, Box<dyn std::error::Error
     let image_url = document
         .select(&selector)
         .next()
-        .and_then(|element| element.value().attr("content"))
-        .unwrap_or("");
+        .and_then(|element| element.value().attr("content"));
 
-    Ok(image_url.to_string())
+    if let Some(url) = image_url {
+        Ok(url.to_string())
+    } else {
+        Err("No social image found".into())
+    }
 }
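The selector in fetch_social_image targets the Open Graph image tag, i.e. a meta element with property="og:image". A minimal sketch of the same extraction run against an in-memory document, skipping the HTTP request (the HTML snippet and URL are invented for illustration):

    use scraper::{Html, Selector};

    fn main() {
        let html = r#"<html><head>
            <meta property="og:image" content="https://example.com/cover.png">
        </head><body></body></html>"#;

        let document = Html::parse_document(html);
        let selector = Selector::parse(r#"meta[property="og:image"]"#).unwrap();

        // Same chain as fetch_social_image: first match, then its content attribute.
        let image_url = document
            .select(&selector)
            .next()
            .and_then(|element| element.value().attr("content"));

        assert_eq!(image_url, Some("https://example.com/cover.png"));
    }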


@@ -1,39 +1,15 @@
-use feed_rs::parser;
-use reqwest::blocking::get;
+use std::collections::HashSet;
 use std::fs;
 
 // Function to read URLs from a file
-fn read_feed() -> Vec<String> {
-    let binding = fs::read_to_string("feeds.txt").unwrap();
+fn read_feed(path: &str) -> Vec<String> {
+    let binding = fs::read_to_string(path).unwrap();
     binding.lines().map(|s| s.to_owned()).collect()
 }
 
-// Function to fetch and parse a feed, returning true if successful
-fn fetch_and_parse_feed(url: &str) -> bool {
-    let content = match get(url) {
-        Ok(response) => response.text().unwrap_or_default(),
-        Err(_) => return false,
-    };
-
-    parser::parse(content.as_bytes()).is_ok()
-}
-
-#[test]
-fn test_that_urls_point_to_valid_feeds() {
-    let urls = read_feed();
-
-    for url in urls {
-        assert!(
-            fetch_and_parse_feed(&url),
-            "Feed at URL failed validation: {}",
-            url
-        );
-    }
-}
-
 #[test]
 fn test_if_feeds_are_in_alphabetical_order() {
-    let mut urls = read_feed();
+    let mut urls = read_feed("feeds.txt");
 
     if !urls.windows(2).all(|w| w[0] < w[1]) {
         println!("Sorted feeds.txt:");
@@ -46,3 +22,10 @@ fn test_if_feeds_are_in_alphabetical_order() {
         panic!("feeds.txt was not sorted!")
     }
 }
+
+#[test]
+fn test_if_feeds_lists_have_overlapping_feed() {
+    let set1: HashSet<_> = read_feed("feeds.txt").into_iter().collect();
+    let set2: HashSet<_> = read_feed("featured_feeds.txt").into_iter().collect();
+    assert!(set1.is_disjoint(&set2));
+}
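The new disjointness test pairs with the entries.extend(featured) call in main(): if a URL were listed in both feeds.txt and featured_feeds.txt, its posts would be collected twice, so the two lists must not overlap. A small sketch of how HashSet::is_disjoint behaves (the URLs here are invented):

    use std::collections::HashSet;

    fn main() {
        let feeds: HashSet<&str> =
            ["https://example.com/rss", "https://example.org/atom.xml"].into_iter().collect();
        let featured: HashSet<&str> = ["https://example.net/feed.xml"].into_iter().collect();

        // True only when no URL appears in both sets.
        assert!(feeds.is_disjoint(&featured));
    }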