Mirror of https://gitlab.com/Anson-Projects/zine.git (synced 2025-06-16 05:26:40 +00:00)

Commit: Use Custom Struct
parent 841af6aa41
commit f2ff3e3640
.gitlab-ci.yml

@@ -1,4 +1,6 @@
 stages:
+  - build
+  - lint
   - test
   - build_site
   - deploy

@@ -13,14 +15,26 @@ cache:
     - target/
     - cargo/

+build:
+  image: rust:latest
+  stage: build
+  script:
+    - cargo build
+
+lint:
+  image: rust:latest
+  stage: lint
+  script:
+    - rustup component add clippy
+    - cargo clippy --all-targets -- -D warnings
+
 test:
   image: rust:latest
   stage: test
   script:
     - cargo test --verbose
   rules:
     - if: "$CI_COMMIT_BRANCH != $CI_DEFAULT_BRANCH"


 build_site:
   image: rust:latest

@@ -44,8 +58,6 @@ pages:
   rules:
     - if: "$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH"

-
-
 workflow:
   name: $CI_COMMIT_REF_NAME
   rules:

@@ -54,4 +66,4 @@ workflow:
         CI_COMMIT_REF_NAME: $SCHEDULED_PIPELINE_NAME
     - if: $SCHEDULED_BUILD_PIPELINE != 'true'
       variables:
         CI_COMMIT_REF_NAME: $DEFAULT_PIPELINE_NAME
Cargo.lock (generated, 623 lines changed)
File diff suppressed because it is too large
Cargo.toml (19 lines changed)

@@ -1,7 +1,7 @@
 [package]
 name = "aggregate_rss_zine"
 description = "Aggregate feed of RSS feeds I enjoy in the form of a newspaper."
-version = "0.2.0"
+version = "0.3.0"
 edition = "2021"
 authors = ["Anson Biggs"]
 homepage = "https://zine.ansonbiggs.com"

@@ -10,11 +10,14 @@ license = "MIT"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

 [dependencies]
-feed-rs = "1.4.0"
-reqwest = { version = "0.11.24", features = ["blocking"] }
-maud = "0.26.0"
-chrono = "0.4.33"
-scraper = "0.19.0"
-rayon = "1.8.1"
-simple_logger = "4.3.3"
+feed-rs = "1.4"
+reqwest = { version = "0.12", features = ["blocking"] }
+maud = "0.26"
+chrono = "0.4"
+scraper = "0.19"
+rayon = "1.8"
+simple_logger = "4.3"
 log = "0.4"
+
+[dev-dependencies]
+clippy = "0.0.302"
feeds.txt

@@ -11,10 +11,10 @@ https://blog.andymatuschak.org/rss
 https://blog.benjojo.co.uk/rss.xml
 https://blog.codinghorror.com/rss/
 https://blog.frost.kiwi/feed.xml
+https://c.pgdm.ch/atom.xml
 https://calebporzio.com/feed
 https://chrisnicholas.dev/rss.xml
 https://christianselig.com/index.xml
-https://ciechanow.ski/atom.xml
 https://danluu.com/atom.xml
 https://darekkay.com/atom.xml
 https://decomposition.al/atom.xml

@@ -47,6 +47,7 @@ https://steveklabnik.com/feed.xml
 https://taylor.town/feed.xml
 https://vickiboykis.com/index.xml
 https://vitalik.eth.limo/feed.xml
+https://www.bitsaboutmoney.com/archive/rss/
 https://www.construction-physics.com/feed
 https://www.elidedbranches.com/feeds/posts/default
 https://www.jeffgeerling.com/blog.xml
src/index_generator.rs

@@ -5,76 +5,28 @@ extern crate reqwest;
 
 use chrono::DateTime;
 use chrono::Utc;
-use feed_rs::model::Entry;
 use maud::{html, Markup};
-use std::env;
 
 use crate::utilities;
-use crate::web_fetchers;
 
-fn create_featured_card(entry: &Entry) -> Markup {
-    let title = entry
-        .title
-        .as_ref()
-        .map_or_else(|| "".to_string(), |t| t.content.clone());
-
-    let link = entry.links.first().unwrap();
-    let lang = link.clone().href_lang.unwrap_or("en".to_string());
-
-    if lang != "en" {
-        log::warn!("Non english! {} {}", lang, link.href);
-    }
-
-    let mut image_url = entry
-        .media
-        .first()
-        .and_then(|m| m.content.first())
-        .and_then(|c| c.url.as_ref().map(|u| u.to_string()))
-        .unwrap_or_default();
-
-    // Fallback to fetching social image if direct extraction didn't work
-    if image_url.is_empty() {
-        log::info!(
-            "Falling back to searching for a social image for {}",
-            link.href
-        );
-        image_url = web_fetchers::fetch_social_image(link.href.as_str()).unwrap_or_default();
-    }
-
-    let description = entry.content.as_ref().map_or_else(
-        || {
-            entry
-                .summary
-                .as_ref()
-                .map_or_else(|| "".to_string(), |summary| summary.content.clone())
-        },
-        |content| {
-            content
-                .body
-                .as_ref()
-                .map_or_else(|| "".to_string(), |body| body.clone())
-        },
-    );
-
-    let cleaned_description = utilities::strip_html_tags(&description);
-    let truncated_description = utilities::truncate_description(&cleaned_description, 500);
-
-    let main_url = utilities::get_root_url(link.href.as_str());
-
+fn create_featured_card(entry: &utilities::Post) -> Markup {
     html! {
         article class="featured" {
             header {
-                img src=(image_url) alt="Entry image";
+                @if entry.image_url.is_some() {
+                    img src=(entry.image_url.as_ref().unwrap()) alt="Featured image";
+                }
                 hgroup {
-                    h2 { (title) }
-                    a href=(format!("http://{}", main_url)) { (main_url) }
+                    h2 { (entry.title) }
+                    a href=(format!("http://{}", entry.main_url)) { (entry.main_url) }
                 }
             }
             body {
-                p { (truncated_description) }
+                p { (entry.truncated_description) }
             }
             footer {
-                a class="grid" href=(link.href) style="--pico-text-decoration: none;" {
+                a class="grid" href=(entry.link) style="--pico-text-decoration: none;" {
                     button class="outline primary" { "Read Featured Post" }
                 }
             }
@@ -82,75 +34,24 @@ fn create_featured_card(entry: &Entry) -> Markup {
     }
 }
 
-fn create_post_card(entry: &Entry) -> Markup {
-    let title = entry
-        .title
-        .as_ref()
-        .map_or_else(|| "".to_string(), |t| t.content.clone());
-
-    let link = entry.links.first().unwrap();
-    let lang = link.clone().href_lang.unwrap_or("en".to_string());
-
-    if lang != "en" {
-        log::warn!("Non english! {} {}", lang, link.href);
-    }
-
-    let mut image_url = entry
-        .media
-        .first()
-        .and_then(|m| m.content.first())
-        .and_then(|c| c.url.as_ref().map(|u| u.to_string()))
-        .unwrap_or_default();
-
-    // Fallback to fetching social image if direct extraction didn't work
-    if image_url.is_empty() {
-        log::info!(
-            "Falling back to searching for a social image for {}",
-            link.href
-        );
-        image_url = web_fetchers::fetch_social_image(link.href.as_str()).unwrap_or_default();
-    }
-    if image_url.is_empty() {
-        log::warn!("No image could be gathered for {}", link.href);
-    }
-
-    let description = entry.content.as_ref().map_or_else(
-        || {
-            entry
-                .summary
-                .as_ref()
-                .map_or_else(|| "".to_string(), |summary| summary.content.clone())
-        },
-        |content| {
-            content
-                .body
-                .as_ref()
-                .map_or_else(|| "".to_string(), |body| body.clone())
-        },
-    );
-
-    let cleaned_description = utilities::strip_html_tags(&description);
-    let truncated_description = utilities::truncate_description(&cleaned_description, 500);
-
-    let main_url = utilities::get_root_url(link.href.as_str());
-
+fn create_post_card(entry: &utilities::Post) -> Markup {
     html! {
         article {
             header {
                 hgroup {
-                    h2 { (title) }
-                    a href=(format!("http://{}", main_url)) { (main_url) }
+                    h2 { (entry.title) }
+                    a href=(format!("http://{}", entry.main_url)) { (entry.main_url) }
                 }
             }
             body {
-                @if !image_url.is_empty() {
-                    img src=(image_url) alt="Entry image";
+                @if entry.image_url.is_some() {
+                    img src=(entry.image_url.as_ref().unwrap()) alt="Entry image";
                     p;
                 }
-                p { (truncated_description) }
+                p { (entry.truncated_description) }
             }
             footer {
-                a class="grid" href=(link.href) style="--pico-text-decoration: none;" {
+                a class="grid" href=(entry.link) style="--pico-text-decoration: none;" {
                     button class="outline secondary" { "Read Post" }
                 }
             }
@@ -201,16 +102,15 @@ fn generate_header() -> Markup {
     }
 }
 
-fn about_modal(entries: Vec<Entry>) -> Markup {
+fn about_modal(entries: Vec<utilities::Post>) -> Markup {
     // Get link for each entry, which is a blog post then,
     // convert it to a url to the main page of the blog
     let mut links = entries
         .iter()
-        .map(|entry| entry.links.first().unwrap().href.as_str())
-        .map(utilities::get_root_url)
-        .collect::<std::collections::HashSet<&str>>()
+        .map(|entry| entry.main_url.as_str())
+        .collect::<std::collections::HashSet<_>>()
         .into_iter()
-        .collect::<Vec<&str>>();
+        .collect::<Vec<_>>();
 
     // Alphabetical to be fair to everytone :)
     links.sort();
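Note on the hunk above: the rewritten about_modal leans on Post::main_url already being a root URL, so the HashSet round-trip exists only to dedupe blogs that appear with several posts before sorting. A minimal sketch of that dedupe-then-sort shape, with plain strings standing in for main_url values (hypothetical data):

use std::collections::HashSet;

fn main() {
    let main_urls = vec!["danluu.com", "taylor.town", "danluu.com"];

    let mut links = main_urls
        .iter()
        .copied()
        .collect::<HashSet<_>>() // collapses the duplicate "danluu.com"
        .into_iter()
        .collect::<Vec<_>>();

    links.sort(); // HashSet iteration order is arbitrary, so sort afterwards
    assert_eq!(links, vec!["danluu.com", "taylor.town"]);
}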
@@ -234,7 +134,7 @@ fn about_modal(entries: Vec<Entry>) -> Markup {
         }
         ul {
             @for link in links {
-                li {a href=("http://".to_owned() + link) {(link)}}
+                li {a href=(format!("{}{}","http://".to_owned() , link)) {(link)}}
             }
         }
         p {
@@ -246,24 +146,14 @@ fn about_modal(entries: Vec<Entry>) -> Markup {
     }
 }
 
-pub fn generate_index(mut entries: Vec<Entry>, featured: Entry) -> Markup {
-    let running_in_gitlab = env::var("CI").map(|val| val == "true").unwrap_or(false);
-
-    if running_in_gitlab {
-        log::info!("Building for deployment");
-        entries.truncate(30);
-    } else {
-        log::info!("Building for development");
-        entries.truncate(6);
-    }
-
-    let featured_card: maud::PreEscaped<String>;
-    if (utilities::get_entry_date(&featured)) > (chrono::Utc::now() - chrono::Duration::days(3)) {
-        featured_card = create_featured_card(&featured);
-        entries.retain(|entry| entry != &featured);
-    } else {
-        featured_card = html! {};
-    }
-
+pub fn generate_index(mut entries: Vec<utilities::Post>) -> Markup {
+    let featured = entries.first().unwrap().clone();
+    entries.remove(0);
+    log::info!(
+        "Featured article: {}, img: {:?}",
+        featured.link,
+        featured.image_url
+    );
 
     html! {
         (maud::DOCTYPE)
@@ -282,7 +172,7 @@ pub fn generate_index(mut entries: Vec<Entry>, featured: Entry) -> Markup {
         }
         body { main class="container" {
             {(generate_header())}
-            (featured_card)
+            {(create_featured_card(&featured))}
             div class="grid" {
                 @for column_entries in utilities::group_by_nth(&entries, 3) {
                     div {
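Side note on the maud templates in this file: the new code gates the img tag on entry.image_url.is_some() and then unwraps inside the branch. maud also accepts @if let, which would avoid the unwrap; a minimal sketch of that alternative (not what the commit does):

use maud::{html, Markup};

fn image_block(image_url: &Option<String>) -> Markup {
    html! {
        @if let Some(url) = image_url {
            img src=(url) alt="Entry image";
        }
    }
}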
src/main.rs (44 lines changed)

@@ -7,24 +7,50 @@ use std::error::Error;
 use std::fs::write;
 use std::fs::DirBuilder;
 use std::path::Path;
-use utilities::read_feed;
 
 mod index_generator;
 mod utilities;
 mod web_fetchers;
+use rayon::prelude::*;
 
 fn main() -> Result<(), Box<dyn Error>> {
     simple_logger::init_with_level(log::Level::Info).unwrap();
 
-    let featured = read_feed("featured_feeds.txt").first().unwrap().clone();
-    let entries = read_feed("feeds.txt");
-
-    log::info!(
-        "Featured article: {}",
-        entries[0].links.first().unwrap().href.as_str()
-    );
-
-    let index = index_generator::generate_index(entries, featured);
+    let mut featured = utilities::read_feed("featured_feeds.txt");
+
+    featured = featured
+        .iter_mut()
+        .map(|post| {
+            post.score += 60 * 24 * 2;
+            post.clone()
+        })
+        .collect::<Vec<_>>();
+
+    let mut entries = utilities::read_feed("feeds.txt");
+
+    entries.extend(featured);
+
+    entries.retain(|entry| entry.score.is_positive());
+
+    entries.par_iter_mut().for_each(|entry| {
+        if entry.image_url.is_some() {
+            entry.score += 300;
+        } else {
+            match web_fetchers::fetch_social_image(entry.link.clone()) {
+                Ok(social_image_url) => {
+                    entry.image_url = Some(social_image_url);
+                }
+                Err(error) => {
+                    log::info!("{}: {}", error, entry.link.clone());
+                    entry.score += -600;
+                }
+            }
+        }
+    });
+
+    entries.retain(|entry| entry.score.is_positive());
+    entries.sort();
+
+    let index = index_generator::generate_index(entries);
 
     let output_path = Path::new("output/index.html");
     DirBuilder::new()
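Taken together with the utilities.rs hunk below, the score arithmetic works in minutes: a post starts with the minutes left until it turns 14 days old, featured feeds get a flat 60 * 24 * 2 (two days' worth), a post that already carries an image gains 300, a failed social-image fetch costs 600, and anything non-positive is dropped by the retain calls. A worked example under those assumptions (hypothetical post age):

use chrono::{Duration, Utc};

fn main() {
    let now = Utc::now();
    let published = now - Duration::days(4); // hypothetical 4-day-old post

    // Same expression as Post::from_entry: minutes until the 14-day cutoff.
    let base = (published - (now - Duration::days(14))).num_minutes();
    assert_eq!(base, 60 * 24 * 10); // ten days of freshness left

    let featured = base + 60 * 24 * 2; // the boost applied in main()
    let with_image = featured + 300;   // the image bonus from the par_iter_mut pass
    assert!(with_image.is_positive()); // survives both retain() filters
}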
src/utilities.rs

@@ -1,11 +1,95 @@
 use crate::web_fetchers;
+use chrono::{DateTime, Duration, Utc};
 use feed_rs::model::Entry;
 use rayon::prelude::*;
 use scraper::{Html, Selector};
-use std::cmp::Reverse;
+use std::cmp::Ordering;
 use std::fs;
 
-pub fn read_feed(path: &str) -> Vec<Entry> {
+#[derive(Clone, PartialEq, Eq)]
+pub struct Post {
+    pub title: String,
+    pub link: String,
+    pub date: DateTime<Utc>,
+    pub lang: String,
+    pub image_url: Option<String>,
+    pub truncated_description: String,
+    pub main_url: String,
+    pub score: i64,
+}
+
+impl Ord for Post {
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.score.partial_cmp(&other.score).unwrap().reverse()
+    }
+}
+
+impl PartialOrd for Post {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Post {
+    fn from_entry(entry: &feed_rs::model::Entry) -> Self {
+        let title = entry
+            .title
+            .as_ref()
+            .map_or_else(|| "".to_string(), |t| t.content.clone());
+
+        let link = entry.links.first().unwrap();
+
+        let date = get_entry_date(entry);
+
+        let lang = link.clone().href_lang.unwrap_or("en".to_string());
+
+        if lang != "en" {
+            log::warn!("Non english! {} {}", lang, link.href);
+        }
+
+        let image_url = entry
+            .media
+            .first()
+            .and_then(|m| m.content.first())
+            .and_then(|c| c.url.as_ref().map(|u| u.to_string()));
+
+        let description = entry.content.as_ref().map_or_else(
+            || {
+                entry
+                    .summary
+                    .as_ref()
+                    .map_or_else(|| "".to_string(), |summary| summary.content.clone())
+            },
+            |content| {
+                content
+                    .body
+                    .as_ref()
+                    .map_or_else(|| "".to_string(), |body| body.clone())
+            },
+        );
+
+        let cleaned_description = strip_html_tags(&description);
+        let truncated_description = truncate_description(&cleaned_description, 500);
+
+        let main_url = get_root_url(link.href.as_str());
+
+        let score = (date - (Utc::now() - Duration::days(14))).num_minutes();
+
+        Post {
+            title,
+            link: link.href.clone(),
+            date,
+            lang,
+            image_url,
+            truncated_description,
+            main_url,
+            score,
+        }
+    }
+}
+
+pub fn read_feed(path: &str) -> Vec<Post> {
     let binding = fs::read_to_string(path).unwrap();
     let feed_urls: Vec<&str> = binding.lines().collect();
 
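The Ord impl above is what lets a plain sort() order posts by score, highest first: cmp delegates to the i64 scores and reverses the result. Since i64 is totally ordered, the partial_cmp(...).unwrap() can never panic here (self.score.cmp(&other.score) would be equivalent). A trimmed-down sketch of just the ordering behaviour, using a stand-in struct rather than the real Post:

use std::cmp::Ordering;

#[derive(PartialEq, Eq, Debug)]
struct Scored {
    score: i64,
}

impl Ord for Scored {
    fn cmp(&self, other: &Self) -> Ordering {
        self.score.cmp(&other.score).reverse() // descending by score
    }
}

impl PartialOrd for Scored {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

fn main() {
    let mut posts = vec![Scored { score: 10 }, Scored { score: 300 }, Scored { score: 42 }];
    posts.sort(); // no sort_by needed; Ord already encodes "highest first"
    assert_eq!(posts.first().unwrap().score, 300);
}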
@@ -27,9 +111,12 @@ pub fn read_feed(path: &str) -> Vec<Entry> {
     }
 
     entries.retain(validate_entry_date);
-    entries.sort_by_key(|entry| Reverse(get_entry_date(entry)));
 
     entries
+        .par_iter()
+        .map(Post::from_entry)
+        .filter(|entry| entry.date < chrono::Utc::now())
+        .collect::<Vec<_>>()
 }
 
 fn validate_entry_date(entry: &Entry) -> bool {
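read_feed now does the Entry to Post conversion in parallel; rayon's par_iter/map/filter pipeline keeps the element order when collected into a Vec. The same shape on toy data (the closures are stand-ins, not the real conversion):

use rayon::prelude::*;

fn main() {
    let entries: Vec<i64> = vec![1, 2, 3, 4];
    let posts: Vec<i64> = entries
        .par_iter()
        .map(|n| n * 100)     // stands in for Post::from_entry
        .filter(|n| *n < 400) // stands in for dropping future-dated entries
        .collect();
    assert_eq!(posts, vec![100, 200, 300]); // order is preserved
}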
@@ -42,17 +129,17 @@ fn validate_entry_date(entry: &Entry) -> bool {
     }
 }
 
-pub fn get_entry_date(entry: &Entry) -> chrono::DateTime<chrono::Utc> {
+pub fn get_entry_date(entry: &Entry) -> DateTime<Utc> {
     entry.published.unwrap_or(entry.updated.unwrap_or_default())
 }
 
-pub fn get_root_url(input_url: &str) -> &str {
+pub fn get_root_url(input_url: &str) -> String {
     let mut url = input_url;
 
     url = url.strip_prefix("https://").unwrap_or(url);
     url = url.strip_prefix("http://").unwrap_or(url);
 
-    url.split_once('/').unwrap().0
+    url.split_once('/').unwrap().0.to_string()
 }
 
 pub fn truncate_description(description: &str, max_length: usize) -> String {
src/web_fetchers.rs

@@ -17,7 +17,7 @@ pub fn fetch_feed(url: &str) -> Result<Vec<Entry>, Box<dyn Error>> {
     Ok(feed.entries)
 }
 
-pub fn fetch_social_image(url: &str) -> Result<String, Box<dyn std::error::Error>> {
+pub fn fetch_social_image(url: String) -> Result<String, Box<dyn std::error::Error>> {
     let html = reqwest::blocking::get(url)?.text()?;
     let document = Html::parse_document(&html);
     let selector = Selector::parse("meta[property=\"og:image\"]").unwrap();

@@ -25,8 +25,11 @@ pub fn fetch_social_image(url: &str) -> Result<String, Box<dyn std::error::Error
     let image_url = document
         .select(&selector)
         .next()
-        .and_then(|element| element.value().attr("content"))
-        .unwrap_or("");
+        .and_then(|element| element.value().attr("content"));
 
-    Ok(image_url.to_string())
+    if let Some(url) = image_url {
+        Ok(url.to_string())
+    } else {
+        Err("No social image found".into())
+    }
 }
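The selector logic itself is unchanged; what the commit changes is that a missing og:image tag is now an explicit Err instead of a silent empty string. A self-contained sketch of the same extraction against a fixed HTML snippet (no network involved):

use scraper::{Html, Selector};

fn main() {
    let html = r#"<head><meta property="og:image" content="https://example.com/card.png"></head>"#;
    let document = Html::parse_document(html);
    let selector = Selector::parse("meta[property=\"og:image\"]").unwrap();

    let image_url = document
        .select(&selector)
        .next()
        .and_then(|element| element.value().attr("content"));

    match image_url {
        Some(url) => println!("{}", url),          // https://example.com/card.png
        None => println!("No social image found"), // the new Err branch
    }
}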
(feed tests; file path not shown in this view)

@@ -1,39 +1,15 @@
-use feed_rs::parser;
-use reqwest::blocking::get;
+use std::collections::HashSet;
 use std::fs;
 
 // Function to read URLs from a file
-fn read_feed() -> Vec<String> {
-    let binding = fs::read_to_string("feeds.txt").unwrap();
+fn read_feed(path: &str) -> Vec<String> {
+    let binding = fs::read_to_string(path).unwrap();
     binding.lines().map(|s| s.to_owned()).collect()
 }
 
-// Function to fetch and parse a feed, returning true if successful
-fn fetch_and_parse_feed(url: &str) -> bool {
-    let content = match get(url) {
-        Ok(response) => response.text().unwrap_or_default(),
-        Err(_) => return false,
-    };
-
-    parser::parse(content.as_bytes()).is_ok()
-}
-
-#[test]
-fn test_that_urls_point_to_valid_feeds() {
-    let urls = read_feed();
-
-    for url in urls {
-        assert!(
-            fetch_and_parse_feed(&url),
-            "Feed at URL failed validation: {}",
-            url
-        );
-    }
-}
-
 #[test]
 fn test_if_feeds_are_in_alphabetical_order() {
-    let mut urls = read_feed();
+    let mut urls = read_feed("feeds.txt");
 
     if !urls.windows(2).all(|w| w[0] < w[1]) {
         println!("Sorted feeds.txt:");

@@ -46,3 +22,10 @@ fn test_if_feeds_are_in_alphabetical_order() {
         panic!("feeds.txt was not sorted!")
     }
 }
+
+#[test]
+fn test_if_feeds_lists_have_overlapping_feed() {
+    let set1: HashSet<_> = read_feed("feeds.txt").into_iter().collect();
+    let set2: HashSet<_> = read_feed("featured_feeds.txt").into_iter().collect();
+    assert!(set1.is_disjoint(&set2));
+}
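The new disjointness test backs the merge in main(): featured_feeds.txt entries get a flat score boost and are then extended into the main list, so a URL present in both files would be fetched and ranked twice. HashSet::is_disjoint does the real work; a tiny standalone example with made-up URLs:

use std::collections::HashSet;

fn main() {
    let feeds: HashSet<&str> = ["https://a.example/rss", "https://b.example/rss"].into_iter().collect();
    let featured: HashSet<&str> = ["https://c.example/rss"].into_iter().collect();
    assert!(feeds.is_disjoint(&featured)); // fails if any feed is listed in both files
}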