Merge branch 'featured-post' into 'master'

Featured post See merge request MisterBiggs/zine!5
2025-09-19 04:02:37 +00:00 · 2024-03-01 06:45:39 +00:00
parent b1d86f634e a4231b6852
commit 8b30cc58b9
7 changed files with 448 additions and 320 deletions
--- a/featured_feeds.txt
+++ b/featured_feeds.txt
@@ -0,0 +1,2 @@
+https://ciechanow.ski/atom.xml
+https://factorio.com/blog/rss
--- a/feeds.txt
+++ b/feeds.txt
@@ -1,11 +1,13 @@
 https://0xd34df00d.me/rss.xml
 https://adamsilver.io/atom.xml
 https://alexturek.com/feed
+https://api.quantamagazine.org/feed/
 https://austinhenley.com/blog/feed.rss
 https://bcantrill.dtrace.org/feed/
 https://blog.andymatuschak.org/rss
 https://blog.benjojo.co.uk/rss.xml
 https://blog.codinghorror.com/rss/
+https://blog.frost.kiwi/feed.xml
 https://calebporzio.com/feed
 https://chrisnicholas.dev/rss.xml
 https://ciechanow.ski/atom.xml
@@ -14,10 +16,14 @@ https://decomposition.al/atom.xml
 https://fabiensanglard.net/rss.xml
 https://factorio.com/blog/rss
 https://fasterthanli.me/index.xml
+https://gaultier.github.io/blog/feed.xml
+https://granary.io/url?input=html&output=rss&url=https://jamesg.blog
+https://jatan.space/rss/
 https://jvns.ca/atom.xml
 https://kinduff.com/feed.xml
 https://magnuschatt.medium.com/feed
 https://mary.codes/rss.xml
+https://matklad.github.io/feed.xml
 https://mtlynch.io/posts/index.xml
 https://newsletter.danhon.com/rss
 https://notes.ansonbiggs.com/rss/
@@ -26,8 +32,14 @@ https://orbitalindex.com/feed.xml
 https://projects.ansonbiggs.com/index.xml
 https://simonwillison.net/atom/entries/
 https://stephango.com/feed.xml
+https://steveklabnik.com/feed.xml
 https://taylor.town/feed.xml
+https://unzip.dev/rss/
+https://vickiboykis.com/index.xml
 https://www.construction-physics.com/feed
 https://www.elidedbranches.com/feeds/posts/default
 https://www.jeffgeerling.com/blog.xml
-https://xeiaso.net/blog.rss
+https://www.makerstations.io/rss/
+https://www.micahlerner.com/feed.xml
+https://www.troyhunt.com/rss/
+https://xeiaso.net/blog.rss
--- a/output/style.css
+++ b/output/style.css
@@ -0,0 +1,6 @@
+/* Title group of the featured card: stack its children vertically and
+   centre them on both axes. */
+.featured-title {
+    display: flex;
+    flex-direction: column;
+    justify-content: center;
+    align-items: center;
+}
--- a/src/index_generator.rs
+++ b/src/index_generator.rs
@@ -0,0 +1,291 @@
+extern crate chrono;
+extern crate feed_rs;
+extern crate maud;
+extern crate reqwest;
+
+use chrono::DateTime;
+use chrono::Utc;
+use feed_rs::model::Entry;
+use maud::{html, Markup};
+use std::env;
+
+use crate::utilities;
+use crate::web_fetchers;
+
+/// Renders the featured entry as a card with the image placed beside the title.
+///
+/// The image comes from the entry's first media item; when the feed provides
+/// none, the post page is fetched and searched for a social image. The
+/// description is the entry content (or, failing that, its summary) with HTML
+/// stripped and passed through `utilities::truncate_description` with a limit
+/// of 500.
+///
+/// # Panics
+///
+/// Panics if `entry` has no links.
+fn create_featured_card(entry: &Entry) -> Markup {
+    let title = entry
+        .title
+        .as_ref()
+        .map_or_else(|| "".to_string(), |t| t.content.clone());
+
+    let link = entry.links.first().unwrap();
+    // Borrow the language tag rather than cloning the whole link to read one field.
+    let lang = link.href_lang.as_deref().unwrap_or("en");
+
+    if lang != "en" {
+        println!("Non english! {} {}", lang, link.href);
+    }
+
+    let mut image_url = entry
+        .media
+        .first()
+        .and_then(|m| m.content.first())
+        .and_then(|c| c.url.as_ref().map(|u| u.to_string()))
+        .unwrap_or_default();
+
+    // Fallback to fetching social image if direct extraction didn't work
+    if image_url.is_empty() {
+        println!(
+            "Falling back to searching for a social image for {}",
+            link.href
+        );
+        image_url = web_fetchers::fetch_social_image(link.href.as_str()).unwrap_or_default();
+    }
+
+    // Prefer the full content body; fall back to the summary, then to nothing.
+    let description = entry.content.as_ref().map_or_else(
+        || {
+            entry
+                .summary
+                .as_ref()
+                .map_or_else(|| "".to_string(), |summary| summary.content.clone())
+        },
+        |content| {
+            content
+                .body
+                .as_ref()
+                .map_or_else(|| "".to_string(), |body| body.clone())
+        },
+    );
+
+    let cleaned_description = utilities::strip_html_tags(&description);
+    let truncated_description = utilities::truncate_description(&cleaned_description, 500);
+
+    // Host of the post, shown under the title and linked to the blog's front page.
+    let main_url = utilities::get_root_url(link.href.as_str());
+
+    html! {
+        article {
+            header class="grid"  {
+                img src=(image_url) alt="Entry image";
+                hgroup class="featured-title" {
+                    h2 { (title) }
+                    a href=(format!("http://{}", main_url)) { (main_url) }
+                }
+            }
+            body {
+                p { (truncated_description) }
+            }
+            footer {
+                a class="grid" href=(link.href) style="--pico-text-decoration: none;" {
+                    button class="outline primary" { "Read Post" }
+                }
+            }
+        }
+    }
+}
+
+/// Builds the markup for a regular (non-featured) post card.
+///
+/// The card carries the title, the source host, an optional image, a
+/// plain-text description and a "Read Post" button pointing at the post.
+///
+/// # Panics
+///
+/// Panics when the entry carries no links.
+fn create_post_card(entry: &Entry) -> Markup {
+    let heading = match entry.title.as_ref() {
+        Some(text) => text.content.clone(),
+        None => String::new(),
+    };
+
+    let post_link = entry.links.first().unwrap();
+
+    let language = post_link.href_lang.as_deref().unwrap_or("en");
+    if language != "en" {
+        println!("Non english! {} {}", language, post_link.href);
+    }
+
+    // Image advertised by the feed itself, if any.
+    let feed_image = entry
+        .media
+        .first()
+        .and_then(|media| media.content.first())
+        .and_then(|content| content.url.as_ref())
+        .map(|url| url.to_string())
+        .filter(|url| !url.is_empty());
+
+    // Otherwise look for a social image on the post's own page.
+    let image_url = match feed_image {
+        Some(url) => url,
+        None => {
+            println!(
+                "Falling back to searching for a social image for {}",
+                post_link.href
+            );
+            web_fetchers::fetch_social_image(post_link.href.as_str()).unwrap_or_default()
+        }
+    };
+
+    // Content body wins over the summary; either may be missing.
+    let raw_description = match entry.content.as_ref() {
+        Some(content) => content.body.clone().unwrap_or_default(),
+        None => entry
+            .summary
+            .as_ref()
+            .map(|summary| summary.content.clone())
+            .unwrap_or_default(),
+    };
+
+    let plain_description = utilities::strip_html_tags(&raw_description);
+    let blurb = utilities::truncate_description(&plain_description, 500);
+
+    let host = utilities::get_root_url(post_link.href.as_str());
+
+    html! {
+        article {
+            header {
+                hgroup {
+                    h2 { (heading) }
+                    a href=(format!("http://{}", host)) { (host) }
+                }
+            }
+            body {
+                @if !image_url.is_empty() {
+                    img src=(image_url) alt="Entry image";
+                    p;
+                }
+                p { (blurb) }
+            }
+            footer {
+                a class="grid" href=(post_link.href) style="--pico-text-decoration: none;" {
+                    button class="outline secondary" { "Read Post" }
+                }
+            }
+        }
+    }
+}
+
+/// Renders the page footer: author link, source-code link and the UTC time at
+/// which the page was generated.
+fn generate_footer() -> Markup {
+    let generated_at: DateTime<Utc> = Utc::now();
+    let timestamp = generated_at.format("%Y-%m-%d %H:%M:%S").to_string();
+
+    html! {
+        footer class="container" {
+            small {
+                p {
+                    a href="https://ansonbiggs.com" { "Anson Biggs" }
+                    " - "
+                    a href="https://gitlab.com/MisterBiggs/zine" { "Source Code" }
+                    " - "
+                    "Page generated at: "
+                    em data-tooltip="8AM Mountain Time" { (timestamp) " UTC" }
+                }
+            }
+        }
+    }
+}
+/// Renders the top navigation bar: the site title, the "About" button and the
+/// theme dropdown.
+fn generate_header() -> Markup {
+    // (value of `data-theme-switcher`, visible label) for each dropdown option.
+    let theme_options = [("auto", "Auto"), ("light", "Light"), ("dark", "Dark")];
+
+    html! {
+        header {
+            nav {
+                ul {
+                    li { h1 { "Anson's Aggregated Feed" }}
+                }
+                ul {
+                    li { button data-target="about" onclick="toggleModal(event)" { "About" } }
+                    li {
+                        details class="dropdown" {
+                            summary role="button" class="outline secondary" { "Theme" }
+                            ul {
+                                @for (mode, label) in theme_options {
+                                    li { a href="#" data-theme-switcher=(mode) { (label) }}
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+/// Renders the "About" dialog, including the list of blogs that currently
+/// have a post among `entries`.
+///
+/// # Panics
+///
+/// Panics if any entry has no links.
+fn about_modal(entries: Vec<Entry>) -> Markup {
+    // Get link for each entry, which is a blog post then,
+    // convert it to a url to the main page of the blog.
+    // A BTreeSet de-duplicates the hosts and yields them alphabetically,
+    // to be fair to everyone :)
+    let links = entries
+        .iter()
+        .map(|entry| entry.links.first().unwrap().href.as_str())
+        .map(utilities::get_root_url)
+        .collect::<std::collections::BTreeSet<&str>>();
+
+    html! {
+        dialog id="about" {
+            article {
+                header {
+                    a href="#" aria-label="Close" rel="prev" {}
+                    p { strong { "About" }}
+                }
+                p {
+                    "When looking for a RSS reader I came across "
+                    a href="https://news.russellsaw.io/" {"news.russellsaw.io"}
+                    " I thought the idea of building my own personalised newspaper was cool. \
+                    So, I decided to build a clone using my own subscribed RSS feeds."
+                }
+                p {
+                    "This page updates daily at 8:11ish AM Mountain Time. The following blogs are"
+                    " featured on the page currently:"
+                }
+                ul {
+                    @for link in links {
+                        // `link` is a bare host; without a scheme the browser would
+                        // treat it as a path relative to this page.
+                        li {a href=(format!("http://{}", link)) {(link)}}
+                    }
+                }
+                p {
+                    "For the full list of feeds that are followed see the raw list "
+                    a href="https://gitlab.com/MisterBiggs/zine/-/blob/master/feeds.txt" { "here." }
+                }
+            }
+        }
+    }
+}
+
+/// Builds the complete index page.
+///
+/// The first element of `entries` is rendered as the featured card; the
+/// remaining entries are cut down to 30 (when the `CI` environment variable
+/// is `"true"`) or 6 (otherwise) and laid out in three columns. When
+/// `entries` is empty the page is rendered without a featured card.
+pub fn generate_index(mut entries: Vec<Entry>) -> Markup {
+    let running_in_gitlab = env::var("CI").map(|val| val == "true").unwrap_or(false);
+
+    // `Vec::remove` panics on an empty vector, so only split off a featured
+    // entry when there is one.
+    let featured = if entries.is_empty() {
+        None
+    } else {
+        Some(entries.remove(0))
+    };
+
+    if running_in_gitlab {
+        println!("Building for deployment");
+        entries.truncate(30);
+    } else {
+        println!("Building for development");
+        entries.truncate(6);
+    }
+
+    html! {
+        (maud::DOCTYPE)
+        html {
+            head {
+                title { "Anson's Zine" }
+                meta charset="utf-8";
+                meta name="viewport" content="width=device-width, initial-scale=1";
+                // The trailing `;` matters: without it the following `link` line is
+                // parsed as extra attributes of this `meta` element.
+                meta name="description" content="Aggregate newspaper of RSS feeds for Anson";
+                link rel="icon" href="data:image/svg+xml,<svg xmlns=%22http://www.w3.org/2000/svg%22 viewBox=%220 0 100 100%22><text y=%22.9em%22 font-size=%2290%22>📰</text></svg>";
+                link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@picocss/pico@2/css/pico.blue.min.css";
+                link rel="stylesheet" href="style.css";
+            }
+            body { main class="container" {
+                {(generate_header())}
+                @if let Some(featured) = &featured {
+                    (create_featured_card(featured))
+                }
+                div class="grid" {
+                    @for column_entries in utilities::group_by_nth(&entries, 3) {
+                        div {
+                            @for entry in column_entries {
+                                {(create_post_card(&entry))}
+                            }
+                        }
+
+                    }
+                }
+                {(generate_footer())}
+                {(about_modal(entries))}
+                script src="modal.js" {}
+                script src="minimal-theme-switcher.js" {}
+            }}
+        }
+    }
+}
--- a/src/main.rs
+++ b/src/main.rs
@@ -3,333 +3,29 @@ extern crate feed_rs;
 extern crate maud;
 extern crate reqwest;

-use chrono::DateTime;
-use chrono::Utc;
-use feed_rs::model::Entry;
-use feed_rs::parser;
-use maud::{html, Markup};
-use reqwest::blocking::get;
-use scraper::{Html, Selector};
-use std::cmp::Reverse;
-use std::env;
 use std::error::Error;
-use std::fs;
 use std::fs::write;
 use std::fs::DirBuilder;
 use std::path::Path;
+use utilities::read_feed;

-use rayon::prelude::*;
-
-fn fetch_feed(url: &str) -> Result<Vec<Entry>, Box<dyn Error>> {
-    let content = get(url)?.text()?;
-    let feed = parser::parse(content.as_bytes())?;
-    println!("\tFeed {} returned {} items", url, feed.entries.len());
-    Ok(feed.entries)
-}
-
-fn fetch_social_image(url: &str) -> Result<String, Box<dyn std::error::Error>> {
-    let html = reqwest::blocking::get(url)?.text()?;
-    let document = Html::parse_document(&html);
-    let selector = Selector::parse("meta[property=\"og:image\"]").unwrap();
-
-    let image_url = document
-        .select(&selector)
-        .next()
-        .and_then(|element| element.value().attr("content"))
-        .unwrap_or("");
-
-    Ok(image_url.to_string())
-}
-fn create_html_card(entry: &Entry) -> Markup {
-    let title = entry
-        .title
-        .as_ref()
-        .map_or_else(|| "".to_string(), |t| t.content.clone());
-
-    let link = entry.links.first().unwrap();
-    let lang = link.clone().href_lang.unwrap_or("en".to_string());
-
-    if lang != "en" {
-        println!("Non english! {} {}", lang, link.href);
-    }
-
-    let mut image_url = entry
-        .media
-        .first()
-        .and_then(|m| m.content.first())
-        .and_then(|c| c.url.as_ref().map(|u| u.to_string()))
-        .unwrap_or_default();
-
-    // Fallback to fetching social image if direct extraction didn't work
-    if image_url.is_empty() {
-        println!(
-            "Falling back to searching for a social image for {}",
-            link.href
-        );
-        image_url = fetch_social_image(link.href.as_str()).unwrap_or_default();
-    }
-
-    let description = entry.content.as_ref().map_or_else(
-        || {
-            entry
-                .summary
-                .as_ref()
-                .map_or_else(|| "".to_string(), |summary| summary.content.clone())
-        },
-        |content| {
-            content
-                .body
-                .as_ref()
-                .map_or_else(|| "".to_string(), |body| body.clone())
-        },
-    );
-
-    let cleaned_description = strip_html_tags(&description);
-    let truncated_description = truncate_description(&cleaned_description, 500);
-
-    let main_url = get_root_url(link.href.as_str());
-
-    html! {
-        article {
-            header {
-                hgroup {
-                    h2 { (title) }
-                    a href=(format!("http://{}", main_url)) { (main_url) }
-                }
-            }
-            body {
-                @if !image_url.is_empty() {
-                    img src=(image_url) alt="Entry image";
-                    p;
-                }
-                p { (truncated_description) }
-            }
-            footer {
-                a class="grid" href=(link.href) style="--pico-text-decoration: none;" {
-                    button class="outline secondary" { "Read Post" }
-                }
-            }
-        }
-    }
-}
-
-fn get_root_url(input_url: &str) -> &str {
-    let mut url = input_url;
-
-    url = url.strip_prefix("https://").unwrap_or(url);
-    url = url.strip_prefix("http://").unwrap_or(url);
-
-    url.split_once('/').unwrap().0
-}
-
-fn truncate_description(description: &str, max_length: usize) -> String {
-    let description_trimmed = description.trim();
-    if description_trimmed.len() > max_length {
-        let mut char_boundary = max_length;
-        for (idx, _) in description_trimmed.char_indices() {
-            if idx > max_length {
-                break;
-            }
-            char_boundary = idx;
-        }
-        format!("{}...", &description_trimmed[..char_boundary])
-    } else {
-        description_trimmed.to_string()
-    }
-}
-
-fn strip_html_tags(html: &str) -> String {
-    let document = Html::parse_document(html);
-    // Use the wildcard selector to select all nodes and extract their text.
-    let selector = Selector::parse("*").unwrap();
-    let mut text_content = String::new();
-
-    for element in document.select(&selector) {
-        let text = element.text().collect::<Vec<_>>().join(" ");
-        text_content.push_str(&text);
-        text_content.push(' ');
-    }
-
-    text_content.trim().to_string()
-}
-
-fn generate_header() -> Markup {
-    html! {
-        header {
-            nav {
-                ul {
-                    li { h1 { "Anson's Aggregated Feed" }}
-                }
-                ul {
-                    li { button data-target="about" onclick="toggleModal(event)" { "About" } }
-                    li {
-                        details class="dropdown" {
-                            summary role="button" class="outline secondary" { "Theme" }
-                            ul {
-                                li { a href="#" data-theme-switcher="auto" { "Auto" }}
-                                li { a href="#" data-theme-switcher="light" { "Light" }}
-                                li { a href="#" data-theme-switcher="dark" { "Dark" }}
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    }
-}
-
-fn about_modal(entries: Vec<Entry>) -> Markup {
-    // Get link for each entry, which is a blog post then,
-    // convert it to a url to the main page of the blog
-    let mut links = entries
-        .iter()
-        .map(|entry| entry.links.first().unwrap().href.as_str())
-        .map(get_root_url)
-        .collect::<std::collections::HashSet<&str>>()
-        .into_iter()
-        .collect::<Vec<&str>>();
-
-    // Alphabetical to be fair to everytone :)
-    links.sort();
-
-    html! {
-        dialog id="about" {
-            article {
-                header {
-                    a href="#" aria-label="Close" rel="prev" {}
-                    p { strong { "About" }}
-                }
-                p {
-                    "When looking for a RSS reader I came across "
-                    a href="https://news.russellsaw.io/" {"news.russellsaw.io"}
-                    " I thought the idea of building my own personalised newspaper was cool. \
-                    So, I decided to build a clone using my own subscribed RSS feeds."
-                }
-                p {
-                    "This page updates daily at 8:11ish AM Mountain Time. The following blogs are"
-                    " featured on the page currently:"
-                }
-                ul {
-                    @for link in links {
-                        li {a href=(link) {(link)}}
-                    }
-                }
-                p {
-                    "For the full list of feeds that are followed see the raw list "
-                    a href="https://gitlab.com/MisterBiggs/zine/-/blob/master/feeds.txt" { "here." }
-                }
-            }
-        }
-    }
-}
-
-fn generate_footer() -> Markup {
-    let utc: DateTime<Utc> = Utc::now();
-    let formatted_utc = utc.format("%Y-%m-%d %H:%M:%S").to_string();
-
-    html! {
-        footer class="container" {
-            small {
-                p {
-                    a href="https://ansonbiggs.com" { "Anson Biggs" }
-                    " - "
-                    a href="https://gitlab.com/MisterBiggs/zine" { "Source Code" }
-                    " - "
-                    "Page generated at: " em data-tooltip="8AM Mountain Time" { (formatted_utc) " UTC" }
-                }
-            }
-        }
-    }
-}
-
-fn group_by_nth<T: Clone>(slice: &[T], n: usize) -> Vec<Vec<T>> {
-    (0..n)
-        .map(|i| {
-            slice
-                .iter()
-                .enumerate()
-                .filter_map(|(index, value)| {
-                    if index % n == i {
-                        Some(value.clone())
-                    } else {
-                        None
-                    }
-                })
-                .collect()
-        })
-        .collect()
-}
-
-fn generate_index(mut entries: Vec<Entry>) -> Markup {
-    let running_in_gitlab = env::var("CI").map(|val| val == "true").unwrap_or(false);
-
-    if running_in_gitlab {
-        println!("Building for deployment");
-        entries.truncate(30);
-    } else {
-        println!("Building for development");
-        entries.truncate(10);
-    }
-
-    html! {
-        (maud::DOCTYPE)
-        html {
-            head {
-                title { "Anson's Zine" }
-                meta charset="utf-8";
-                meta name="viewport" content="width=device-width, initial-scale=1";
-                meta name="description" content="Aggregate newspaper of RSS feeds for Anson"
-                link rel="icon" href="data:image/svg+xml,<svg xmlns=%22http://www.w3.org/2000/svg%22 viewBox=%220 0 100 100%22><text y=%22.9em%22 font-size=%2290%22>📰</text></svg>";
-                link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@picocss/pico@2/css/pico.blue.min.css";
-                link rel="stylesheet" href="style.css";
-            }
-            body { main class="container" {
-                {(generate_header())}
-                div class="grid" {
-                    @for column_entries in group_by_nth(&entries, 3) {
-                        div {
-                            @for entry in column_entries {
-                                {(create_html_card(&entry))}
-                            }
-                        }
-
-                    }
-                }
-                {(generate_footer())}
-                {(about_modal(entries))}
-                script src="modal.js" {}
-                script src="minimal-theme-switcher.js" {}
-            }}
-        }
-    }
-}
+mod index_generator;
+mod utilities;
+mod web_fetchers;

 fn main() -> Result<(), Box<dyn Error>> {
-    let binding = fs::read_to_string("feeds.txt").unwrap();
-    let feed_urls: Vec<&str> = binding.lines().collect();
+    let featured = read_feed("featured_feeds.txt").first().unwrap().clone();
+    let mut entries = read_feed("feeds.txt");

-    println!("Fetching feeds:");
-    let raw_entries: Vec<Result<Vec<Entry>, String>> = feed_urls
-        .into_par_iter()
-        .map(|url| {
-            fetch_feed(url).map_err(|e| format!("Failed to fetch or parse feed {}: {}", url, e))
-        })
-        .collect();
+    // Move featured article to the front
+    entries.retain(|entry| entry != &featured);
+    entries.insert(0, featured);
+    println!(
+        "Featured article: {}",
+        entries[0].links.first().unwrap().href.as_str()
+    );

-    let mut entries: Vec<Entry> = Vec::new();
-    for entry in raw_entries {
-        match entry {
-            Ok(mut feed_entries) => entries.append(&mut feed_entries),
-            Err(e) => println!("{}", e),
-        }
-    }
-
-    // Remove any entries that don't have a timestamp, and then sort by timestamps
-    entries.retain(|entry| entry.published.is_some() || entry.updated.is_some());
-    entries
-        .sort_by_key(|entry| Reverse(entry.published.unwrap_or(entry.updated.unwrap_or_default())));
-
-    let html_string = generate_index(entries).into_string();
+    let index = index_generator::generate_index(entries);

    let output_path = Path::new("output/index.html");
    DirBuilder::new()
@@ -337,7 +33,7 @@ fn main() -> Result<(), Box<dyn Error>> {
        .create(output_path.parent().unwrap())
        .unwrap();

-    match write(output_path, html_string) {
+    match write(output_path, index.into_string()) {
        Ok(_) => println!("Successfully wrote to {}", output_path.display()),
        Err(e) => eprintln!("Failed to write to {}: {}", output_path.display(), e),
    }
--- a/src/utilities.rs
+++ b/src/utilities.rs
@@ -0,0 +1,93 @@
+use crate::web_fetchers;
+use feed_rs::model::Entry;
+use rayon::prelude::*;
+use scraper::{Html, Selector};
+use std::cmp::Reverse;
+use std::fs;
+
+/// Reads a list of feed URLs (one per line) from `path`, fetches every feed in
+/// parallel and returns all dated entries, newest first.
+///
+/// Feeds that fail to download or parse are reported on stdout and skipped.
+/// Entries with neither a `published` nor an `updated` timestamp are dropped.
+///
+/// # Panics
+///
+/// Panics if the file at `path` cannot be read.
+pub fn read_feed(path: &str) -> Vec<Entry> {
+    let contents = fs::read_to_string(path).unwrap();
+    let urls: Vec<&str> = contents.lines().collect();
+
+    println!("Fetching feeds:");
+    // Errors are turned into strings inside the closure so the results can be
+    // collected across rayon's worker threads.
+    let results: Vec<Result<Vec<Entry>, String>> = urls
+        .into_par_iter()
+        .map(|url| match web_fetchers::fetch_feed(url) {
+            Ok(items) => Ok(items),
+            Err(e) => Err(format!("Failed to fetch or parse feed {}: {}", url, e)),
+        })
+        .collect();
+
+    let mut entries: Vec<Entry> = Vec::new();
+    for result in results {
+        match result {
+            Ok(items) => entries.extend(items),
+            Err(message) => println!("{}", message),
+        }
+    }
+
+    // Keep only entries with a timestamp, preferring `published` over `updated`,
+    // then order them from newest to oldest.
+    entries.retain(|entry| entry.published.or(entry.updated).is_some());
+    entries.sort_by_key(|entry| Reverse(entry.published.or(entry.updated).unwrap_or_default()));
+
+    entries
+}
+
+/// Returns the host part of `input_url`: the text after an optional
+/// `https://` / `http://` prefix and before the first `/`.
+///
+/// A URL without any path (for example `https://example.com`) yields the
+/// whole remainder instead of panicking.
+pub fn get_root_url(input_url: &str) -> &str {
+    let url = input_url.strip_prefix("https://").unwrap_or(input_url);
+    let url = url.strip_prefix("http://").unwrap_or(url);
+
+    // No '/' means there is no path component, so everything left is the host.
+    url.split_once('/').map_or(url, |(host, _)| host)
+}
+
+/// Trims `description` and, when the trimmed text is longer than `max_length`
+/// bytes, cuts it at the last character boundary at or before `max_length`
+/// and appends `...`.
+pub fn truncate_description(description: &str, max_length: usize) -> String {
+    let text = description.trim();
+    if text.len() <= max_length {
+        return text.to_string();
+    }
+
+    // Walk back from the limit until the cut no longer splits a multi-byte
+    // character; index 0 is always a boundary.
+    let cut = (0..=max_length)
+        .rev()
+        .find(|&idx| text.is_char_boundary(idx))
+        .unwrap_or(0);
+
+    format!("{}...", &text[..cut])
+}
+
+/// Returns the text content of `html` with all markup removed and runs of
+/// whitespace collapsed to single spaces.
+pub fn strip_html_tags(html: &str) -> String {
+    let document = Html::parse_document(html);
+
+    // Read the text once from the root element. Visiting every element and
+    // taking its text would repeat each text node once per ancestor.
+    let text = document.root_element().text().collect::<Vec<_>>().join(" ");
+
+    text.split_whitespace().collect::<Vec<_>>().join(" ")
+}
+
+/// Distributes the items of `slice` round-robin into `n` groups: group `i`
+/// receives the items at indices `i`, `i + n`, `i + 2n`, ….
+///
+/// Returns `n` groups (some possibly empty); `n == 0` returns no groups.
+pub fn group_by_nth<T: Clone>(slice: &[T], n: usize) -> Vec<Vec<T>> {
+    let mut groups = Vec::with_capacity(n);
+    for offset in 0..n {
+        // The loop body only runs for n >= 1, so the step is never zero.
+        let group: Vec<T> = slice.iter().skip(offset).step_by(n).cloned().collect();
+        groups.push(group);
+    }
+    groups
+}
--- a/src/web_fetchers.rs
+++ b/src/web_fetchers.rs
@@ -0,0 +1,28 @@
+use feed_rs::model::Entry;
+use feed_rs::parser;
+
+use reqwest::blocking::get;
+use scraper::{Html, Selector};
+
+use std::error::Error;
+
+/// Downloads the feed at `url`, parses it and returns its entries.
+///
+/// Prints the number of entries found to stdout.
+///
+/// # Errors
+///
+/// Returns an error when the request fails, the response body cannot be read
+/// as text, or the body cannot be parsed as a feed.
+pub fn fetch_feed(url: &str) -> Result<Vec<Entry>, Box<dyn Error>> {
+    let response = get(url)?;
+    let body = response.text()?;
+    let entries = parser::parse(body.as_bytes())?.entries;
+    println!("\tFeed {} returned {} items", url, entries.len());
+    Ok(entries)
+}
+
+/// Fetches the page at `url` and returns the `content` of its first
+/// `og:image` meta tag, or an empty string when there is none.
+///
+/// # Errors
+///
+/// Returns an error when the request fails or the body cannot be read as text.
+pub fn fetch_social_image(url: &str) -> Result<String, Box<dyn std::error::Error>> {
+    let page = reqwest::blocking::get(url)?.text()?;
+    let document = Html::parse_document(&page);
+    let og_image = Selector::parse("meta[property=\"og:image\"]").unwrap();
+
+    let image_url = match document.select(&og_image).next() {
+        Some(meta) => meta.value().attr("content").unwrap_or(""),
+        None => "",
+    };
+
+    Ok(image_url.to_string())
+}