1
0
mirror of https://gitlab.com/Anson-Projects/projects.git synced 2025-08-02 19:41:38 +00:00

Update Ghost uploader to include extra post info

This commit is contained in:
2024-11-24 22:16:59 -07:00
parent 4d01ec63b1
commit 63c8096a88
3 changed files with 479 additions and 62 deletions

View File

@@ -4,8 +4,10 @@ use futures::future::join_all;
use jsonwebtoken::{encode, Algorithm, EncodingKey, Header};
use maud::html;
use reqwest::Client;
use scraper::{Html, Selector};
use serde::{Deserialize, Serialize};
use std::env;
#[derive(Debug, Serialize, Deserialize)]
struct Claims {
iat: usize,
@@ -26,20 +28,23 @@ struct Post {
status: String,
published_at: String,
updated_at: String,
feature_image: String,
canonical_url: String,
tags: Vec<String>,
feature_image: Option<String>,
feature_image_alt: Option<String>,
feature_image_caption: Option<String>,
meta_description: Option<String>,
custom_excerpt: Option<String>,
}
impl Post {
fn new(
title: &str,
summary: &str,
link: &str,
mut categories: Vec<&str>,
image_url: &str,
pub_date: &str,
) -> Self {
async fn new(entry: Entry) -> Post {
let title = entry.title.as_ref().unwrap().content.clone();
let link = entry.links.first().unwrap().href.as_str();
let slug = get_slug(link);
let summary = summarize_url(link).await;
let html = html! {
p { (summary) }
iframe src=(link) style="width: 100%; height: 80vh" { }
@@ -48,45 +53,77 @@ impl Post {
" The above summary was made by the " a href=("https://help.kagi.com/kagi/api/summarizer.html")
{"Kagi Summarizer"}
}
};
}.into_string();
let slug = get_slug(link);
categories.push("Projects Website");
Post {
title: title.to_string(),
slug: slug.to_string(),
html: html.into_string(),
status: "published".to_string(),
published_at: pub_date.to_string(),
updated_at: chrono::Utc::now().to_rfc3339(),
feature_image: image_url.to_string(),
canonical_url: link.to_string(),
tags: categories.into_iter().map(|c| c.to_string()).collect(),
}
}
}
let status = "published".to_owned();
async fn entry_to_post(entry: Entry) -> Post {
let link = entry.links.first().unwrap().href.as_str();
let summary = summarize_url(link).await;
let published_at = entry.published.unwrap().to_rfc3339();
Post::new(
entry.title.as_ref().unwrap().content.as_str(),
summary.as_str(),
link,
entry
let updated_at = chrono::Utc::now().to_rfc3339();
let canonical_url = link.to_owned();
let mut tags: Vec<String> = entry
.categories
.iter()
.map(|category| category.term.as_str())
.collect::<Vec<&str>>(),
entry
.media
.first()
.and_then(|m| m.content.first())
.and_then(|c| c.url.as_ref().map(|u| u.as_str()))
.unwrap_or_default(),
&entry.published.unwrap().to_rfc3339(),
)
.map(|category| category.term.as_str().to_owned())
.collect();
tags.push("Projects Website".to_owned());
        // The rest of the data is optional and has to be pulled from the document's OpenGraph tags
let raw_html = reqwest::get(link).await.unwrap().text().await.unwrap();
let document = Html::parse_document(&raw_html);
let selector = Selector::parse("meta[property^='og:']").unwrap();
let mut feature_image = None;
let mut feature_image_alt = None;
let mut feature_image_caption = None;
let mut meta_description = None;
let mut custom_excerpt = None;
for meta in document.select(&selector) {
match meta.value().attr("property") {
Some("og:image") => feature_image = meta.value().attr("content").map(String::from),
Some("og:image:alt") => {
// Ghost API limits this to 190 chars
feature_image_alt = meta.value().attr("content").map(|desc| {
desc.chars().take(190).collect()
})
}
Some("og:image:description") => {
feature_image_caption = meta.value().attr("content").map(String::from)
}
Some("og:description") => {
meta_description = meta.value().attr("content").map(String::from);
// Ghost API limits this to 300 chars
custom_excerpt = meta.value().attr("content").map(|desc| {
desc.chars().take(300).collect()
});
}
_ => {}
}
}
let x = Post {
title,
slug,
html,
status,
published_at,
updated_at,
canonical_url,
tags,
feature_image,
feature_image_alt,
feature_image_caption,
meta_description,
custom_excerpt,
};
dbg!(&x);
x
}
}
fn get_slug(link: &str) -> String {
@@ -205,10 +242,12 @@ async fn main() {
let entries = fetch_feed(feed).await;
let post_exists_futures = entries.into_iter().map(|entry| {
let entry_clone = entry.clone(); // Clone entry if necessary (depends on your data structure)
let entry_clone = entry.clone();
async move { (entry_clone, check_if_post_exists(&entry).await) }
});
let post_exists_results = join_all(post_exists_futures).await;
let filtered_entries: Vec<Entry> = post_exists_results
.into_iter()
.filter_map(|(entry, exists)| if !exists { Some(entry) } else { None })
@@ -219,7 +258,7 @@ async fn main() {
return;
}
let post_futures = filtered_entries.into_iter().map(entry_to_post);
let post_futures = filtered_entries.into_iter().map(Post::new);
let client = Client::new();
@@ -241,9 +280,8 @@ async fn main() {
println!("Post {} published successfully.", post.title);
} else {
println!(
"Failed to publish post {}. Status: {:?}",
&post.title,
response.status()
"Failed to publish post {}.\n\tResp: {:?}",
&post.title, response
);
}
}