1
0
mirror of https://gitlab.com/Anson-Projects/projects.git synced 2025-09-19 03:52:37 +00:00

1 Commits

7 changed files with 789 additions and 838 deletions

View File

@@ -14,10 +14,8 @@ staging:
stage: deploy stage: deploy
image: ${CI_REGISTRY_IMAGE}:${CI_COMMIT_BRANCH} image: ${CI_REGISTRY_IMAGE}:${CI_COMMIT_BRANCH}
script: script:
- echo "Building the main website with Quarto..." - echo "Building the project with Quarto..."
- quarto render --to html --output-dir public - quarto render --to html --output-dir public
- echo "Building Ghost-optimized version..."
- quarto render --profile ghost --to html --output-dir public/ghost-content
artifacts: artifacts:
paths: paths:
- public - public

View File

@@ -1,42 +1,25 @@
project: project:
type: website type: website
profiles: website:
default: title: "Anson's Projects"
website: site-url: https://projects.ansonbiggs.com
title: "Anson's Projects" description: A Blog for Technical Topics
site-url: https://projects.ansonbiggs.com navbar:
description: A Blog for Technical Topics left:
navbar: - text: "About"
left: href: about.html
- text: "About" right:
href: about.html - icon: rss
right: href: index.xml
- icon: rss # - icon: gitlab
href: index.xml # href: https://gitlab.com/MisterBiggs
# - icon: gitlab open-graph: true
# href: https://gitlab.com/MisterBiggs format:
open-graph: true html:
format: theme: zephyr
html: css: styles.css
theme: zephyr # toc: true
css: styles.css
# toc: true
ghost:
website:
title: "Anson's Projects"
site-url: https://projects.ansonbiggs.com
description: A Blog for Technical Topics
navbar: false
open-graph: true
format:
html:
theme: none
css: ghost-iframe.css
toc: false
page-layout: article
title-block-banner: false
execute: execute:
freeze: true freeze: true

View File

@@ -1,129 +0,0 @@
/* Ghost iframe optimized styles */

/*
 * Reset margins/padding on the root elements FIRST.
 *
 * This rule and the `body` rule below both match <body> with identical
 * specificity, so whichever appears later in the file wins. The reset used
 * to come second, which silently cancelled the 20px body padding on wide
 * viewports while the max-width: 768px media query still applied its 15px.
 */
html, body {
  margin: 0;
  padding: 0;
  box-sizing: border-box;
}

/* Base typography and page spacing for content shown inside the iframe */
body {
  font-family: system-ui, -apple-system, sans-serif;
  line-height: 1.6;
  color: #333;
  max-width: 100%;
  margin: 0;
  padding: 20px; /* now effective: declared after the reset above */
  background: white;
}
/* Let the Quarto content wrapper use the full width with no spacing of its own */
#quarto-content {
  margin: 0;
  padding: 0;
  max-width: none;
}

/* Headings: shared weight and vertical rhythm for every level */
h1, h2, h3, h4, h5, h6 {
  font-weight: 600;
  line-height: 1.3;
  margin-top: 1.5em;
  margin-bottom: .5em;
}

/* Explicit sizes are set for the first three levels only */
h1 {
  font-size: 2em;
}

h2 {
  font-size: 1.5em;
}

h3 {
  font-size: 1.25em;
}
/* Preformatted blocks: light boxed panel that scrolls sideways instead of wrapping */
pre {
  font-size: .875em;
  padding: 1rem;
  overflow-x: auto;
  border: 1px solid #e9ecef;
  border-radius: 6px;
  background: #f8f9fa;
}

/* Inline code: monospace stack with a subtle pill background */
code {
  font-family: "SF Mono", Monaco, "Cascadia Code", "Roboto Mono", Consolas, "Courier New", monospace;
  font-size: .875em;
  padding: .2em .4em;
  border-radius: 3px;
  background: #f1f3f4;
}

/* Code nested in a <pre> drops the inline pill so only the outer panel shows */
pre code {
  padding: 0;
  background: none;
}
/* Images scale down to the container width and keep their aspect ratio */
img {
  height: auto;
  max-width: 100%;
  border-radius: 4px;
}

/* Tables: full width, single shared borders between cells */
table {
  width: 100%;
  margin: 1em 0;
  border-collapse: collapse;
}

/* Header and data cells share border, padding and alignment */
th, td {
  padding: 8px;
  text-align: left;
  border: 1px solid #dddddd;
}

/* Header cells are additionally shaded and bolder */
th {
  font-weight: 600;
  background-color: #f2f2f2;
}
/* Links: coloured, underlined only while hovered */
a {
  text-decoration: none;
  color: #06c;
}

a:hover {
  text-decoration: underline;
}

/* Blockquotes: muted italic text with a left rule */
blockquote {
  margin: 1em 0;
  padding-left: 1em;
  border-left: 4px solid #dddddd;
  font-style: italic;
  color: #666666;
}

/* Lists: indent both list kinds equally, small gap between items */
ol, ul {
  padding-left: 1.5em;
}

li {
  margin-bottom: .25em;
}
/* Force-hide site chrome (navigation, sidebars, TOC, footers) if any is rendered */
.navbar,
.nav,
.sidebar,
.toc,
footer,
.page-footer {
  display: none !important;
}

/* Viewports up to 768px wide: tighter page padding and smaller headings */
@media (max-width: 768px) {
  body {
    font-size: 16px;
    padding: 15px;
  }

  h1 {
    font-size: 1.75em;
  }

  h2 {
    font-size: 1.35em;
  }

  h3 {
    font-size: 1.15em;
  }
}

View File

@@ -1,16 +1,29 @@
cache:
paths:
- ./ghost-upload/target/
- ./ghost-upload/cargo/
publish: publish:
stage: deploy stage: deploy
image: rust:latest image: rust:latest
script: script:
- echo "Listing project root directory:"
- ls -la
- echo "Listing public directory:"
- ls -la public/ || echo "public directory not found"
- echo "Looking for index.xml:"
- find . -name "index.xml" -type f || echo "No index.xml files found"
- cd ./ghost-upload - cd ./ghost-upload
- cargo run - cargo run
needs: needs:
- pages - pages
rules: rules:
- if: "$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH" - if: "$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH"
# Variant of the `publish` job that runs the same ghost-upload tool
# (`cargo run` inside ./ghost-upload) with UPDATE_EXISTING set, so that posts
# already present in Ghost are updated instead of skipped.
publish_update:
stage: deploy
image: rust:latest
variables:
# Read by ghost-upload from the environment; "true" (or "1") enables updating existing posts.
UPDATE_EXISTING: "true"
script:
- cd ./ghost-upload
- cargo run
needs:
# Waits for the `pages` job, like `publish` does.
- pages
rules:
# Only offered on the default branch...
- if: "$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH"
# ...and only started when triggered by hand.
when: manual

1074
ghost-upload/Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,25 +1,17 @@
# ghost-upload # ghost-upload
This tool synchronizes posts from https://projects.ansonbiggs.com to the Ghost blog at https://notes.ansonbiggs.com. This tool uploads posts from https://projects.ansonbiggs.com to https://notes.ansonbiggs.com.
## Features What's new:
- Uses the Ghost Admin API to check for existing posts by slug instead of probing the public site.
- Optional update support: set `UPDATE_EXISTING=true` to update an existing post in-place (via `PUT /ghost/api/v3/admin/posts/{id}?source=html`).
- Safer slug handling (trims trailing `/` and falls back to the last path segment).
- **Clean content extraction**: Uses Quarto ghost profile to generate clean HTML instead of iframes Env vars:
- **Duplicate prevention**: Checks Ghost Admin API to avoid creating duplicate posts - `admin_api_key`: Ghost Admin API key in `key_id:secret` format.
- **AI summaries**: Uses Kagi Summarizer for post summaries - `kagi_api_key`: Kagi Summarizer API key.
- **Dual content rendering**: GitLab CI builds both main site and ghost-optimized versions - `UPDATE_EXISTING` (optional): if `true`/`1`, update posts that already exist in Ghost.
## How It Works Notes:
- Updates use optimistic concurrency by sending the current `updated_at` from Ghost. If someone edits a post in Ghost after we fetch it, the update will fail with a 409 and be reported in the console.
1. **Dual Build Process**: GitLab CI builds the site twice: - Summaries are always regenerated when creating or updating; if you want to avoid re-summarizing on updates, leave `UPDATE_EXISTING` unset.
- Main site → `public/` (normal theme with navigation)
- Ghost content → `public/ghost-content/` (minimal theme for content extraction)
2. **Content Extraction**: Rust tool fetches clean HTML from the ghost-content version instead of using iframes
3. **Duplicate Detection**: Uses Ghost Admin API to check for existing posts by slug
## Environment Variables
- `admin_api_key`: Ghost Admin API key (required)
- `kagi_api_key`: Kagi Summarizer API key (required)

View File

@@ -1,6 +1,5 @@
use feed_rs::model::Entry; use feed_rs::model::Entry;
use feed_rs::parser; use feed_rs::parser;
use futures::future::join_all;
use jsonwebtoken::{encode, Algorithm, EncodingKey, Header}; use jsonwebtoken::{encode, Algorithm, EncodingKey, Header};
use maud::html; use maud::html;
use reqwest::Client; use reqwest::Client;
@@ -20,6 +19,29 @@ struct PostPayload {
posts: Vec<Post>, posts: Vec<Post>,
} }
/// A single post as sent to the Ghost Admin API when updating an existing post.
///
/// `main` builds one of these for a feed entry whose slug already exists in
/// Ghost (and only when `UPDATE_EXISTING` is enabled), wraps it in an
/// `UpdatePayload`, and sends it with `PUT {admin base}/posts/{id}/?source=html`.
/// Apart from `id` and `updated_at`, every field is copied from a freshly
/// built `Post`.
#[derive(Serialize, Debug, Clone)]
struct UpdatePost {
// Ghost's id of the existing post; also interpolated into the update URL.
id: String,
title: String,
slug: String,
html: String,
status: String,
published_at: String,
// Taken from the `updated_at` Ghost returned for the existing post, not from
// the new `Post`. The README describes this as optimistic concurrency: a
// stale value is expected to make the update fail with a 409.
updated_at: String,
canonical_url: String,
tags: Vec<String>,
feature_image: Option<String>,
feature_image_alt: Option<String>,
feature_image_caption: Option<String>,
meta_description: Option<String>,
custom_excerpt: Option<String>,
}
/// JSON envelope for an update request: an object with a single `posts` array.
///
/// `main` always sends exactly one `UpdatePost` per request.
#[derive(Serialize, Debug)]
struct UpdatePayload {
posts: Vec<UpdatePost>,
}
#[derive(Serialize, Debug, Clone)] #[derive(Serialize, Debug, Clone)]
struct Post { struct Post {
title: String, title: String,
@@ -45,29 +67,13 @@ impl Post {
let slug = get_slug(link); let slug = get_slug(link);
let summary = summarize_url(link).await; let summary = summarize_url(link).await;
// Extract content from ghost-optimized version
let ghost_content = extract_article_content(&link).await;
let html = html! { let html = html! {
div class="ghost-summary" { p { (summary) }
h3 { "Summary" } iframe src=(link) style="width: 100%; height: 80vh" { }
p { (summary) } p {
} "This content was originally posted on my projects website " a href=(link) { "here." }
div class="ghost-content" { " The above summary was made by the " a href=("https://help.kagi.com/kagi/api/summarizer.html")
(maud::PreEscaped(ghost_content)) {"Kagi Summarizer"}
}
div class="ghost-footer" {
hr {}
p {
em {
"This content was originally posted on my projects website "
a href=(link) { "here" }
". The above summary was generated by the "
a href=("https://help.kagi.com/kagi/api/summarizer.html") {"Kagi Summarizer"}
"."
}
}
} }
}.into_string(); }.into_string();
@@ -137,128 +143,60 @@ impl Post {
meta_description, meta_description,
custom_excerpt, custom_excerpt,
}; };
dbg!(&x);
x x
} }
} }
fn get_slug(link: &str) -> String { fn get_slug(link: &str) -> String {
link.split_once("/posts/").unwrap().1.trim_end_matches('/').to_string() // Prefer portion after "/posts/" if present, otherwise fall back to the last path segment
} let raw = match link.split_once("/posts/") {
Some((_, rest)) => rest,
async fn extract_article_content(original_link: &str) -> String { None => link.rsplit('/').next().unwrap_or(link),
// Convert original link to ghost-content version };
let ghost_link = original_link.replace("projects.ansonbiggs.com", "projects.ansonbiggs.com/ghost-content"); raw.trim_end_matches('/')
.to_string()
match reqwest::get(&ghost_link).await {
Ok(response) => {
match response.text().await {
Ok(html_content) => {
let document = Html::parse_document(&html_content);
// Try different selectors to find the main content
let content_selectors = [
"#quarto-content main",
"#quarto-content",
"main",
"article",
".content",
"body"
];
for selector_str in &content_selectors {
if let Ok(selector) = Selector::parse(selector_str) {
if let Some(element) = document.select(&selector).next() {
let content = element.inner_html();
if !content.trim().is_empty() {
return content;
}
}
}
}
// Fallback: return original content with iframe if extraction fails
format!(r#"<div class="fallback-iframe">
<p><em>Content extraction failed. Falling back to embedded view:</em></p>
<iframe src="{}" style="width: 100%; height: 80vh; border: none;" loading="lazy"></iframe>
</div>"#, original_link)
}
Err(_) => format!(r#"<p><em>Failed to fetch content. <a href="{}">View original post</a></em></p>"#, original_link)
}
}
Err(_) => format!(r#"<p><em>Failed to fetch content. <a href="{}">View original post</a></em></p>"#, original_link)
}
}
/// Deserialized body of a Ghost Admin API posts response: an object with a
/// `posts` array.
///
/// `get_existing_post_id` parses the lookup-by-slug response into this type
/// and uses only the first element's `id`.
#[derive(Deserialize, Debug)]
struct GhostPostsResponse {
posts: Vec<GhostPost>,
}
#[derive(Deserialize, Debug)] #[derive(Deserialize, Debug)]
struct GhostPost { struct GhostPostSummary {
id: String, id: String,
slug: String,
updated_at: String,
} }
async fn get_existing_post_id(slug: &str, token: &str) -> Option<String> { #[derive(Deserialize, Debug)]
let client = Client::new(); struct GhostPostsResponse<T> {
let api_url = format!("https://notes.ansonbiggs.com/ghost/api/v3/admin/posts/slug/{}/", slug); posts: Vec<T>,
}
match client async fn get_existing_post_by_slug(
.get(&api_url) client: &Client,
ghost_admin_base: &str,
token: &str,
slug: &str,
) -> Option<GhostPostSummary> {
// Use Ghost Admin API to search by slug
let url = format!(
"{}/posts/?filter=slug:{}&fields=id,slug,updated_at",
ghost_admin_base, slug
);
let resp = client
.get(url)
.header("Authorization", format!("Ghost {}", token)) .header("Authorization", format!("Ghost {}", token))
.send() .send()
.await .await
{ .ok()?;
Ok(response) => { if !resp.status().is_success() {
if response.status().is_success() { return None;
if let Ok(ghost_response) = response.json::<GhostPostsResponse>().await {
ghost_response.posts.first().map(|post| post.id.clone())
} else {
None
}
} else {
None
}
}
Err(_) => None,
} }
let json = resp.json::<GhostPostsResponse<GhostPostSummary>>().await.ok()?;
json.posts.into_iter().next()
} }
async fn fetch_feed(path: &str) -> Vec<Entry> { async fn fetch_feed(url: &str) -> Vec<Entry> {
// Debug: Print current directory and list files let content = reqwest::get(url).await.unwrap().text().await.unwrap();
if let Ok(current_dir) = std::env::current_dir() {
eprintln!("Current directory: {:?}", current_dir);
}
// Debug: List files in parent directory let feed = parser::parse(content.as_bytes()).unwrap();
if let Ok(entries) = std::fs::read_dir("..") {
eprintln!("Files in parent directory:");
for entry in entries {
if let Ok(entry) = entry {
eprintln!(" {:?}", entry.path());
}
}
}
// Debug: Check if public directory exists
if let Ok(entries) = std::fs::read_dir("../public") {
eprintln!("Files in ../public:");
for entry in entries {
if let Ok(entry) = entry {
eprintln!(" {:?}", entry.path());
}
}
} else {
eprintln!("../public directory does not exist or cannot be read");
}
// Read from local file instead of HTTP request
let content = std::fs::read_to_string(path).expect("Failed to read RSS feed file");
let feed = parser::parse(content.as_bytes()).expect("Failed to parse RSS feed");
feed.entries feed.entries
} }
@@ -319,12 +257,11 @@ async fn summarize_url(url: &str) -> String {
} }
#[tokio::main] #[tokio::main]
async fn main() { async fn main() {
let ghost_api_url = "https://notes.ansonbiggs.com/ghost/api/v3/admin/posts/?source=html"; let ghost_admin_base = "https://notes.ansonbiggs.com/ghost/api/v3/admin";
let ghost_posts_create_url = format!("{}/posts/?source=html", ghost_admin_base);
let ghost_admin_api_key = env::var("admin_api_key").unwrap(); let ghost_admin_api_key = env::var("admin_api_key").unwrap();
let feed = "https://projects.ansonbiggs.com/index.xml";
let feed = "../public/index.xml";
// Split the key into ID and SECRET // Split the key into ID and SECRET
let (id, secret) = ghost_admin_api_key let (id, secret) = ghost_admin_api_key
@@ -354,56 +291,87 @@ async fn main() {
) )
.expect("JWT encoding failed"); .expect("JWT encoding failed");
let client = Client::new();
// Prepare the post data // Prepare the post data
let entries = fetch_feed(feed).await; let entries = fetch_feed(feed).await;
let post_exists_futures = entries.into_iter().map(|entry| { // Control whether to update existing posts via env var
let entry_clone = entry.clone(); let update_existing = env::var("UPDATE_EXISTING").map(|v| v == "1" || v.eq_ignore_ascii_case("true")).unwrap_or(false);
let token_clone = token.clone();
async move {
let link = entry.links.first().unwrap().href.as_str();
let slug = get_slug(link);
(entry_clone, get_existing_post_id(&slug, &token_clone).await.is_some())
}
});
let post_exists_results = join_all(post_exists_futures).await; for entry in entries {
let link = entry.links.first().unwrap().href.as_str();
let slug = get_slug(link);
let filtered_entries: Vec<Entry> = post_exists_results let existing = get_existing_post_by_slug(&client, ghost_admin_base, &token, &slug).await;
.into_iter()
.filter_map(|(entry, exists)| if !exists { Some(entry) } else { None })
.collect();
if filtered_entries.is_empty() { match existing {
println!("Nothing to post."); None => {
return; // Create new post
} let post = Post::new(entry.clone()).await;
let post_payload = PostPayload { posts: vec![post.clone()] };
let post_futures = filtered_entries.into_iter().map(Post::new); let response = client
.post(&ghost_posts_create_url)
.header("Authorization", format!("Ghost {}", token))
.json(&post_payload)
.send()
.await
.expect("Request failed");
let client = Client::new(); if response.status().is_success() {
println!("Post {} published successfully.", post.title);
} else {
println!(
"Failed to publish post {}.\n\tStatus: {}",
&post.title,
response.status()
);
}
}
Some(summary) => {
if !update_existing {
println!("Post '{}' exists (slug: {}), skipping.", entry.title.unwrap().content, slug);
continue;
}
for post in join_all(post_futures).await { // Update existing post
let post_payload = PostPayload { let post = Post::new(entry.clone()).await;
posts: vec![post.clone()], let update = UpdatePost {
}; id: summary.id,
title: post.title,
slug: post.slug,
html: post.html,
status: post.status,
published_at: post.published_at,
updated_at: summary.updated_at,
canonical_url: post.canonical_url,
tags: post.tags,
feature_image: post.feature_image,
feature_image_alt: post.feature_image_alt,
feature_image_caption: post.feature_image_caption,
meta_description: post.meta_description,
custom_excerpt: post.custom_excerpt,
};
let response = client let update_url = format!("{}/posts/{}/?source=html", ghost_admin_base, update.id);
.post(ghost_api_url) let response = client
.header("Authorization", format!("Ghost {}", token)) .put(update_url)
.json(&post_payload) .header("Authorization", format!("Ghost {}", token))
.send() .json(&UpdatePayload { posts: vec![update] })
.await .send()
.expect("Request failed"); .await
.expect("Update request failed");
// Check the response if response.status().is_success() {
if response.status().is_success() { println!("Post '{}' updated successfully.", entry.title.unwrap().content);
println!("Post {} published successfully.", post.title); } else {
} else { println!(
println!( "Failed to update post '{}' (status: {}).",
"Failed to publish post {}.\n\tResp: {:?}", entry.title.unwrap().content,
&post.title, response response.status()
); );
}
}
} }
} }
} }