feat(ghost-upload): add update support, manual CI job, and dependency updates

2025-09-19 03:52:37 +00:00 · 2025-08-26 11:07:24 -06:00
7 changed files with 789 additions and 838 deletions
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -14,10 +14,8 @@ staging:
  stage: deploy
  image: ${CI_REGISTRY_IMAGE}:${CI_COMMIT_BRANCH}
  script:
-    - echo "Building the main website with Quarto..."
+    - echo "Building the project with Quarto..."
    - quarto render --to html --output-dir public
    - echo "Building Ghost-optimized version..."
    - quarto render --profile ghost --to html --output-dir public/ghost-content
  artifacts:
    paths:
      - public
--- a/_quarto.yml
+++ b/_quarto.yml
@@ -1,42 +1,25 @@
 project:
  type: website
-profiles:
+website:
-  default:
+  title: "Anson's Projects"
-    website:
+  site-url: https://projects.ansonbiggs.com
-      title: "Anson's Projects"
+  description: A Blog for Technical Topics
-      site-url: https://projects.ansonbiggs.com
+  navbar:
-      description: A Blog for Technical Topics
+    left:
-      navbar:
+      - text: "About"
-        left:
+        href: about.html
-          - text: "About"
+    right:
-            href: about.html
+      - icon: rss
-        right:
+        href: index.xml
-          - icon: rss
+      # - icon: gitlab
-            href: index.xml
+      #   href: https://gitlab.com/MisterBiggs
-          # - icon: gitlab
+  open-graph: true
-          #   href: https://gitlab.com/MisterBiggs
+format:
-      open-graph: true
+  html:
-    format:
+    theme: zephyr
-      html:
+    css: styles.css
-        theme: zephyr
+    # toc: true
        css: styles.css
        # toc: true
  ghost:
    website:
      title: "Anson's Projects"
      site-url: https://projects.ansonbiggs.com
      description: A Blog for Technical Topics
      navbar: false
      open-graph: true
    format:
      html:
        theme: none
        css: ghost-iframe.css
        toc: false
        page-layout: article
        title-block-banner: false
 execute: 
  freeze: true
--- a/ghost-iframe.css
+++ b/ghost-iframe.css
@@ -1,129 +0,0 @@
 /* Ghost iframe optimized styles */
 /* Reset margins/padding first. This rule has the same specificity as the
    `body` rule below, so it must come earlier in the file; otherwise its
    `padding: 0` overrides the body padding on wide viewports. */
 html, body {
    margin: 0;
    padding: 0;
    box-sizing: border-box;
 }
 /* Base typography and spacing for the page body */
 body {
    font-family: system-ui, -apple-system, sans-serif;
    line-height: 1.6;
    color: #333;
    max-width: 100%;
    margin: 0;
    padding: 20px;
    background: white;
 }
 /* Ensure content flows naturally */
 #quarto-content {
    max-width: none;
    padding: 0;
    margin: 0;
 }
 /* Style headings for Ghost */
 h1, h2, h3, h4, h5, h6 {
    margin-top: 1.5em;
    margin-bottom: 0.5em;
    font-weight: 600;
    line-height: 1.3;
 }
 h1 { font-size: 2em; }
 h2 { font-size: 1.5em; }
 h3 { font-size: 1.25em; }
 /* Code blocks */
 pre {
    background: #f8f9fa;
    border: 1px solid #e9ecef;
    border-radius: 6px;
    padding: 1rem;
    overflow-x: auto;
    font-size: 0.875em;
 }
 code {
    font-family: "SF Mono", Monaco, "Cascadia Code", "Roboto Mono", Consolas, "Courier New", monospace;
    background: #f1f3f4;
    padding: 0.2em 0.4em;
    border-radius: 3px;
    font-size: 0.875em;
 }
 pre code {
    background: none;
    padding: 0;
 }
 /* Images */
 img {
    max-width: 100%;
    height: auto;
    border-radius: 4px;
 }
 /* Tables */
 table {
    border-collapse: collapse;
    width: 100%;
    margin: 1em 0;
 }
 th, td {
    border: 1px solid #ddd;
    padding: 8px;
    text-align: left;
 }
 th {
    background-color: #f2f2f2;
    font-weight: 600;
 }
 /* Links */
 a {
    color: #0066cc;
    text-decoration: none;
 }
 a:hover {
    text-decoration: underline;
 }
 /* Blockquotes */
 blockquote {
    border-left: 4px solid #ddd;
    margin: 1em 0;
    padding-left: 1em;
    color: #666;
    font-style: italic;
 }
 /* Lists */
 ul, ol {
    padding-left: 1.5em;
 }
 li {
    margin-bottom: 0.25em;
 }
 /* Remove any navbar/footer elements that might leak through */
 .navbar, .nav, footer, .sidebar, .toc, .page-footer {
    display: none !important;
 }
 /* Ensure responsive behavior for iframe */
@media (max-width: 768px) {
    body {
        padding: 15px;
        font-size: 16px;
    }
    h1 { font-size: 1.75em; }
    h2 { font-size: 1.35em; }
    h3 { font-size: 1.15em; }
 }
--- a/ghost-upload/.gitlab-ci.yml
+++ b/ghost-upload/.gitlab-ci.yml
@@ -1,16 +1,29 @@
 # Paths under ghost-upload that are cached for the jobs in this file.
 cache:
  paths:
    # Cargo build output directory
    - ./ghost-upload/target/
    # NOTE(review): presumably intended as a project-local CARGO_HOME; nothing
    # visible here sets CARGO_HOME to this path - confirm.
    - ./ghost-upload/cargo/
 # Runs the ghost-upload tool (`cargo run` in ./ghost-upload) on the default
 # branch. Depends on the `pages` job via `needs`.
 publish:
  stage: deploy
  image: rust:latest
  script:
    # Diagnostic output: show the workspace layout and locate index.xml
    # before the tool runs. The `|| echo` fallbacks keep a missing directory
    # from failing the job.
    - echo "Listing project root directory:"
    - ls -la
    - echo "Listing public directory:"
    - ls -la public/ || echo "public directory not found"
    - echo "Looking for index.xml:"
    - find . -name "index.xml" -type f || echo "No index.xml files found"
    - cd ./ghost-upload
    - cargo run
  needs:
    - pages
  rules:
    # Only run for commits on the default branch
    - if: "$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH"
 # Manually triggered variant of `publish`: runs the same tool with
 # UPDATE_EXISTING set, so posts that already exist in Ghost are updated
 # instead of skipped.
 publish_update:
  stage: deploy
  image: rust:latest
  variables:
    # Quoted so the tool receives the string "true", not a YAML boolean
    UPDATE_EXISTING: "true"
  script:
    - cd ./ghost-upload
    - cargo run
  needs:
    - pages
  rules:
    # Offered as a manual action on the default branch only
    - if: "$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH"
      when: manual
      # Manual jobs declared through `rules` default to allow_failure: false,
      # which leaves the pipeline blocked until the job is run. Mark the job
      # optional so pipelines can complete without it.
      allow_failure: true
--- a/ghost-upload/Cargo.lock
+++ b/ghost-upload/Cargo.lock
--- a/ghost-upload/README.md
+++ b/ghost-upload/README.md
@@ -1,25 +1,17 @@
 # ghost-upload
-This tool synchronizes posts from https://projects.ansonbiggs.com to the Ghost blog at https://notes.ansonbiggs.com.
+This tool uploads posts from https://projects.ansonbiggs.com to https://notes.ansonbiggs.com.
-## Features
+What's new:
 - Uses the Ghost Admin API to check for existing posts by slug instead of probing the public site.
 - Optional update support: set `UPDATE_EXISTING=true` (or `1`) to update an existing post in place (via `PUT /ghost/api/v3/admin/posts/{id}/?source=html`).
 - Safer slug handling (trims trailing `/` and falls back to the last path segment).
- **Clean content extraction**: Uses Quarto ghost profile to generate clean HTML instead of iframes
+Env vars:
- **Duplicate prevention**: Checks Ghost Admin API to avoid creating duplicate posts
+- `admin_api_key`: Ghost Admin API key in `key_id:secret` format.
- **AI summaries**: Uses Kagi Summarizer for post summaries
+- `kagi_api_key`: Kagi Summarizer API key.
- **Dual content rendering**: GitLab CI builds both main site and ghost-optimized versions
+- `UPDATE_EXISTING` (optional): if `true`/`1`, update posts that already exist in Ghost.
-## How It Works
+Notes:
-
+- Updates use optimistic concurrency by sending the current `updated_at` from Ghost. If someone edits a post in Ghost after we fetch it, the update will fail with a 409 and be reported in the console.
-1. **Dual Build Process**: GitLab CI builds the site twice:
+- Summaries are always regenerated when creating or updating; if you want to avoid re-summarizing on updates, leave `UPDATE_EXISTING` unset.
   - Main site → `public/` (normal theme with navigation)
   - Ghost content → `public/ghost-content/` (minimal theme for content extraction)
 2. **Content Extraction**: Rust tool fetches clean HTML from the ghost-content version instead of using iframes
 3. **Duplicate Detection**: Uses Ghost Admin API to check for existing posts by slug
 ## Environment Variables
 - `admin_api_key`: Ghost Admin API key (required)
 - `kagi_api_key`: Kagi Summarizer API key (required)
--- a/ghost-upload/src/main.rs
+++ b/ghost-upload/src/main.rs
@@ -1,6 +1,5 @@
 use feed_rs::model::Entry;
 use feed_rs::parser;
 use futures::future::join_all;
 use jsonwebtoken::{encode, Algorithm, EncodingKey, Header};
 use maud::html;
 use reqwest::Client;
@@ -20,6 +19,29 @@ struct PostPayload {
    posts: Vec<Post>,
 }
 /// A single post as serialized for an update request to the Ghost Admin API.
 ///
 /// Carries the content fields copied from a freshly built `Post`, plus the
 /// `id` and `updated_at` of the post that already exists in Ghost.
 #[derive(Serialize, Debug, Clone)]
 struct UpdatePost {
    /// Id of the existing Ghost post; also used to build the update URL.
    id: String,
    title: String,
    slug: String,
    html: String,
    status: String,
    published_at: String,
    /// `updated_at` value returned by Ghost when the post was looked up by
    /// slug; sent back unchanged with the update.
    updated_at: String,
    canonical_url: String,
    tags: Vec<String>,
    feature_image: Option<String>,
    feature_image_alt: Option<String>,
    feature_image_caption: Option<String>,
    meta_description: Option<String>,
    custom_excerpt: Option<String>,
 }
 /// JSON body of an update request: the posts to update, wrapped in a
 /// top-level `posts` array.
 #[derive(Serialize, Debug)]
 struct UpdatePayload {
    /// Posts to update; the caller in this file sends exactly one per request.
    posts: Vec<UpdatePost>,
 }
 #[derive(Serialize, Debug, Clone)]
 struct Post {
    title: String,
@@ -45,29 +67,13 @@ impl Post {
        let slug = get_slug(link);
        let summary = summarize_url(link).await;
        // Extract content from ghost-optimized version
        let ghost_content = extract_article_content(&link).await;
        let html = html! {
-            div class="ghost-summary" {
+            p { (summary) }
-                h3 { "Summary" }
+            iframe src=(link) style="width: 100%; height: 80vh" { }
-                p { (summary) }
+            p {
-            }
+                "This content was originally posted on my projects website " a href=(link) { "here." }
-            div class="ghost-content" {
+                " The above summary was made by the " a href=("https://help.kagi.com/kagi/api/summarizer.html")
-                (maud::PreEscaped(ghost_content))
+                {"Kagi Summarizer"}
            }
            div class="ghost-footer" {
                hr {}
                p {
                    em {
                        "This content was originally posted on my projects website " 
                        a href=(link) { "here" }
                        ". The above summary was generated by the " 
                        a href=("https://help.kagi.com/kagi/api/summarizer.html") {"Kagi Summarizer"} 
                        "."
                    }
                }
            }
        }.into_string();
@@ -137,128 +143,60 @@ impl Post {
            meta_description,
            custom_excerpt,
        };
        dbg!(&x);
        x
    }
 }
 fn get_slug(link: &str) -> String {
-    link.split_once("/posts/").unwrap().1.trim_end_matches('/').to_string()
+    // Prefer portion after "/posts/" if present, otherwise fall back to the last path segment
-}
+    let raw = match link.split_once("/posts/") {
-
+        Some((_, rest)) => rest,
-async fn extract_article_content(original_link: &str) -> String {
+        None => link.rsplit('/').next().unwrap_or(link),
-    // Convert original link to ghost-content version
+    };
-    let ghost_link = original_link.replace("projects.ansonbiggs.com", "projects.ansonbiggs.com/ghost-content");
+    raw.trim_end_matches('/')
-    
+        .to_string()
    match reqwest::get(&ghost_link).await {
        Ok(response) => {
            match response.text().await {
                Ok(html_content) => {
                    let document = Html::parse_document(&html_content);
                    // Try different selectors to find the main content
                    let content_selectors = [
                        "#quarto-content main",
                        "#quarto-content",
                        "main",
                        "article",
                        ".content",
                        "body"
                    ];
                    for selector_str in &content_selectors {
                        if let Ok(selector) = Selector::parse(selector_str) {
                            if let Some(element) = document.select(&selector).next() {
                                let content = element.inner_html();
                                if !content.trim().is_empty() {
                                    return content;
                                }
                            }
                        }
                    }
                    // Fallback: return original content with iframe if extraction fails
                    format!(r#"<div class="fallback-iframe">
                        <p><em>Content extraction failed. Falling back to embedded view:</em></p>
                        <iframe src="{}" style="width: 100%; height: 80vh; border: none;" loading="lazy"></iframe>
                    </div>"#, original_link)
                }
                Err(_) => format!(r#"<p><em>Failed to fetch content. <a href="{}">View original post</a></em></p>"#, original_link)
            }
        }
        Err(_) => format!(r#"<p><em>Failed to fetch content. <a href="{}">View original post</a></em></p>"#, original_link)
    }
 }
 #[derive(Deserialize, Debug)]
 struct GhostPostsResponse {
    posts: Vec<GhostPost>,
 }
 #[derive(Deserialize, Debug)]
-struct GhostPost {
+struct GhostPostSummary {
    id: String,
    slug: String,
    updated_at: String,
 }
-async fn get_existing_post_id(slug: &str, token: &str) -> Option<String> {
+#[derive(Deserialize, Debug)]
-    let client = Client::new();
+struct GhostPostsResponse<T> {
-    let api_url = format!("https://notes.ansonbiggs.com/ghost/api/v3/admin/posts/slug/{}/", slug);
+    posts: Vec<T>,
 }
-    match client
+async fn get_existing_post_by_slug(
-        .get(&api_url)
+    client: &Client,
    ghost_admin_base: &str,
    token: &str,
    slug: &str,
 ) -> Option<GhostPostSummary> {
    // Use Ghost Admin API to search by slug
    let url = format!(
        "{}/posts/?filter=slug:{}&fields=id,slug,updated_at",
        ghost_admin_base, slug
    );
    let resp = client
        .get(url)
        .header("Authorization", format!("Ghost {}", token))
        .send()
        .await
-    {
+        .ok()?;
-        Ok(response) => {
+    if !resp.status().is_success() {
-            if response.status().is_success() {
+        return None;
                if let Ok(ghost_response) = response.json::<GhostPostsResponse>().await {
                    ghost_response.posts.first().map(|post| post.id.clone())
                } else {
                    None
                }
            } else {
                None
            }
        }
        Err(_) => None,
    }
    let json = resp.json::<GhostPostsResponse<GhostPostSummary>>().await.ok()?;
    json.posts.into_iter().next()
 }
-async fn fetch_feed(path: &str) -> Vec<Entry> {
+async fn fetch_feed(url: &str) -> Vec<Entry> {
-    // Debug: Print current directory and list files
+    let content = reqwest::get(url).await.unwrap().text().await.unwrap();
    if let Ok(current_dir) = std::env::current_dir() {
        eprintln!("Current directory: {:?}", current_dir);
    }
-    // Debug: List files in parent directory
+    let feed = parser::parse(content.as_bytes()).unwrap();
    if let Ok(entries) = std::fs::read_dir("..") {
        eprintln!("Files in parent directory:");
        for entry in entries {
            if let Ok(entry) = entry {
                eprintln!("  {:?}", entry.path());
            }
        }
    }
    // Debug: Check if public directory exists
    if let Ok(entries) = std::fs::read_dir("../public") {
        eprintln!("Files in ../public:");
        for entry in entries {
            if let Ok(entry) = entry {
                eprintln!("  {:?}", entry.path());
            }
        }
    } else {
        eprintln!("../public directory does not exist or cannot be read");
    }
    // Read from local file instead of HTTP request
    let content = std::fs::read_to_string(path).expect("Failed to read RSS feed file");
    let feed = parser::parse(content.as_bytes()).expect("Failed to parse RSS feed");
    feed.entries
 }
@@ -319,12 +257,11 @@ async fn summarize_url(url: &str) -> String {
 }
 #[tokio::main]
 async fn main() {
-    let ghost_api_url = "https://notes.ansonbiggs.com/ghost/api/v3/admin/posts/?source=html";
+    let ghost_admin_base = "https://notes.ansonbiggs.com/ghost/api/v3/admin";
    let ghost_posts_create_url = format!("{}/posts/?source=html", ghost_admin_base);
    let ghost_admin_api_key = env::var("admin_api_key").unwrap();
-
+    let feed = "https://projects.ansonbiggs.com/index.xml";
    let feed = "../public/index.xml";
    // Split the key into ID and SECRET
    let (id, secret) = ghost_admin_api_key
@@ -354,56 +291,87 @@ async fn main() {
    )
    .expect("JWT encoding failed");
    let client = Client::new();
    // Prepare the post data
    let entries = fetch_feed(feed).await;
-    let post_exists_futures = entries.into_iter().map(|entry| {
+    // Control whether to update existing posts via env var
-        let entry_clone = entry.clone();
+    let update_existing = env::var("UPDATE_EXISTING").map(|v| v == "1" || v.eq_ignore_ascii_case("true")).unwrap_or(false);
        let token_clone = token.clone();
        async move { 
            let link = entry.links.first().unwrap().href.as_str();
            let slug = get_slug(link);
            (entry_clone, get_existing_post_id(&slug, &token_clone).await.is_some()) 
        }
    });
-    let post_exists_results = join_all(post_exists_futures).await;
+    for entry in entries {
        let link = entry.links.first().unwrap().href.as_str();
        let slug = get_slug(link);
-    let filtered_entries: Vec<Entry> = post_exists_results
+        let existing = get_existing_post_by_slug(&client, ghost_admin_base, &token, &slug).await;
        .into_iter()
        .filter_map(|(entry, exists)| if !exists { Some(entry) } else { None })
        .collect();
-    if filtered_entries.is_empty() {
+        match existing {
-        println!("Nothing to post.");
+            None => {
-        return;
+                // Create new post
-    }
+                let post = Post::new(entry.clone()).await;
                let post_payload = PostPayload { posts: vec![post.clone()] };
-    let post_futures = filtered_entries.into_iter().map(Post::new);
+                let response = client
                    .post(&ghost_posts_create_url)
                    .header("Authorization", format!("Ghost {}", token))
                    .json(&post_payload)
                    .send()
                    .await
                    .expect("Request failed");
-    let client = Client::new();
+                if response.status().is_success() {
                    println!("Post {} published successfully.", post.title);
                } else {
                    println!(
                        "Failed to publish post {}.\n\tStatus: {}",
                        &post.title,
                        response.status()
                    );
                }
            }
            Some(summary) => {
                if !update_existing {
                    println!("Post '{}' exists (slug: {}), skipping.", entry.title.unwrap().content, slug);
                    continue;
                }
-    for post in join_all(post_futures).await {
+                // Update existing post
-        let post_payload = PostPayload {
+                let post = Post::new(entry.clone()).await;
-            posts: vec![post.clone()],
+                let update = UpdatePost {
-        };
+                    id: summary.id,
                    title: post.title,
                    slug: post.slug,
                    html: post.html,
                    status: post.status,
                    published_at: post.published_at,
                    updated_at: summary.updated_at,
                    canonical_url: post.canonical_url,
                    tags: post.tags,
                    feature_image: post.feature_image,
                    feature_image_alt: post.feature_image_alt,
                    feature_image_caption: post.feature_image_caption,
                    meta_description: post.meta_description,
                    custom_excerpt: post.custom_excerpt,
                };
-        let response = client
+                let update_url = format!("{}/posts/{}/?source=html", ghost_admin_base, update.id);
-            .post(ghost_api_url)
+                let response = client
-            .header("Authorization", format!("Ghost {}", token))
+                    .put(update_url)
-            .json(&post_payload)
+                    .header("Authorization", format!("Ghost {}", token))
-            .send()
+                    .json(&UpdatePayload { posts: vec![update] })
-            .await
+                    .send()
-            .expect("Request failed");
+                    .await
                    .expect("Update request failed");
-        // Check the response
+                if response.status().is_success() {
-        if response.status().is_success() {
+                    println!("Post '{}' updated successfully.", entry.title.unwrap().content);
-            println!("Post {} published successfully.", post.title);
+                } else {
-        } else {
+                    println!(
-            println!(
+                        "Failed to update post '{}' (status: {}).",
-                "Failed to publish post {}.\n\tResp: {:?}",
+                        entry.title.unwrap().content,
-                &post.title, response
+                        response.status()
-            );
+                    );
                }
            }
        }
    }
 }