1
0
mirror of https://gitlab.com/Anson-Projects/projects.git synced 2025-09-14 09:35:04 +00:00

feat: restore ghost profile functionality for clean content extraction

- Restore Quarto ghost profiles in _quarto.yml for dual content rendering
- Restore ghost-iframe.css with clean styling for Ghost content
- Restore GitLab CI dual build: main site + ghost-content optimized version
- Restore extract_article_content() function in Rust for clean HTML extraction
- Update README to document the ghost profiles feature and how it works

This is the core feature of the MR: generating clean HTML content for Ghost
instead of using iframes by building a ghost-optimized version of the site.
This commit is contained in:
2025-08-22 11:20:06 -06:00
parent 9e2596c070
commit 21ad5cb862
5 changed files with 258 additions and 27 deletions

View File

@@ -14,8 +14,10 @@ staging:
stage: deploy
image: ${CI_REGISTRY_IMAGE}:${CI_COMMIT_BRANCH}
script:
- echo "Building the project with Quarto..."
- echo "Building the main website with Quarto..."
- quarto render --to html --output-dir public
- echo "Building Ghost-optimized version..."
- quarto render --profile ghost --to html --output-dir public/ghost-content
artifacts:
paths:
- public

View File

@@ -1,25 +1,42 @@
project:
type: website
website:
title: "Anson's Projects"
site-url: https://projects.ansonbiggs.com
description: A Blog for Technical Topics
navbar:
left:
- text: "About"
href: about.html
right:
- icon: rss
href: index.xml
# - icon: gitlab
# href: https://gitlab.com/MisterBiggs
open-graph: true
format:
html:
theme: zephyr
css: styles.css
# toc: true
# NOTE(review): Quarto's documented profile mechanism is a separate
# `_quarto-<name>.yml` file per profile (merged over `_quarto.yml` when
# `--profile <name>` is passed); the `profile:` key only carries `default` and
# `group`. Confirm that this nested `profiles:` map is actually honored by the
# Quarto version used in CI -- if it is not, the `website`/`format` settings
# below are ignored for both builds. TODO confirm.
profiles:
default:
website:
title: "Anson's Projects"
site-url: https://projects.ansonbiggs.com
description: A Blog for Technical Topics
navbar:
left:
- text: "About"
href: about.html
right:
- icon: rss
href: index.xml
# - icon: gitlab
# href: https://gitlab.com/MisterBiggs
open-graph: true
format:
html:
theme: zephyr
css: styles.css
# toc: true
# Minimal rendering used for content extraction: no navbar, no theme, no TOC.
ghost:
website:
title: "Anson's Projects"
site-url: https://projects.ansonbiggs.com
description: A Blog for Technical Topics
navbar: false
open-graph: true
format:
html:
theme: none
css: ghost-iframe.css
toc: false
page-layout: article
title-block-banner: false
execute:
freeze: true

129
ghost-iframe.css Normal file
View File

@@ -0,0 +1,129 @@
/* Ghost iframe optimized styles */
/* Reset browser defaults first. This rule has the same specificity as the
   `body` rule below, so it must come before it: when it came after, its
   `padding: 0` silently overrode the intended 20px body padding on desktop
   while the mobile media query still applied 15px. */
html, body {
  margin: 0;
  padding: 0;
  box-sizing: border-box;
}
/* Base typography and spacing for the extracted content */
body {
  font-family: system-ui, -apple-system, sans-serif;
  line-height: 1.6;
  color: #333;
  max-width: 100%;
  margin: 0;
  padding: 20px;
  background: white;
}
/* Ensure content flows naturally */
#quarto-content {
max-width: none;
padding: 0;
margin: 0;
}
/* Style headings for Ghost */
h1, h2, h3, h4, h5, h6 {
margin-top: 1.5em;
margin-bottom: 0.5em;
font-weight: 600;
line-height: 1.3;
}
h1 { font-size: 2em; }
h2 { font-size: 1.5em; }
h3 { font-size: 1.25em; }
/* Code blocks */
pre {
background: #f8f9fa;
border: 1px solid #e9ecef;
border-radius: 6px;
padding: 1rem;
overflow-x: auto;
font-size: 0.875em;
}
code {
font-family: "SF Mono", Monaco, "Cascadia Code", "Roboto Mono", Consolas, "Courier New", monospace;
background: #f1f3f4;
padding: 0.2em 0.4em;
border-radius: 3px;
font-size: 0.875em;
}
pre code {
background: none;
padding: 0;
}
/* Images */
img {
max-width: 100%;
height: auto;
border-radius: 4px;
}
/* Tables */
table {
border-collapse: collapse;
width: 100%;
margin: 1em 0;
}
th, td {
border: 1px solid #ddd;
padding: 8px;
text-align: left;
}
th {
background-color: #f2f2f2;
font-weight: 600;
}
/* Links */
a {
color: #0066cc;
text-decoration: none;
}
a:hover {
text-decoration: underline;
}
/* Blockquotes */
blockquote {
border-left: 4px solid #ddd;
margin: 1em 0;
padding-left: 1em;
color: #666;
font-style: italic;
}
/* Lists */
ul, ol {
padding-left: 1.5em;
}
li {
margin-bottom: 0.25em;
}
/* Remove any navbar/footer elements that might leak through */
.navbar, .nav, footer, .sidebar, .toc, .page-footer {
display: none !important;
}
/* Ensure responsive behavior for iframe */
@media (max-width: 768px) {
body {
padding: 15px;
font-size: 16px;
}
h1 { font-size: 1.75em; }
h2 { font-size: 1.35em; }
h3 { font-size: 1.15em; }
}

View File

@@ -1,3 +1,25 @@
# ghost-upload
This code uploads posts from https://projects.ansonbiggs.com to https://notes.ansonbiggs.com. I couldn't figure out how to update posts, and the Kagi API doesn't make it clear how long it caches results, so for now only posts that don't already exist on the Ghost blog are uploaded. If you want to update content, you need to manually edit the code and delete the affected posts on the blog.
This tool synchronizes posts from https://projects.ansonbiggs.com to the Ghost blog at https://notes.ansonbiggs.com.
## Features
- **Clean content extraction**: Uses Quarto ghost profile to generate clean HTML instead of iframes
- **Duplicate prevention**: Checks Ghost Admin API to avoid creating duplicate posts
- **AI summaries**: Uses Kagi Summarizer for post summaries
- **Dual content rendering**: GitLab CI builds both main site and ghost-optimized versions
## How It Works
1. **Dual Build Process**: GitLab CI builds the site twice:
- Main site → `public/` (normal theme with navigation)
- Ghost content → `public/ghost-content/` (minimal theme for content extraction)
2. **Content Extraction**: Rust tool fetches clean HTML from the ghost-content version instead of using iframes
3. **Duplicate Detection**: Uses Ghost Admin API to check for existing posts by slug
## Environment Variables
- `admin_api_key`: Ghost Admin API key (required)
- `kagi_api_key`: Kagi Summarizer API key (required)

View File

@@ -45,13 +45,29 @@ impl Post {
let slug = get_slug(link);
let summary = summarize_url(link).await;
// Extract content from ghost-optimized version
let ghost_content = extract_article_content(&link).await;
let html = html! {
p { (summary) }
iframe src=(link) style="width: 100%; height: 80vh" { }
p {
"This content was originally posted on my projects website " a href=(link) { "here." }
" The above summary was made by the " a href=("https://help.kagi.com/kagi/api/summarizer.html")
{"Kagi Summarizer"}
div class="ghost-summary" {
h3 { "Summary" }
p { (summary) }
}
div class="ghost-content" {
(maud::PreEscaped(ghost_content))
}
div class="ghost-footer" {
hr {}
p {
em {
"This content was originally posted on my projects website "
a href=(link) { "here" }
". The above summary was generated by the "
a href=("https://help.kagi.com/kagi/api/summarizer.html") {"Kagi Summarizer"}
"."
}
}
}
}.into_string();
@@ -130,6 +146,51 @@ fn get_slug(link: &str) -> String {
link.split_once("/posts/").unwrap().1.trim_end_matches('/').to_string()
}
async fn extract_article_content(original_link: &str) -> String {
// Convert original link to ghost-content version
let ghost_link = original_link.replace("projects.ansonbiggs.com", "projects.ansonbiggs.com/ghost-content");
match reqwest::get(&ghost_link).await {
Ok(response) => {
match response.text().await {
Ok(html_content) => {
let document = Html::parse_document(&html_content);
// Try different selectors to find the main content
let content_selectors = [
"#quarto-content main",
"#quarto-content",
"main",
"article",
".content",
"body"
];
for selector_str in &content_selectors {
if let Ok(selector) = Selector::parse(selector_str) {
if let Some(element) = document.select(&selector).next() {
let content = element.inner_html();
if !content.trim().is_empty() {
return content;
}
}
}
}
// Fallback: return original content with iframe if extraction fails
format!(r#"<div class="fallback-iframe">
<p><em>Content extraction failed. Falling back to embedded view:</em></p>
<iframe src="{}" style="width: 100%; height: 80vh; border: none;" loading="lazy"></iframe>
</div>"#, original_link)
}
Err(_) => format!(r#"<p><em>Failed to fetch content. <a href="{}">View original post</a></em></p>"#, original_link)
}
}
Err(_) => format!(r#"<p><em>Failed to fetch content. <a href="{}">View original post</a></em></p>"#, original_link)
}
}
#[derive(Deserialize, Debug)]
struct GhostPostsResponse {