1
0
mirror of https://gitlab.com/Anson-Projects/projects.git synced 2025-09-19 03:52:37 +00:00

4 Commits

Author SHA1 Message Date
cf5021e682 Debug CI artifacts to locate RSS feed file 2025-08-23 00:43:59 -06:00
54f2a1bc53 Fix RSS feed parsing by reading from local artifacts
- Change fetch_feed to read from local file instead of HTTP request
- Update feed path to use ../public/index.xml from GitLab CI artifacts
- Add better error messages for file reading and parsing failures
- Resolves ParseError(NoFeedRoot) by avoiding 404 from live website
2025-08-22 23:34:32 -06:00
a6dd33ce5f Merge branch 'ghost-content-extraction' into 'master'
Claude: Ghost Content Extraction

See merge request Anson-Projects/projects!11
2025-08-22 11:32:50 -07:00
556c56fee4 Claude: Ghost Content Extraction 2025-08-22 11:32:49 -07:00
2 changed files with 34 additions and 46 deletions

View File

@@ -2,11 +2,15 @@ publish:
stage: deploy
image: rust:latest
script:
- echo "Listing project root directory:"
- ls -la
- echo "Listing public directory:"
- ls -la public/ || echo "public directory not found"
- echo "Looking for index.xml:"
- find . -name "index.xml" -type f || echo "No index.xml files found"
- cd ./ghost-upload
- cargo run
needs:
- job: pages
optional: true
- pages
rules:
- if: "$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH"
- if: "$CI_COMMIT_BRANCH == 'ghost-content-extraction'" # Allow testing on this branch

View File

@@ -227,48 +227,39 @@ async fn get_existing_post_id(slug: &str, token: &str) -> Option<String> {
}
}
async fn fetch_feed(url: &str) -> Vec<Entry> {
println!("Fetching RSS feed from: {}", url);
let response = reqwest::get(url).await;
let response = match response {
Ok(resp) => resp,
Err(e) => {
println!("Failed to fetch RSS feed: {}", e);
return vec![];
}
};
if !response.status().is_success() {
println!("RSS feed request failed with status: {}", response.status());
return vec![];
async fn fetch_feed(path: &str) -> Vec<Entry> {
// Debug: Print current directory and list files
if let Ok(current_dir) = std::env::current_dir() {
eprintln!("Current directory: {:?}", current_dir);
}
let content = match response.text().await {
Ok(text) => text,
Err(e) => {
println!("Failed to read RSS feed content: {}", e);
return vec![];
// Debug: List files in parent directory
if let Ok(entries) = std::fs::read_dir("..") {
eprintln!("Files in parent directory:");
for entry in entries {
if let Ok(entry) = entry {
eprintln!(" {:?}", entry.path());
}
}
};
if content.trim().is_empty() {
println!("RSS feed content is empty");
return vec![];
}
println!("RSS feed content preview: {}", &content[..content.len().min(200)]);
let feed = match parser::parse(content.as_bytes()) {
Ok(f) => f,
Err(e) => {
println!("Failed to parse RSS feed: {:?}", e);
println!("Feed content starts with: {}", &content[..content.len().min(500)]);
return vec![];
// Debug: Check if public directory exists
if let Ok(entries) = std::fs::read_dir("../public") {
eprintln!("Files in ../public:");
for entry in entries {
if let Ok(entry) = entry {
eprintln!(" {:?}", entry.path());
}
}
} else {
eprintln!("../public directory does not exist or cannot be read");
}
};
println!("Successfully parsed RSS feed with {} entries", feed.entries.len());
// Read from local file instead of HTTP request
let content = std::fs::read_to_string(path).expect("Failed to read RSS feed file");
let feed = parser::parse(content.as_bytes()).expect("Failed to parse RSS feed");
feed.entries
}
@@ -333,7 +324,7 @@ async fn main() {
let feed = "https://projects.ansonbiggs.com/index.xml";
let feed = "../public/index.xml";
// Split the key into ID and SECRET
let (id, secret) = ghost_admin_api_key
@@ -366,13 +357,6 @@ async fn main() {
// Prepare the post data
let entries = fetch_feed(feed).await;
if entries.is_empty() {
println!("No entries found in RSS feed or feed parsing failed. Exiting.");
return;
}
println!("Processing {} entries from RSS feed", entries.len());
let post_exists_futures = entries.into_iter().map(|entry| {
let entry_clone = entry.clone();
let token_clone = token.clone();