You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

63 lines
1.8 KiB
TypeScript

import { parseHTML } from 'linkedom';
import { writeFile } from 'fs/promises';
/** A single scraped blog post, in the shape that is serialized to the output JSON. */
interface BlogPost {
  /** Post headline; the scraper substitutes 'Untitled' when none is found. */
  title: string;
  /** Comma-separated category names used as the author; 'Unknown' when none are found. */
  author: string;
  /** Inner HTML of the post body (an empty string when the body element is missing). */
  content: string;
  /** Optional publication date; its format is not established anywhere in this file. */
  date?: string;
  /** URL the post was downloaded from. */
  url: string;
}
// Front page of the blog to scrape; it is downloaded first to discover the individual post links.
const BLOG_URL = 'https://cronicasperiodisticas.wordpress.com';
/**
 * Downloads `url` and returns the response body as text.
 *
 * @param url - Absolute URL to request.
 * @returns The response body decoded as text.
 * @throws Error when the server answers with a non-2xx status, so that an
 *   error page is never mistaken for real blog markup.
 */
async function fetchHtml(url: string): Promise<string> {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(
      `Request to ${url} failed with status ${response.status} ${response.statusText}`,
    );
  }
  return response.text();
}

/**
 * Scrapes the blog front page at `BLOG_URL`, follows every post link found
 * there and extracts title, author (taken from the category links) and body
 * HTML of each post. The optional `date` field is not populated.
 *
 * Posts are downloaded one after another. A post that cannot be downloaded or
 * parsed is logged and skipped, so one bad post does not abort the whole run.
 *
 * @returns The posts that were processed successfully, in front-page order.
 * @throws Error when the front page itself cannot be downloaded.
 */
async function fetchBlogPosts(): Promise<BlogPost[]> {
  console.log(`Fetching blog posts from ${BLOG_URL}...`);
  const { document } = parseHTML(await fetchHtml(BLOG_URL));

  // Extract post URLs - this selector might need adjustment.
  // A Set keeps first-seen order while dropping links that appear more than once.
  const postUrls = new Set<string>();
  for (const link of [...document.querySelectorAll('div.posttitle a')]) {
    const href = link.getAttribute('href');
    if (!href) {
      continue;
    }
    try {
      // Resolve against the blog root so relative hrefs become fetchable URLs.
      postUrls.add(new URL(href, BLOG_URL).href);
    } catch {
      console.error(`Skipping malformed post link: ${href}`);
    }
  }

  const posts: BlogPost[] = [];
  for (const url of postUrls) {
    console.log(`Processing post: ${url}`);
    try {
      const { document: postDoc } = parseHTML(await fetchHtml(url));

      // `||` (not `??`) on purpose: an empty or whitespace-only title also falls back.
      const title = postDoc.querySelector('h2.pagetitle')?.textContent?.trim() || 'Untitled';
      const content = postDoc.querySelector('div.entry')?.innerHTML || '';

      // Extract author from categories - this might need adjustment
      const author =
        [...postDoc.querySelectorAll('a[rel="category tag"]')]
          .map(el => el.textContent?.trim())
          .filter(Boolean)
          .join(', ') || 'Unknown';

      posts.push({ title, author, content, url });
    } catch (error) {
      console.error(`Failed to process ${url}:`, error);
    }
  }
  return posts;
}
/**
 * Script entry point: scrapes the blog and stores the result as
 * pretty-printed JSON (2-space indent) in `posts.json`, then logs how many
 * posts were saved.
 */
async function main(): Promise<void> {
  const scraped = await fetchBlogPosts();
  const serialized = JSON.stringify(scraped, null, 2);
  await writeFile('posts.json', serialized);
  console.log(`Saved ${scraped.length} posts to posts.json`);
}
// Run the script. On failure, log the error and mark the process as failed so
// that shells, CI jobs and schedulers do not see a broken run as a success.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});