import { parseHTML } from 'linkedom';
import { writeFile } from 'fs/promises';

/** A single scraped blog entry, in the shape written to `posts.json`. */
interface BlogPost {
  /** Text of the post heading, or 'Untitled' when no heading is found. */
  title: string;
  /** Comma-separated category link texts, or 'Unknown' when there are none. */
  author: string;
  /** Inner HTML of the post body container; empty string when it is missing. */
  content: string;
  /** Not set by `fetchBlogPosts` in this file. */
  date?: string;
  /** Absolute URL the post was downloaded from. */
  url: string;
}

const BLOG_URL = 'https://cronicasperiodisticas.wordpress.com';

/**
 * Downloads `url` and returns the response body as text.
 *
 * @throws Error when the server answers with a non-2xx status, so that error
 *   pages are never parsed as if they were blog content.
 */
async function fetchText(url: string): Promise<string> {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(
      `Request for ${url} failed with HTTP ${response.status} ${response.statusText}`,
    );
  }
  return response.text();
}

/**
 * Turns an anchor's href into an absolute URL, resolving relative links
 * against `BLOG_URL`.
 *
 * @returns the absolute URL, or `undefined` for empty or unparseable hrefs.
 */
function resolvePostUrl(href: string): string | undefined {
  if (!href) {
    // An empty href would resolve to BLOG_URL itself and re-fetch the index.
    return undefined;
  }
  try {
    return new URL(href, BLOG_URL).href;
  } catch {
    return undefined;
  }
}

/**
 * Collects the post URLs linked from the blog index markup, de-duplicated and
 * in document order.
 */
function extractPostUrls(indexHtml: string): string[] {
  const { document } = parseHTML(indexHtml);

  // Extract post URLs - this selector might need adjustment
  const postLinks = [...document.querySelectorAll('div.posttitle a')] as HTMLAnchorElement[];

  // A Set keeps insertion order while dropping repeated links.
  const uniqueUrls = new Set<string>();
  for (const link of postLinks) {
    const resolved = resolvePostUrl(link.href);
    if (resolved !== undefined) {
      uniqueUrls.add(resolved);
    }
  }
  return [...uniqueUrls];
}

/** Builds a `BlogPost` from the markup of a single post page. */
function parsePost(postHtml: string, url: string): BlogPost {
  const { document: postDoc } = parseHTML(postHtml);

  // `||` (not `??`) is deliberate: an empty heading also falls back to 'Untitled'.
  const title = postDoc.querySelector('h2.pagetitle')?.textContent?.trim() || 'Untitled';
  const content = postDoc.querySelector('div.entry')?.innerHTML || '';

  // Extract author from categories - this might need adjustment
  const author =
    [...postDoc.querySelectorAll('a[rel="category tag"]')]
      .map(el => el.textContent?.trim())
      .filter(Boolean)
      .join(', ') || 'Unknown';

  return { title, author, content, url };
}

/**
 * Downloads the blog index page, follows every post link found on it, and
 * returns the parsed posts.
 *
 * A post that fails to download or parse is logged and skipped; a failure to
 * download the index page itself rejects the returned promise.
 *
 * @returns the posts that were processed successfully, in index-page order.
 * @throws Error when the index page request fails or returns a non-2xx status.
 */
async function fetchBlogPosts(): Promise<BlogPost[]> {
  console.log(`Fetching blog posts from ${BLOG_URL}...`);
  const postUrls = extractPostUrls(await fetchText(BLOG_URL));

  const posts: BlogPost[] = [];
  // Posts are fetched one at a time, matching the original request pattern.
  for (const url of postUrls) {
    console.log(`Processing post: ${url}`);
    try {
      posts.push(parsePost(await fetchText(url), url));
    } catch (error) {
      console.error(`Failed to process ${url}:`, error);
    }
  }
  return posts;
}

/** Scrapes the blog and writes the result to `posts.json` as pretty-printed JSON. */
async function main() {
  const posts = await fetchBlogPosts();
  await writeFile('posts.json', JSON.stringify(posts, null, 2));
  console.log(`Saved ${posts.length} posts to posts.json`);
}

main().catch(console.error);