import { readFile } from 'fs/promises';
import Epub from 'epub-gen';
import { parseHTML } from 'linkedom';

/** A scraped blog post as stored in posts.json. */
interface BlogPost {
  title: string;
  author: string;
  content: string;
  url: string;
}

/** One entry of the `content` array handed to epub-gen. */
interface EpubChapter {
  title: string;
  author: string;
  data: string;
}

/** Arguments destructured by the table-of-contents template callback. */
interface TocTemplateArgs {
  title: string;
  author: string;
  chapters?: unknown;
}

/** Elements whose text the script treats as chapter content. */
const TEXT_CONTAINER_SELECTOR = 'p, div, span';

/** Tags that start a new paragraph; everything else is treated as inline. */
const PARAGRAPH_TAGS: ReadonlySet<string> = new Set(['p', 'div']);

// Numeric DOM nodeType values (Node.ELEMENT_NODE / Node.TEXT_NODE).
const ELEMENT_NODE = 1;
const TEXT_NODE = 3;

/** Escapes text so it can be placed inside HTML element content or attributes. */
function escapeHtml(text: string): string {
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Extracts the text of a post as a list of trimmed, non-empty paragraphs.
 *
 * Only text inside `p`, `div` or `span` elements is considered. Each piece of
 * text is emitted exactly once: only the outermost matching elements are used
 * as roots, and the text below them is split at `p`/`div` boundaries, so a
 * wrapper `<div>` no longer repeats the text of the paragraphs it contains.
 *
 * @param html - Raw HTML of the post body.
 * @returns Paragraph texts in document order (decoded text, not HTML).
 */
function extractParagraphs(html: string): string[] {
  const { document } = parseHTML(`
${html}
`);

  const paragraphs: string[] = [];
  let pending = '';

  // Emits the text accumulated so far as one paragraph, if it is not blank.
  const flush = (): void => {
    const text = pending.trim();
    pending = '';
    if (text) {
      paragraphs.push(text);
    }
  };

  const visit = (node: Node): void => {
    if (node.nodeType === TEXT_NODE) {
      pending += node.textContent ?? '';
      return;
    }
    if (node.nodeType !== ELEMENT_NODE) {
      return; // comments, processing instructions, ...
    }
    const startsParagraph = PARAGRAPH_TAGS.has(node.nodeName.toLowerCase());
    if (startsParagraph) {
      flush();
    }
    for (const child of Array.from(node.childNodes)) {
      visit(child);
    }
    if (startsParagraph) {
      flush();
    }
  };

  // Nested matches are reached through their outermost matching ancestor.
  const roots = Array.from(document.querySelectorAll(TEXT_CONTAINER_SELECTOR)).filter(
    (el) => el.parentElement?.closest(TEXT_CONTAINER_SELECTOR) == null,
  );
  for (const root of roots) {
    visit(root);
    flush();
  }

  return paragraphs;
}

/**
 * Renders one post as the HTML body of an ePub chapter: title, byline, the
 * extracted paragraphs (or a placeholder) and a footer pointing at the
 * original publication.
 *
 * All text is escaped because the extracted text is entity-decoded and the
 * chapter data is interpreted as HTML.
 */
function renderChapter(post: BlogPost): string {
  const paragraphs = extractParagraphs(post.content);
  const body = paragraphs.length > 0 ? paragraphs : ['No content available'];

  // Link the footer to the source when a URL is present at runtime.
  const hasUrl = typeof post.url === 'string' && post.url !== '';
  const footer = hasUrl
    ? `<a href="${escapeHtml(post.url)}">Publicación original</a>`
    : 'Publicación original';

  return [
    `<p>${escapeHtml(post.title)}</p>`,
    `<p>${escapeHtml(`Por ${post.author}`)}</p>`,
    ...body.map((text) => `<p>${escapeHtml(text)}</p>`),
    `<p>${footer}</p>`,
  ].join('\n');
}

/**
 * Reads the scraped posts from `posts.json` and writes them to
 * `cronicas-periodisticas.epub`, one chapter per post.
 *
 * Logs an error and returns without generating anything when the file does
 * not contain a non-empty array. Rejects if reading, parsing or ePub
 * generation fails.
 */
async function generateEpub(): Promise<void> {
  // Read the scraped posts
  const postsData = await readFile('posts.json', 'utf-8');
  const parsed: unknown = JSON.parse(postsData);

  if (!Array.isArray(parsed) || parsed.length === 0) {
    console.error('No posts found in posts.json');
    return;
  }

  // NOTE(review): individual entries are not validated; they are assumed to
  // match BlogPost as produced by the scraper.
  const posts = parsed as BlogPost[];
  const [firstPost] = posts;
  if (firstPost === undefined) {
    console.error('No posts found in posts.json');
    return;
  }

  // Convert each post to ePub chapter format
  const content: EpubChapter[] = posts.map((post) => ({
    title: post.title,
    author: post.author,
    data: renderChapter(post),
  }));

  // Prepare ePub options
  // NOTE(review): `fetchImages` and `customHtmlTocTemplate` are kept as in the
  // original; confirm the installed epub-gen version honours them (its README
  // documents `customHtmlTocTemplatePath` instead).
  const options = {
    title: 'Crónicas Periodísticas',
    author: firstPost.author, // Using first post's author as main author
    output: 'cronicas-periodisticas.epub',
    content,
    appendChapterTitles: true,
    verbose: true,
    fetchImages: false, // Disable image downloading
    customHtmlTocTemplate: ({ title, author }: TocTemplateArgs): string =>
      [
        ` ${title}`,
        '',
        `${title}`,
        '',
        `Por ${author}`,
        '',
        'Nota: Las imágenes no se incluyeron en este eBook. Visite los enlaces originales para ver el contenido completo.',
        '',
        '',
      ].join('\n'),
  };

  // Generate the ePub
  console.log(`Generating ePub with ${posts.length} posts...`);
  await new Epub(options).promise;
  console.log('ePub generated successfully!');
}

generateEpub().catch(console.error);