import { readFile } from 'fs/promises';
import Epub from 'epub-gen';
import { parseHTML } from 'linkedom';

/** A scraped blog post as stored in `posts.json`. */
interface BlogPost {
  title: string;
  author: string;
  /** HTML markup of the post body; only its `<p>` elements end up in the book. */
  content: string;
  /** Used as the target of the "Publicación original" link in the chapter footer. */
  url: string;
}

/** One chapter entry of the `content` array handed to epub-gen. */
interface EpubChapter {
  title: string;
  author: string;
  /** HTML markup of the chapter body. */
  data: string;
}

/**
 * Escapes the characters that are significant in HTML text and attribute values.
 *
 * @param text Plain text that is about to be interpolated into markup.
 * @returns The text with `&`, `<`, `>`, `"` and `'` replaced by entities.
 */
function escapeHtml(text: string): string {
  return text
    .replace(/&/g, '&amp;') // must run first so later entities are not re-escaped
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/** Runtime check that a parsed JSON value has the shape of a {@link BlogPost}. */
function isBlogPost(value: unknown): value is BlogPost {
  if (typeof value !== 'object' || value === null) {
    return false;
  }
  const record = value as Record<string, unknown>;
  return (
    typeof record.title === 'string' &&
    typeof record.author === 'string' &&
    typeof record.content === 'string' &&
    typeof record.url === 'string'
  );
}

/**
 * Parses and validates the contents of `posts.json`.
 *
 * @param raw The JSON text read from disk.
 * @returns The validated list of posts (possibly empty).
 * @throws SyntaxError if `raw` is not valid JSON.
 * @throws Error if the JSON is not an array or an entry is not a valid post.
 */
function parsePosts(raw: string): BlogPost[] {
  const parsed: unknown = JSON.parse(raw);
  if (!Array.isArray(parsed)) {
    throw new Error('posts.json must contain a JSON array of posts');
  }
  return parsed.map((entry: unknown, index: number): BlogPost => {
    if (!isBlogPost(entry)) {
      throw new Error(
        `posts.json entry ${index} is not a valid post (expected string title, author, content and url)`,
      );
    }
    return entry;
  });
}

/**
 * Extracts the `<p>` elements of a post body, replacing each image inside them
 * with its alt text (or dropping it when it has none).
 *
 * @param contentHtml HTML markup of the post body.
 * @returns The concatenated outer HTML of every paragraph; empty when there are none.
 */
function extractParagraphs(contentHtml: string): string {
  const { document } = parseHTML(`\n${contentHtml}\n`);
  return [...document.querySelectorAll('p')]
    .map((paragraph) => {
      for (const image of [...paragraph.querySelectorAll('img')]) {
        const alt = image.getAttribute('alt') ?? '';
        if (alt) {
          // A string argument is inserted as a text node, so the alt text
          // cannot introduce markup of its own.
          image.replaceWith(`[Imagen: ${alt}]`);
        } else {
          image.remove();
        }
      }
      return paragraph.outerHTML;
    })
    .join('');
}

/**
 * Converts a post into an ePub chapter: heading, byline, paragraphs and a footer
 * linking to the original publication and back to the index.
 *
 * NOTE(review): the markup around the title, byline and footer was missing from the
 * file as received (only the text remained, and the fallback string was an
 * unterminated literal). The tags below are a minimal reconstruction — confirm
 * against the intended layout.
 *
 * @param post The post to convert.
 * @returns The chapter entry for the epub-gen `content` array.
 */
function buildChapter(post: BlogPost): EpubChapter {
  const paragraphs = extractParagraphs(post.content) || '<p>No content available</p>';
  const data = [
    // NOTE(review): `appendChapterTitles` is also enabled in the options, so the
    // title may be rendered twice — confirm which of the two should stay.
    `<h1>${escapeHtml(post.title)}</h1>`,
    `<p>Por ${escapeHtml(post.author)}</p>`,
    paragraphs,
    `<p><a href="${escapeHtml(post.url)}">Publicación original</a></p>`,
    // NOTE(review): assumes epub-gen writes its HTML table of contents as
    // `toc.xhtml` next to the chapter files — confirm before shipping.
    '<p><a href="toc.xhtml">Volver al índice</a></p>',
  ].join('\n');

  return { title: post.title, author: post.author, data };
}

/**
 * Renders the text passed as the `customHtmlTocTemplate` option.
 *
 * NOTE(review): epub-gen's documented option is `customHtmlTocTemplatePath` (a file
 * path), so this callback may never be invoked. The text is kept exactly as found;
 * it looks like its markup was lost as well — confirm.
 */
function renderTocTemplate({ title, author }: { title: string; author: string }): string {
  return ` ${title}

${title}

Por ${author}

Nota: Las imágenes no se incluyeron en este eBook. Visite los enlaces originales para ver el contenido completo.

`;
}

/**
 * Reads the scraped posts from `posts.json` and writes them as chapters of
 * `cronicas-periodisticas.epub`.
 *
 * Logs an error and returns without generating anything when the file holds no
 * posts. Rejects when the file cannot be read or parsed, when an entry is
 * malformed, or when ePub generation fails.
 */
async function generateEpub(): Promise<void> {
  const posts = parsePosts(await readFile('posts.json', 'utf-8'));

  const [firstPost] = posts;
  if (firstPost === undefined) {
    console.error('No posts found in posts.json');
    return;
  }

  const options = {
    title: 'Crónicas Periodísticas',
    author: firstPost.author, // the first post's author stands in as the book author
    output: 'cronicas-periodisticas.epub',
    content: posts.map((post) => buildChapter(post)),
    appendChapterTitles: true,
    verbose: true,
    // NOTE(review): intended to disable image downloading, but `fetchImages` is not
    // among epub-gen's documented options — confirm it has any effect.
    fetchImages: false,
    customHtmlTocTemplate: renderTocTemplate,
  };

  console.log(`Generating ePub with ${posts.length} posts...`);
  await new Epub(options).promise;
  console.log('ePub generated successfully!');
}

generateEpub().catch((error: unknown) => {
  console.error(error);
  // Report the failure to the calling shell instead of exiting with status 0.
  process.exitCode = 1;
});