You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

89 lines
2.6 KiB
TypeScript

import { readFile } from 'fs/promises';
import Epub from 'epub-gen';
import { parseHTML } from 'linkedom';
/**
 * One scraped blog post, as deserialized from `posts.json`.
 */
interface BlogPost {
  /** Post headline; becomes the title of the generated chapter. */
  title: string;
  /** Byline of the post; the first post's author is also used as the book author. */
  author: string;
  /** Address of the original publication, linked at the end of the chapter. */
  url: string;
  /** Post body as an HTML fragment; its `<p>` elements are extracted for the chapter. */
  content: string;
}
/** Chapter entry in the shape this script pushes onto the `content` option. */
interface EpubChapter {
  title: string;
  author: string;
  data: string;
}

/** Arguments destructured by the custom table-of-contents template. */
interface TocTemplateArgs {
  title: string;
  author: string;
  chapters: { url: string; title: string }[];
}

/**
 * Escapes the characters that are significant in HTML/XHTML text and
 * attribute values (`&`, `<`, `>`, `"`, `'`).
 *
 * @param text Plain text to embed in markup.
 * @returns The text with those characters replaced by entities.
 */
function escapeHtml(text: string): string {
  // The scraped JSON is not schema-validated, so coerce defensively instead
  // of throwing on a missing (non-string) field.
  return String(text)
    .replace(/&/g, '&amp;') // must run first so later entities are not re-escaped
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Renders the HTML table of contents handed to the `customHtmlTocTemplate` option.
 *
 * NOTE(review): the values are interpolated exactly as received; whether the
 * library pre-escapes them (or calls this function at all) could not be
 * confirmed from this file.
 */
function renderTocHtml({ title, author, chapters }: TocTemplateArgs): string {
  const items = chapters
    .map((chapter) => `<li><a href="${chapter.url}">${chapter.title}</a></li>`)
    .join('');

  // Leading/trailing empty entries keep the surrounding newlines of the markup.
  return [
    '',
    '<!DOCTYPE html>',
    '<html>',
    '<head>',
    `<title>${title}</title>`,
    '</head>',
    '<body>',
    `<h1>${title}</h1>`,
    `<p>Por ${author}</p>`,
    '<p><em>Nota: Las imágenes no se incluyeron en este eBook. Visite los enlaces originales para ver el contenido completo.</em></p>',
    '<ul>',
    items,
    '</ul>',
    '</body>',
    '</html>',
    '',
  ].join('\n');
}

/**
 * Extracts every `<p>` element of a post body as serialized HTML, replacing
 * images inside paragraphs with a `[Imagen: alt]` text marker (or nothing when
 * the image has no alt text).
 *
 * @param contentHtml HTML fragment of the post body.
 * @returns The concatenated paragraph markup; empty when no `<p>` is found.
 */
function extractParagraphsHtml(contentHtml: string): string {
  // Wrap in a container so the fragment parses under a single root.
  const { document } = parseHTML(`<div>${contentHtml}</div>`);

  return [...document.querySelectorAll('p')]
    .map((paragraph) => {
      for (const img of paragraph.querySelectorAll('img')) {
        const alt = img.getAttribute('alt') || '';
        if (alt) {
          // A string argument is inserted as a text node, so it is escaped on output.
          img.replaceWith(`[Imagen: ${alt}]`);
        } else {
          img.remove();
        }
      }
      return paragraph.outerHTML;
    })
    .join('');
}

/**
 * Builds the markup of one chapter: heading, byline, body paragraphs and a
 * footer linking to the original post and back to the index.
 *
 * Title, author and URL are treated as plain text and HTML-escaped, so
 * characters such as `&` or `<` in scraped data cannot break the markup.
 *
 * @param post Title, author and source URL of the post.
 * @param paragraphsHtml Already-serialized paragraph markup for the body.
 * @returns The chapter HTML.
 */
function buildChapterHtml(
  post: { title: string; author: string; url: string },
  paragraphsHtml: string,
): string {
  // NOTE(review): the index link targets `toc.html`; confirm this matches the
  // file name the ePub library actually generates for its HTML TOC.
  return [
    '',
    `<h1>${escapeHtml(post.title)}</h1>`,
    `<p><em>Por ${escapeHtml(post.author)}</em></p>`,
    // `||` on purpose: an empty string must fall back to the placeholder.
    paragraphsHtml || '<p>No content available</p>',
    '<div style="margin-top: 2em; border-top: 1px solid #eee; padding-top: 1em;">',
    `<p><a href="${escapeHtml(post.url)}">Publicación original</a></p>`,
    '<p><a href="toc.html">Volver al índice</a></p>',
    '</div>',
    '',
  ].join('\n');
}

/**
 * Reads the scraped posts from `posts.json` and writes them as chapters of
 * `cronicas-periodisticas.epub`.
 *
 * Logs an error and returns without generating anything when the file holds
 * no posts.
 *
 * @throws Error when `posts.json` does not contain a JSON array; file-system,
 *   JSON-syntax and ePub-generation failures propagate as rejections.
 */
async function generateEpub(): Promise<void> {
  // Read the scraped posts
  const postsData = await readFile('posts.json', 'utf-8');
  const parsed: unknown = JSON.parse(postsData);
  if (!Array.isArray(parsed)) {
    throw new Error('posts.json must contain an array of posts');
  }
  const posts = parsed as BlogPost[];

  const firstPost = posts[0];
  if (firstPost === undefined) {
    console.error('No posts found in posts.json');
    return;
  }

  // Convert each post to ePub chapter format
  const chapters: EpubChapter[] = posts.map((post) => ({
    title: post.title,
    author: post.author,
    data: buildChapterHtml(post, extractParagraphsHtml(post.content)),
  }));

  // Prepare ePub options
  const options = {
    title: 'Crónicas Periodísticas',
    author: firstPost.author, // Using first post's author as main author
    output: 'cronicas-periodisticas.epub',
    content: chapters,
    appendChapterTitles: true,
    verbose: true,
    // NOTE(review): `fetchImages` and `customHtmlTocTemplate` are kept as in the
    // original; confirm the installed ePub library recognizes both options.
    fetchImages: false, // Intended to disable image downloading
    customHtmlTocTemplate: renderTocHtml,
  };

  // Generate the ePub
  console.log(`Generating ePub with ${posts.length} posts...`);
  await new Epub(options).promise;
  console.log('ePub generated successfully!');
}
// Run the generator. On failure, log the error and mark the process as failed
// so shells and CI pipelines do not see a successful (0) exit status.
generateEpub().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});