You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
89 lines
2.6 KiB
TypeScript
89 lines
2.6 KiB
TypeScript
import { readFile } from 'fs/promises';
|
|
import Epub from 'epub-gen';
|
|
import { parseHTML } from 'linkedom';
|
|
|
|
/**
 * One scraped blog post, as stored in `posts.json`.
 */
interface BlogPost {
  /** Post headline; used as the chapter title and heading. */
  title: string;
  /** Byline shown under the chapter heading. */
  author: string;
  /** Post body as an HTML fragment; parsed to extract its `<p>` elements. */
  content: string;
  /** Address of the original publication, linked at the end of the chapter. */
  url: string;
}
|
|
|
|
/** Chapter entry in the shape this script passes through the `content` option. */
interface EpubChapter {
  title: string;
  author: string;
  data: string;
}

/** Values destructured by the custom table-of-contents template. */
interface TocTemplateContext {
  title: string;
  author: string;
  chapters: { url: string; title: string }[];
}

/** Escapes the characters that are significant in (X)HTML text and attribute values. */
function escapeHtml(text: string): string {
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/** Runtime check that a parsed JSON value has the four string fields of a BlogPost. */
function isBlogPost(value: unknown): value is BlogPost {
  if (typeof value !== 'object' || value === null) {
    return false;
  }
  const record = value as Record<string, unknown>;
  return ['title', 'author', 'content', 'url'].every(
    (key) => typeof record[key] === 'string',
  );
}

/** Parses the text of posts.json and rejects anything that is not an array of posts. */
function parsePosts(json: string): BlogPost[] {
  const parsed: unknown = JSON.parse(json);
  if (!Array.isArray(parsed)) {
    throw new Error('posts.json must contain a JSON array of posts');
  }

  const posts: BlogPost[] = [];
  parsed.forEach((entry: unknown, index: number) => {
    if (!isBlogPost(entry)) {
      throw new Error(
        `posts.json entry ${index} must have string title, author, content and url fields`,
      );
    }
    posts.push(entry);
  });
  return posts;
}

/** Returns the concatenated `<p>` elements of an HTML fragment, with images replaced by their alt text. */
function extractParagraphs(html: string): string {
  // Wrap in a <div> so the fragment has a single root to parse under.
  const { document } = parseHTML(`<div>${html}</div>`);

  return [...document.querySelectorAll('p')]
    .map((paragraph) => {
      for (const image of paragraph.querySelectorAll('img')) {
        const alt = image.getAttribute('alt');
        // A string argument is inserted as a text node, so the alt text
        // cannot introduce markup of its own.
        image.replaceWith(alt ? `[Imagen: ${alt}]` : '');
      }
      return paragraph.outerHTML;
    })
    .join('');
}

/** Builds the chapter entry for one post, escaping every scraped value placed into markup. */
function renderChapter(post: BlogPost): EpubChapter {
  const paragraphs = extractParagraphs(post.content);
  const safeTitle = escapeHtml(post.title);
  const safeAuthor = escapeHtml(post.author);
  const safeUrl = escapeHtml(post.url);

  return {
    title: post.title,
    author: post.author,
    // NOTE(review): with `appendChapterTitles: true` the library may also
    // prepend the chapter title, duplicating this <h1> - confirm.
    data: `
      <h1>${safeTitle}</h1>
      <p><em>Por ${safeAuthor}</em></p>
      ${paragraphs || '<p>No content available</p>'}
      <div style="margin-top: 2em; border-top: 1px solid #eee; padding-top: 1em;">
        <p><a href="${safeUrl}">Publicación original</a></p>
        <p><a href="#toc">Volver al índice</a></p>
      </div>
    `,
  };
}

/**
 * Reads the scraped posts from `posts.json` and writes them out as
 * `cronicas-periodisticas.epub`, one chapter per post.
 *
 * Logs an error and returns without generating anything when the file
 * contains an empty array.
 *
 * @returns A promise that resolves once the ePub has been generated.
 * @throws If `posts.json` cannot be read, is not valid JSON, is not an array
 *   of posts with string `title`/`author`/`content`/`url` fields, or if ePub
 *   generation itself fails.
 */
async function generateEpub(): Promise<void> {
  // Read the scraped posts
  const postsData = await readFile('posts.json', 'utf-8');
  const posts = parsePosts(postsData);

  const [firstPost] = posts;
  if (firstPost === undefined) {
    console.error('No posts found in posts.json');
    return;
  }

  // Convert each post to ePub chapter format
  const content: EpubChapter[] = posts.map(renderChapter);

  // Prepare ePub options
  // NOTE(review): epub-gen's documented options include
  // `customHtmlTocTemplatePath` (a template file path) rather than a template
  // function, and do not list `fetchImages` - confirm both options below are
  // honoured by the installed version.
  const options = {
    title: 'Crónicas Periodísticas',
    author: firstPost.author, // Using first post's author as main author
    output: 'cronicas-periodisticas.epub',
    content,
    appendChapterTitles: true,
    verbose: true,
    fetchImages: false, // Disable image downloading
    customHtmlTocTemplate: ({ title, author, chapters }: TocTemplateContext): string => `
      <!DOCTYPE html>
      <html>
        <head>
          <title>${title}</title>
        </head>
        <body>
          <h1>${title}</h1>
          <p>Por ${author}</p>
          <p><em>Nota: Las imágenes no se incluyeron en este eBook. Visite los enlaces originales para ver el contenido completo.</em></p>
          <ul>
            ${chapters.map((chapter) => `<li><a href="${chapter.url}">${chapter.title}</a></li>`).join('')}
          </ul>
        </body>
      </html>
    `,
  };

  // Generate the ePub
  console.log(`Generating ePub with ${posts.length} posts...`);
  await new Epub(options).promise;
  console.log('ePub generated successfully!');
}
|
|
|
|
// Script entry point: run the generator and, on failure, log the error and
// mark the process as failed so shells and CI do not see exit status 0.
generateEpub().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});