You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
62 lines
1.6 KiB
TypeScript
62 lines
1.6 KiB
TypeScript
import { readFile } from 'fs/promises';
|
|
import Epub from 'epub-gen';
|
|
import { parseHTML } from 'linkedom';
|
|
|
|
/**
 * One scraped blog post, as stored in `posts.json`.
 */
interface BlogPost {
  /** Post headline; used as the chapter title. */
  title: string;
  /** Author name shown in the chapter byline. */
  author: string;
  /** Post body; parsed as HTML when the chapter is built. */
  content: string;
  /** Address used for the "original publication" link. */
  url: string;
}
|
|
|
|
/** One chapter entry of the `content` array handed to the ePub generator. */
interface EpubChapter {
  title: string;
  author: string;
  data: string;
}

/** Elements that start a new paragraph when post HTML is flattened to text. */
const BLOCK_SELECTOR = 'p, div';

/** Standard DOM `nodeType` values for element and text nodes. */
const ELEMENT_NODE = 1;
const TEXT_NODE = 3;

/** Escapes the characters that are significant in HTML text and attribute values. */
function escapeHtml(text: string): string {
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/** Runtime check that a parsed JSON value has the four string fields of a `BlogPost`. */
function isBlogPost(value: unknown): value is BlogPost {
  if (typeof value !== 'object' || value === null) {
    return false;
  }
  const record = value as Record<string, unknown>;
  return ['title', 'author', 'content', 'url'].every(
    (key) => typeof record[key] === 'string',
  );
}

/**
 * Walks `container` and appends one trimmed string per paragraph to `out`.
 * Block elements (`p`, `div`, or anything containing one) are recursed into;
 * consecutive inline nodes between them are merged into a single paragraph,
 * so every piece of text is emitted exactly once.
 */
function collectParagraphs(container: Element, out: string[]): void {
  let inlineRun = '';
  const flush = (): void => {
    const text = inlineRun.trim();
    if (text) {
      out.push(text);
    }
    inlineRun = '';
  };

  for (const child of Array.from(container.childNodes)) {
    if (child.nodeType === ELEMENT_NODE) {
      // nodeType was just checked, so the node is an element.
      const element = child as Element;
      const isBlock =
        element.matches(BLOCK_SELECTOR) ||
        element.querySelector(BLOCK_SELECTOR) !== null;
      if (isBlock) {
        flush();
        collectParagraphs(element, out);
        continue;
      }
    } else if (child.nodeType !== TEXT_NODE) {
      // Skip comments and other non-text nodes.
      continue;
    }
    inlineRun += child.textContent ?? '';
  }
  flush();
}

/** Parses a post's HTML and returns its text as a list of non-empty paragraphs. */
function extractParagraphs(html: string): string[] {
  // The wrapper <div> gives fragments without a single root a container to walk.
  const { document } = parseHTML(`<div>${html}</div>`);
  const root = document.querySelector('div');
  const paragraphs: string[] = [];
  if (root !== null) {
    collectParagraphs(root, paragraphs);
  }
  return paragraphs;
}

/** Builds the HTML body of one chapter: heading, byline, paragraphs and source link. */
function buildChapterHtml(post: BlogPost): string {
  const paragraphs = extractParagraphs(post.content);
  // The extracted text is entity-decoded, so it must be re-escaped before it
  // is embedded in markup again.
  const body =
    paragraphs.length > 0
      ? paragraphs.map((text) => `<p>${escapeHtml(text)}</p>`).join('\n')
      : '<p>No content available</p>';

  return [
    `<h1>${escapeHtml(post.title)}</h1>`,
    `<p><em>Por ${escapeHtml(post.author)}</em></p>`,
    body,
    `<p><a href="${escapeHtml(post.url)}">Publicación original</a></p>`,
  ].join('\n');
}

/** Converts a post into the chapter object pushed into the ePub `content` list. */
function toChapter(post: BlogPost): EpubChapter {
  return {
    title: post.title,
    author: post.author,
    data: buildChapterHtml(post),
  };
}

/**
 * Reads the scraped posts from `posts.json` and writes them as chapters of
 * `cronicas-periodisticas.epub`.
 *
 * Logs an error and returns without generating anything when the file holds
 * an empty array.
 *
 * @throws Error when `posts.json` does not contain an array of post objects
 *   with string `title`, `author`, `content` and `url` fields. Errors from
 *   reading the file, parsing the JSON or generating the ePub also propagate.
 */
async function generateEpub(): Promise<void> {
  // Read the scraped posts and validate their shape before using them.
  const postsData = await readFile('posts.json', 'utf-8');
  const parsed: unknown = JSON.parse(postsData);
  if (!Array.isArray(parsed) || !parsed.every(isBlogPost)) {
    throw new Error(
      'posts.json must contain an array of posts with string title, author, content and url fields',
    );
  }
  const posts: BlogPost[] = parsed;

  const firstPost = posts[0];
  if (firstPost === undefined) {
    console.error('No posts found in posts.json');
    return;
  }

  // Prepare ePub options.
  // NOTE(review): with appendChapterTitles enabled the generator may add its
  // own chapter heading on top of the <h1> in each chapter body — confirm
  // against the rendered output.
  const options = {
    title: 'Crónicas Periodísticas',
    author: firstPost.author, // Using first post's author as main author
    output: 'cronicas-periodisticas.epub',
    content: posts.map(toChapter),
    appendChapterTitles: true,
    verbose: true,
  };

  // Generate the ePub.
  console.log(`Generating ePub with ${posts.length} posts...`);
  await new Epub(options).promise;
  console.log('ePub generated successfully!');
}
|
|
|
|
// Script entry point: run the generator, log any failure, and mark the
// process as failed so shells and CI see a non-zero exit status.
generateEpub().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
|