fix: extract text content properly in epub generation

main
brobert (aider) 3 months ago
parent 49a9198af9
commit fa1801c979

@ -31,10 +31,14 @@ async function generateEpub() {
// Convert each post to ePub chapter format
for (const post of posts) {
const { document } = parseHTML(post.content);
// Parse the content HTML and extract text nodes
const { document } = parseHTML(`<div>${post.content}</div>`);
// Clean up content - remove unwanted elements if needed
const content = document.body.innerHTML;
// Get all text content from paragraphs and other text containers
const paragraphs = [...document.querySelectorAll('p, div, span')]
.map(el => el.textContent?.trim())
.filter(Boolean)
.join('\n\n');
options.content.push({
title: post.title,
@ -42,7 +46,7 @@ async function generateEpub() {
data: `
<h1>${post.title}</h1>
<p><em>Por ${post.author}</em></p>
${content}
${paragraphs || 'No content available'}
<p><a href="${post.url}">Publicación original</a></p>
`
});

Loading…
Cancel
Save