fix: preserve paragraph structure in epub generation

main
brobert (aider) 3 months ago
parent 037fec8b8a
commit 6ee61a7c67

@@ -48,14 +48,21 @@ async function generateEpub() {
// Convert each post to ePub chapter format
for (const post of posts) {
// Parse the content HTML and extract text nodes
// Parse the content HTML and preserve paragraph structure
const { document } = parseHTML(`<div>${post.content}</div>`);
// Get all text content from paragraphs and other text containers
const paragraphs = [...document.querySelectorAll('p, div, span')]
.map(el => el.textContent?.trim())
.filter(Boolean)
.join('\n\n');
// Clean up content while preserving structure
const paragraphs = [...document.querySelectorAll('p')]
.map(p => {
// Remove any images but keep their alt text
const images = p.querySelectorAll('img');
images.forEach(img => {
const alt = img.getAttribute('alt') || '';
img.replaceWith(alt ? `[Imagen: ${alt}]` : '');
});
return p.outerHTML;
})
.join('');
options.content.push({
title: post.title,
@@ -63,7 +70,7 @@ async function generateEpub() {
data: `
<h1>${post.title}</h1>
<p><em>Por ${post.author}</em></p>
${paragraphs || 'No content available'}
${paragraphs || '<p>No content available</p>'}
<p><a href="${post.url}">Publicación original</a></p>
`
});

Loading…
Cancel
Save