fix: preserve paragraph structure in epub generation

main
brobert (aider) 3 months ago
parent 037fec8b8a
commit 6ee61a7c67

@ -48,14 +48,21 @@ async function generateEpub() {
// Convert each post to ePub chapter format // Convert each post to ePub chapter format
for (const post of posts) { for (const post of posts) {
// Parse the content HTML and extract text nodes // Parse the content HTML and preserve paragraph structure
const { document } = parseHTML(`<div>${post.content}</div>`); const { document } = parseHTML(`<div>${post.content}</div>`);
// Get all text content from paragraphs and other text containers // Clean up content while preserving structure
const paragraphs = [...document.querySelectorAll('p, div, span')] const paragraphs = [...document.querySelectorAll('p')]
.map(el => el.textContent?.trim()) .map(p => {
.filter(Boolean) // Remove any images but keep their alt text
.join('\n\n'); const images = p.querySelectorAll('img');
images.forEach(img => {
const alt = img.getAttribute('alt') || '';
img.replaceWith(alt ? `[Imagen: ${alt}]` : '');
});
return p.outerHTML;
})
.join('');
options.content.push({ options.content.push({
title: post.title, title: post.title,
@ -63,7 +70,7 @@ async function generateEpub() {
data: ` data: `
<h1>${post.title}</h1> <h1>${post.title}</h1>
<p><em>Por ${post.author}</em></p> <p><em>Por ${post.author}</em></p>
${paragraphs || 'No content available'} ${paragraphs || '<p>No content available</p>'}
<p><a href="${post.url}">Publicación original</a></p> <p><a href="${post.url}">Publicación original</a></p>
` `
}); });

Loading…
Cancel
Save