From 6ee61a7c67f7c408888078f65340f1836367ca20 Mon Sep 17 00:00:00 2001 From: "brobert (aider)" Date: Tue, 1 Apr 2025 15:12:14 +0200 Subject: [PATCH] fix: preserve paragraph structure in epub generation --- generate-epub.ts | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/generate-epub.ts b/generate-epub.ts index 1390860..617953c 100644 --- a/generate-epub.ts +++ b/generate-epub.ts @@ -48,14 +48,21 @@ async function generateEpub() { // Convert each post to ePub chapter format for (const post of posts) { - // Parse the content HTML and extract text nodes + // Parse the content HTML and preserve paragraph structure const { document } = parseHTML(`
${post.content}
`); - // Get all text content from paragraphs and other text containers - const paragraphs = [...document.querySelectorAll('p, div, span')] - .map(el => el.textContent?.trim()) - .filter(Boolean) - .join('\n\n'); + // Clean up content while preserving structure + const paragraphs = [...document.querySelectorAll('p')] + .map(p => { + // Remove any images but keep their alt text + const images = p.querySelectorAll('img'); + images.forEach(img => { + const alt = img.getAttribute('alt') || ''; + img.replaceWith(alt ? `[Imagen: ${alt}]` : ''); + }); + return p.outerHTML; + }) + .join(''); options.content.push({ title: post.title, @@ -63,7 +70,7 @@ async function generateEpub() { data: `

${post.title}

Por ${post.author}

- ${paragraphs || 'No content available'} + ${paragraphs || '

No content available

'}

Publicación original

` });