From fa1801c979290007181cd2efbe01d88e3ad049c9 Mon Sep 17 00:00:00 2001 From: "brobert (aider)" Date: Tue, 1 Apr 2025 15:03:48 +0200 Subject: [PATCH] fix: extract text content properly in epub generation --- generate-epub.ts | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/generate-epub.ts b/generate-epub.ts index ffa92ba..229c33a 100644 --- a/generate-epub.ts +++ b/generate-epub.ts @@ -31,10 +31,14 @@ async function generateEpub() { // Convert each post to ePub chapter format for (const post of posts) { - const { document } = parseHTML(post.content); + // Parse the content HTML and extract text nodes + const { document } = parseHTML(`
${post.content}
`); - // Clean up content - remove unwanted elements if needed - const content = document.body.innerHTML; + // Get all text content from paragraphs and other text containers + const paragraphs = [...document.querySelectorAll('p, div, span')] + .map(el => el.textContent?.trim()) + .filter(Boolean) + .join('\n\n'); options.content.push({ title: post.title, @@ -42,7 +46,7 @@ async function generateEpub() { data: `

${post.title}

Por ${post.author}

- ${content} + ${paragraphs || 'No content available'}

Publicación original

` });