fix: extract text content properly in epub generation

3 months ago · fa1801c979
parent 49a9198af9
commit fa1801c979
1 changed file with 8 additions and 4 deletions
--- a/generate-epub.ts
+++ b/generate-epub.ts
@@ -31,10 +31,14 @@ async function generateEpub() {

  // Convert each post to ePub chapter format
  for (const post of posts) {
-    const { document } = parseHTML(post.content);
+    // Parse the content HTML and extract text nodes
+    const { document } = parseHTML(`<div>${post.content}</div>`);
    
-    // Clean up content - remove unwanted elements if needed
-    const content = document.body.innerHTML;
+    // Get all text content from paragraphs and other text containers
+    const paragraphs = [...document.querySelectorAll('p, div, span')]
+      .map(el => el.textContent?.trim())
+      .filter(Boolean)
+      .join('\n\n');

    options.content.push({
      title: post.title,
@@ -42,7 +46,7 @@ async function generateEpub() {
      data: `
        <h1>${post.title}</h1>
        <p><em>Por ${post.author}</em></p>
-        ${content}
+        ${paragraphs || 'No content available'}
        <p><a href="${post.url}">Publicación original</a></p>
      `
    });