feat: Improve blog URL discovery by crawling tag and category pages, with more detailed link logging

main
brobert (aider) 3 months ago
parent 976245d84a
commit bae808473c

@@ -77,7 +77,16 @@ async function discoverAllPostUrls(baseUrl: string, maxDepth = 3): Promise<strin
postUrls.add(url); postUrls.add(url);
console.log(`✅ Found post URL: ${url}`); console.log(`✅ Found post URL: ${url}`);
} else { } else {
console.log(`🔍 Found internal link: ${url}`); // Log what type of internal link we found
if (path.startsWith('/tag/')) {
console.log(`🏷️ Found tag page: ${url}`);
} else if (path.startsWith('/category/')) {
console.log(`🗂️ Found category page: ${url}`);
} else if (path.startsWith('/page/')) {
console.log(`📄 Found pagination page: ${url}`);
} else {
console.log(`🔍 Found internal link: ${url}`);
}
} }
// Find and filter links on page // Find and filter links on page
@@ -102,9 +111,11 @@ async function discoverAllPostUrls(baseUrl: string, maxDepth = 3): Promise<strin
continue; continue;
} }
// Only add pagination links and post URLs to queue // Add pagination, tag, category and post URLs to queue
const path = urlObj.pathname; const path = urlObj.pathname;
if (path.startsWith('/page/') || // Pagination if (path.startsWith('/page/') || // Pagination
path.startsWith('/tag/') || // Tag pages
path.startsWith('/category/') || // Category pages
/^\/\d{4}\/\d{2}\/\d{2}\/[^/]+\/$/.test(path)) { // Post URLs /^\/\d{4}\/\d{2}\/\d{2}\/[^/]+\/$/.test(path)) { // Post URLs
queue.push({url: normalizedUrl, depth: depth + 1}); queue.push({url: normalizedUrl, depth: depth + 1});
} }

Loading…
Cancel
Save