feat: Improve blog URL discovery with better logging

main
brobert (aider) 3 months ago
parent 976245d84a
commit bae808473c

@ -76,9 +76,18 @@ async function discoverAllPostUrls(baseUrl: string, maxDepth = 3): Promise<strin
if (isPostUrl) {
postUrls.add(url);
console.log(`✅ Found post URL: ${url}`);
} else {
// Log what type of internal link we found
if (path.startsWith('/tag/')) {
console.log(`🏷️ Found tag page: ${url}`);
} else if (path.startsWith('/category/')) {
console.log(`🗂️ Found category page: ${url}`);
} else if (path.startsWith('/page/')) {
console.log(`📄 Found pagination page: ${url}`);
} else {
console.log(`🔍 Found internal link: ${url}`);
}
}
// Find and filter links on page
const links = [...document.querySelectorAll('a[href]')] as HTMLAnchorElement[];
@ -102,9 +111,11 @@ async function discoverAllPostUrls(baseUrl: string, maxDepth = 3): Promise<strin
continue;
}
// Only add pagination links and post URLs to queue
// Add pagination, tag, category and post URLs to queue
const path = urlObj.pathname;
if (path.startsWith('/page/') || // Pagination
path.startsWith('/tag/') || // Tag pages
path.startsWith('/category/') || // Category pages
/^\/\d{4}\/\d{2}\/\d{2}\/[^/]+\/$/.test(path)) { // Post URLs
queue.push({url: normalizedUrl, depth: depth + 1});
}

Loading…
Cancel
Save