|
|
@ -77,7 +77,16 @@ async function discoverAllPostUrls(baseUrl: string, maxDepth = 3): Promise<strin
|
|
|
|
postUrls.add(url);
|
|
|
|
postUrls.add(url);
|
|
|
|
console.log(`✅ Found post URL: ${url}`);
|
|
|
|
console.log(`✅ Found post URL: ${url}`);
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
console.log(`🔍 Found internal link: ${url}`);
|
|
|
|
// Log what type of internal link we found
|
|
|
|
|
|
|
|
if (path.startsWith('/tag/')) {
|
|
|
|
|
|
|
|
console.log(`🏷️ Found tag page: ${url}`);
|
|
|
|
|
|
|
|
} else if (path.startsWith('/category/')) {
|
|
|
|
|
|
|
|
console.log(`🗂️ Found category page: ${url}`);
|
|
|
|
|
|
|
|
} else if (path.startsWith('/page/')) {
|
|
|
|
|
|
|
|
console.log(`📄 Found pagination page: ${url}`);
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
console.log(`🔍 Found internal link: ${url}`);
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Find and filter links on page
|
|
|
|
// Find and filter links on page
|
|
|
@ -102,9 +111,11 @@ async function discoverAllPostUrls(baseUrl: string, maxDepth = 3): Promise<strin
|
|
|
|
continue;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Only add pagination links and post URLs to queue
|
|
|
|
// Add pagination, tag, category and post URLs to queue
|
|
|
|
const path = urlObj.pathname;
|
|
|
|
const path = urlObj.pathname;
|
|
|
|
if (path.startsWith('/page/') || // Pagination
|
|
|
|
if (path.startsWith('/page/') || // Pagination
|
|
|
|
|
|
|
|
path.startsWith('/tag/') || // Tag pages
|
|
|
|
|
|
|
|
path.startsWith('/category/') || // Category pages
|
|
|
|
/^\/\d{4}\/\d{2}\/\d{2}\/[^/]+\/$/.test(path)) { // Post URLs
|
|
|
|
/^\/\d{4}\/\d{2}\/\d{2}\/[^/]+\/$/.test(path)) { // Post URLs
|
|
|
|
queue.push({url: normalizedUrl, depth: depth + 1});
|
|
|
|
queue.push({url: normalizedUrl, depth: depth + 1});
|
|
|
|
}
|
|
|
|
}
|
|
|
|