fix: make post URL regex match with or without trailing slash

main
brobert (aider) 3 months ago
parent bae808473c
commit 5afdda6115

@@ -69,9 +69,9 @@ async function discoverAllPostUrls(baseUrl: string, maxDepth = 3): Promise<strin
 continue;
 }
-// Strict check for post URL pattern
+// Check for post URL pattern (with or without trailing slash)
 const path = new URL(url).pathname;
-const isPostUrl = /^\/\d{4}\/\d{2}\/\d{2}\/[^/]+\/$/.test(path);
+const isPostUrl = /^\/\d{4}\/\d{2}\/\d{2}\/[^/]+(\/)?$/.test(path);
 if (isPostUrl) {
 postUrls.add(url);
@@ -116,7 +116,7 @@ async function discoverAllPostUrls(baseUrl: string, maxDepth = 3): Promise<strin
 if (path.startsWith('/page/') || // Pagination
 path.startsWith('/tag/') || // Tag pages
 path.startsWith('/category/') || // Category pages
-/^\/\d{4}\/\d{2}\/\d{2}\/[^/]+\/$/.test(path)) { // Post URLs
+/^\/\d{4}\/\d{2}\/\d{2}\/[^/]+(\/)?$/.test(path)) { // Post URLs
 queue.push({url: normalizedUrl, depth: depth + 1});
 }
 } catch (error) {

Loading…
Cancel
Save