|
|
|
@ -43,9 +43,18 @@ async function fetchBlogPosts(): Promise<BlogPost[]> {
|
|
|
|
|
const html = await fetchWithRetry(BLOG_URL);
|
|
|
|
|
const { document } = parseHTML(html);
|
|
|
|
|
|
|
|
|
|
// Extract post URLs - this selector might need adjustment
|
|
|
|
|
// Extract and filter post URLs
|
|
|
|
|
const postLinks = [...document.querySelectorAll('div.posttitle a')] as HTMLAnchorElement[];
|
|
|
|
|
const postUrls = postLinks.map(link => link.href);
|
|
|
|
|
const postUrls = postLinks
|
|
|
|
|
.map(link => link.href)
|
|
|
|
|
.filter(url => {
|
|
|
|
|
// Only include URLs that look like actual posts, i.e. contain a /YYYY/MM/DD/ date segment
|
|
|
|
|
const isPost = /\/\d{4}\/\d{2}\/\d{2}\//.test(url);
|
|
|
|
|
if (!isPost) {
|
|
|
|
|
console.log(`Skipping non-post URL: ${url}`);
|
|
|
|
|
}
|
|
|
|
|
return isPost;
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
// Process posts with limited concurrency and delays
|
|
|
|
|
const processPost = async (url: string): Promise<BlogPost | null> => {
|
|
|
|
|