|
|
|
@ -47,8 +47,6 @@ async function fetchBlogPosts(): Promise<BlogPost[]> {
|
|
|
|
|
const postLinks = [...document.querySelectorAll('div.posttitle a')] as HTMLAnchorElement[];
|
|
|
|
|
const postUrls = postLinks.map(link => link.href);
|
|
|
|
|
|
|
|
|
|
const posts: BlogPost[] = [];
|
|
|
|
|
|
|
|
|
|
// Process posts with limited concurrency and delays
|
|
|
|
|
const processPost = async (url: string): Promise<BlogPost | null> => {
|
|
|
|
|
await setTimeout(SCRAPE_CONFIG.delayBetweenRequests);
|
|
|
|
@ -74,7 +72,11 @@ async function fetchBlogPosts(): Promise<BlogPost[]> {
|
|
|
|
|
url
|
|
|
|
|
};
|
|
|
|
|
} catch (error) {
|
|
|
|
|
console.error(`Failed to process ${url}:`, error);
|
|
|
|
|
if (error instanceof Error) {
|
|
|
|
|
console.error(`Failed to process ${url}: ${error.message}`);
|
|
|
|
|
} else {
|
|
|
|
|
console.error(`Failed to process ${url}:`, error);
|
|
|
|
|
}
|
|
|
|
|
return null;
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
@ -84,9 +86,11 @@ async function fetchBlogPosts(): Promise<BlogPost[]> {
|
|
|
|
|
postUrls.slice(0, 10).map(processPost) // Limit to 10 posts for initial testing
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
const posts = results
|
|
|
|
|
.filter(result => result.status === 'fulfilled' && result.value !== null)
|
|
|
|
|
.map(result => (result as PromiseFulfilledResult<BlogPost>).value);
|
|
|
|
|
const posts: BlogPost[] = results
|
|
|
|
|
.filter((result): result is PromiseFulfilledResult<BlogPost> =>
|
|
|
|
|
result.status === 'fulfilled' && result.value !== null
|
|
|
|
|
)
|
|
|
|
|
.map(result => result.value);
|
|
|
|
|
|
|
|
|
|
console.log(`Successfully processed ${posts.length}/${postUrls.length} posts`);
|
|
|
|
|
return posts;
|
|
|
|
|