refactor: improve type safety and error handling in scraper

main
brobert (aider) 3 months ago
parent a872a18077
commit 4889032011

@@ -47,8 +47,6 @@ async function fetchBlogPosts(): Promise<BlogPost[]> {
   const postLinks = [...document.querySelectorAll('div.posttitle a')] as HTMLAnchorElement[];
   const postUrls = postLinks.map(link => link.href);
-  const posts: BlogPost[] = [];
-
   // Process posts with limited concurrency and delays
   const processPost = async (url: string): Promise<BlogPost | null> => {
     await setTimeout(SCRAPE_CONFIG.delayBetweenRequests);
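Note on the `await setTimeout(...)` line above: this only acts as a delay if `setTimeout` is the promise-returning variant, presumably imported from `node:timers/promises` (the import sits outside this hunk, so that is an assumption). A minimal sketch of the throttling pattern, with a hypothetical `SCRAPE_CONFIG`:

```ts
// Assumed import: the promise-based setTimeout from Node's timers/promises.
import { setTimeout } from 'node:timers/promises';

// Hypothetical stand-in for the SCRAPE_CONFIG referenced in the diff.
const SCRAPE_CONFIG = { delayBetweenRequests: 500 };

async function throttled(url: string): Promise<string> {
  // Awaiting the promise-based setTimeout pauses this async function
  // without blocking the event loop, spacing out successive requests.
  await setTimeout(SCRAPE_CONFIG.delayBetweenRequests);
  const res = await fetch(url);
  return res.text();
}
```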
@@ -74,7 +72,11 @@ async function fetchBlogPosts(): Promise<BlogPost[]> {
         url
       };
     } catch (error) {
-      console.error(`Failed to process ${url}:`, error);
+      if (error instanceof Error) {
+        console.error(`Failed to process ${url}: ${error.message}`);
+      } else {
+        console.error(`Failed to process ${url}:`, error);
+      }
       return null;
     }
   };
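A catch-clause binding is typed `unknown` in TypeScript (with `useUnknownInCatchVariables`, on by default under `strict`), so the hunk narrows with `instanceof Error` before reading `.message`. A standalone sketch of the same narrowing, independent of the scraper's types:

```ts
// Standalone sketch of instanceof-Error narrowing in a catch clause.
async function tryProcess(url: string): Promise<string | null> {
  try {
    const res = await fetch(url);
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    return await res.text();
  } catch (error) {
    // `error` is `unknown`; the instanceof check narrows it to `Error`,
    // making `.message` safe to read. Non-Error throws are logged as-is.
    if (error instanceof Error) {
      console.error(`Failed to process ${url}: ${error.message}`);
    } else {
      console.error(`Failed to process ${url}:`, error);
    }
    return null;
  }
}
```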
@@ -84,9 +86,11 @@ async function fetchBlogPosts(): Promise<BlogPost[]> {
     postUrls.slice(0, 10).map(processPost) // Limit to 10 posts for initial testing
   );
 
-  const posts = results
-    .filter(result => result.status === 'fulfilled' && result.value !== null)
-    .map(result => (result as PromiseFulfilledResult<BlogPost>).value);
+  const posts: BlogPost[] = results
+    .filter((result): result is PromiseFulfilledResult<BlogPost> =>
+      result.status === 'fulfilled' && result.value !== null
+    )
+    .map(result => result.value);
 
   console.log(`Successfully processed ${posts.length}/${postUrls.length} posts`);
   return posts;
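The filter change is the core type-safety fix: annotating the callback as a type predicate (`(result): result is PromiseFulfilledResult<BlogPost>`) lets the compiler narrow the element type through `.filter`, so the old `as PromiseFulfilledResult<BlogPost>` cast in `.map` can be dropped. A self-contained sketch of the pattern; the `BlogPost` shape here is a hypothetical stand-in:

```ts
// Hypothetical minimal BlogPost shape, for illustration only.
interface BlogPost { title: string; url: string; }

async function collectFulfilled(tasks: Promise<BlogPost | null>[]): Promise<BlogPost[]> {
  const results = await Promise.allSettled(tasks);
  return results
    // The type predicate tells the compiler every element that passes is a
    // fulfilled result holding a non-null BlogPost, so no cast is needed below.
    .filter((r): r is PromiseFulfilledResult<BlogPost> =>
      r.status === 'fulfilled' && r.value !== null
    )
    .map(r => r.value);
}
```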
