Removed the stub for remove_from_future_crawls, since the visited set is updated as soon as the URL is queued. Removed the stub for add_to_retry_queue(url), since retries with exponential backoff (handled via tenacity) will take care of transient failures.

This commit is contained in:
Aravind Karnam
2024-10-17 15:42:43 +05:30
parent ce7fce4b16
commit 8105fd178e


@@ -91,11 +91,6 @@ class BFSScraperStrategy(ScraperStrategy):
         if not crawl_result.success:
             # Logging and Monitoring
             logging.error(f"Failed to crawl URL: {url}. Error: {crawl_result.error_message}")
-            # Error Categorization
-            if crawl_result.status_code == 404:
-                self.remove_from_future_crawls(url)
-            elif crawl_result.status_code == 503:
-                await self.add_to_retry_queue(url)
             return crawl_result
         # Process links
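
For context, here is a minimal sketch (not part of this commit) of how a tenacity-based retry with exponential backoff could replace the removed retry-queue stub. The exception class and crawler call below are hypothetical, assumed only for illustration:

# Sketch: retry transient failures (e.g. HTTP 503) with exponential backoff via tenacity,
# and return permanent failures (e.g. 404) as-is; the visited set already prevents
# such URLs from being queued again.
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type


class TransientCrawlError(Exception):
    """Raised for retryable failures such as a 503 response (hypothetical)."""


class BFSScraperStrategy:
    @retry(
        retry=retry_if_exception_type(TransientCrawlError),
        wait=wait_exponential(multiplier=1, min=2, max=30),  # 2s, 4s, 8s, ... capped at 30s
        stop=stop_after_attempt(4),                          # give up after 4 attempts
        reraise=True,
    )
    async def crawl_with_retry(self, crawler, url: str):
        result = await crawler.crawl(url)  # hypothetical crawler API
        if not result.success and result.status_code == 503:
            # Raising triggers tenacity's backoff-and-retry cycle.
            raise TransientCrawlError(f"503 from {url}, retrying with backoff")
        # Any other failure (e.g. 404) is returned to the caller unchanged.
        return result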