From 8105fd178e1b7b00a4628e2227953fbe418af5c4 Mon Sep 17 00:00:00 2001
From: Aravind Karnam
Date: Thu, 17 Oct 2024 15:42:43 +0530
Subject: [PATCH] Removed stubs for remove_from_future_crawls, since the
 visited set is updated as soon as the URL is queued; removed
 add_to_retry_queue(url), since retries with exponential backoff via
 tenacity will take care of it.

---
 crawl4ai/scraper/bfs_scraper_strategy.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/crawl4ai/scraper/bfs_scraper_strategy.py b/crawl4ai/scraper/bfs_scraper_strategy.py
index b6cdaa80..ce4d0127 100644
--- a/crawl4ai/scraper/bfs_scraper_strategy.py
+++ b/crawl4ai/scraper/bfs_scraper_strategy.py
@@ -91,11 +91,6 @@ class BFSScraperStrategy(ScraperStrategy):
         if not crawl_result.success:
             # Logging and Monitoring
             logging.error(f"Failed to crawl URL: {url}. Error: {crawl_result.error_message}")
-            # Error Categorization
-            if crawl_result.status_code == 404:
-                self.remove_from_future_crawls(url)
-            elif crawl_result.status_code == 503:
-                await self.add_to_retry_queue(url)
             return crawl_result
 
         # Process links
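Note: the commit message leans on two behaviors that now live elsewhere, so a minimal sketch of both may help reviewers. This is not crawl4ai's actual implementation; the names FrontierQueue, CrawlError, and fetch are hypothetical placeholders, while the tenacity calls (retry, wait_exponential, stop_after_attempt, retry_if_exception_type) are the library's real API.

import asyncio
import logging

from tenacity import (
    retry,
    retry_if_exception_type,
    stop_after_attempt,
    wait_exponential,
)


class CrawlError(Exception):
    """Hypothetical transient failure (e.g. an HTTP 503) worth retrying."""


class FrontierQueue:
    """Sketch of dedup-at-enqueue: a URL enters the visited set the moment
    it is queued, so a terminal failure such as a 404 needs no explicit
    remove_from_future_crawls step: the URL can never be queued again."""

    def __init__(self) -> None:
        self.visited: set[str] = set()
        self.queue: asyncio.Queue[str] = asyncio.Queue()

    async def enqueue(self, url: str) -> None:
        if url not in self.visited:
            self.visited.add(url)
            await self.queue.put(url)


@retry(
    retry=retry_if_exception_type(CrawlError),
    wait=wait_exponential(multiplier=1, min=1, max=30),  # 1s, 2s, 4s, ... capped at 30s
    stop=stop_after_attempt(4),
    reraise=True,
)
async def fetch(url: str) -> str:
    # Placeholder body: the real page fetch goes here. Raising CrawlError on
    # a transient status (e.g. 503) hands the retry/backoff loop to tenacity,
    # replacing the removed add_to_retry_queue(url) stub.
    logging.info("fetching %s", url)
    raise CrawlError(f"transient failure fetching {url}")

Under this arrangement, a 404 simply logs and returns (the URL is already in the visited set), while a 503 surfaces as a retryable exception that tenacity re-attempts with exponential backoff before giving up.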