Remove robots.txt compliance from scraper

Aravind Karnam
2025-01-27 11:58:54 +05:30
parent 513d008de5
commit bb6450f458


@@ -50,7 +50,7 @@ class BFSScraperStrategy(ScraperStrategy):
         self.process_external_links = process_external_links
     async def can_process_url(self, url: str, depth: int) -> bool:
-        """Check if URL can be processed based on robots.txt and filters
+        """Check if URL can be processed based on filters
         This is our gatekeeper method that determines if a URL should be processed. It:
         - Validates URL format using a robust built-in method
         - Applies custom filters from the filter chain
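After this change the gatekeeper no longer consults robots.txt: it only validates the URL format and runs the filter chain. A minimal sketch of what such a method might look like after this commit, assuming a filter_chain of callables and urllib-based validation (attribute and helper names other than can_process_url and process_external_links are illustrative, not taken from the repository):

from urllib.parse import urlparse

class BFSScraperStrategy:
    # Illustrative skeleton; the real class carries more state (queues, depth limits, etc.).
    def __init__(self, filter_chain=None, process_external_links=False):
        self.filter_chain = filter_chain or []          # assumed: callables taking a URL, returning bool
        self.process_external_links = process_external_links

    async def can_process_url(self, url: str, depth: int) -> bool:
        # 1. Validate URL format with the standard library: require an http(s) scheme and a host.
        parsed = urlparse(url)
        if parsed.scheme not in ("http", "https") or not parsed.netloc:
            return False
        # 2. Apply custom filters from the filter chain; any rejection blocks the URL.
        for url_filter in self.filter_chain:
            if not url_filter(url):
                return False
        # No robots.txt lookup here -- that check is exactly what this commit removes.
        return True

The depth parameter is accepted but unused in this sketch; in the real strategy it presumably feeds a maximum-depth check elsewhere in the BFS traversal.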