From 04d8b47b927a5b3ba73e156e99292b76631c9c34 Mon Sep 17 00:00:00 2001 From: Aravind Karnam Date: Wed, 16 Oct 2024 22:34:54 +0530 Subject: [PATCH] Exposed min_crawl_delay for BFSScraperStrategy --- crawl4ai/scraper/bfs_scraper_strategy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crawl4ai/scraper/bfs_scraper_strategy.py b/crawl4ai/scraper/bfs_scraper_strategy.py index 1146714d..6fc39e73 100644 --- a/crawl4ai/scraper/bfs_scraper_strategy.py +++ b/crawl4ai/scraper/bfs_scraper_strategy.py @@ -18,14 +18,14 @@ logging.basicConfig(level=logging.DEBUG) rate_limiter = AsyncLimiter(1, 1) # 1 request per second class BFSScraperStrategy(ScraperStrategy): - def __init__(self, max_depth: int, filter_chain: FilterChain, url_scorer: URLScorer, max_concurrent: int = 5): + def __init__(self, max_depth: int, filter_chain: FilterChain, url_scorer: URLScorer, max_concurrent: int = 5, min_crawl_delay: int=1): self.max_depth = max_depth self.filter_chain = filter_chain self.url_scorer = url_scorer self.max_concurrent = max_concurrent # For Crawl Politeness self.last_crawl_time = defaultdict(float) - self.min_crawl_delay = 1 # 1 second delay between requests to the same domain + self.min_crawl_delay = min_crawl_delay # delay in seconds between requests to the same domain (default: 1) # For Robots.txt Compliance self.robot_parsers = {}