fix: code formatting
This commit is contained in:
@@ -154,7 +154,9 @@ class BFSScraperStrategy(ScraperStrategy):
|
|||||||
# Fill batch with available jobs
|
# Fill batch with available jobs
|
||||||
while len(jobs) < SCRAPER_BATCH_SIZE and not queue.empty():
|
while len(jobs) < SCRAPER_BATCH_SIZE and not queue.empty():
|
||||||
score, depth, url = await queue.get()
|
score, depth, url = await queue.get()
|
||||||
if url not in active_crawls: # Only add if not currently processing
|
if (
|
||||||
|
url not in active_crawls
|
||||||
|
): # Only add if not currently processing
|
||||||
jobs.append((score, depth, url))
|
jobs.append((score, depth, url))
|
||||||
active_crawls.add(url)
|
active_crawls.add(url)
|
||||||
self.stats.current_depth = depth
|
self.stats.current_depth = depth
|
||||||
|
|||||||
@@ -129,9 +129,11 @@ async def advanced_scraper_example():
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Create crawler and scraper
|
# Create crawler and scraper
|
||||||
async with AsyncWebScraper(crawler_config=CrawlerRunConfig(bypass_cache=True),
|
async with AsyncWebScraper(
|
||||||
browser_config=browser_config,
|
crawler_config=CrawlerRunConfig(bypass_cache=True),
|
||||||
strategy=bfs_strategy) as scraper:
|
browser_config=browser_config,
|
||||||
|
strategy=bfs_strategy,
|
||||||
|
) as scraper:
|
||||||
|
|
||||||
# Track statistics
|
# Track statistics
|
||||||
stats = {"processed": 0, "errors": 0, "total_size": 0}
|
stats = {"processed": 0, "errors": 0, "total_size": 0}
|
||||||
|
|||||||
Reference in New Issue
Block a user