Remove the can_process_url check from _process_links since it's already being checked in process_url

This commit is contained in:
Aravind Karnam
2024-11-26 11:11:49 +05:30
parent ee3001b1f7
commit a98d51a62c

View File

@@ -198,7 +198,7 @@ class BFSScraperStrategy(ScraperStrategy):
links_to_process += result.links["external"]
for link in links_to_process:
url = link['href']
-if url not in visited and await self.can_process_url(url,depth):
+if url not in visited:
new_depth = depths[source_url] + 1
if new_depth <= self.max_depth:
if self.url_scorer: