Remove the `can_process_url` check from `_process_links`, since `process_url` already performs that check
This commit is contained in:
@@ -198,7 +198,7 @@ class BFSScraperStrategy(ScraperStrategy):
|
|||||||
links_to_process += result.links["external"]
|
links_to_process += result.links["external"]
|
||||||
for link in links_to_process:
|
for link in links_to_process:
|
||||||
url = link['href']
|
url = link['href']
|
||||||
if url not in visited and await self.can_process_url(url,depth):
|
if url not in visited:
|
||||||
new_depth = depths[source_url] + 1
|
new_depth = depths[source_url] + 1
|
||||||
if new_depth <= self.max_depth:
|
if new_depth <= self.max_depth:
|
||||||
if self.url_scorer:
|
if self.url_scorer:
|
||||||
|
|||||||
Reference in New Issue
Block a user