fix: removed child_urls from CrawlResult
This commit is contained in:
@@ -135,9 +135,7 @@ class CrawlResult(BaseModel):
|
||||
# Attributes for position
|
||||
depth: Optional[int] = None
|
||||
score: Optional[float] = -inf
|
||||
# For referencing children and parents from a flattened list of CrawlResult elements
|
||||
parent_url: Optional[str] = None
|
||||
child_urls: Optional[List[str]] = None
|
||||
|
||||
class Config:
|
||||
arbitrary_types_allowed = True
|
||||
|
||||
@@ -84,7 +84,6 @@ class BFSTraversalStrategy(TraversalStrategy):
|
||||
links_to_process = result.links["internal"]
|
||||
if self.process_external_links:
|
||||
links_to_process += result.links["external"]
|
||||
child_urls = []
|
||||
for link in links_to_process:
|
||||
url = link["href"]
|
||||
if url in visited:
|
||||
@@ -93,13 +92,11 @@ class BFSTraversalStrategy(TraversalStrategy):
|
||||
self.stats.urls_skipped += 1
|
||||
continue
|
||||
score = self.url_scorer.score(url) if self.url_scorer else 0
|
||||
child_urls.append(url)
|
||||
await queue.put((score, next_depth, url, source_url))
|
||||
depths[url] = next_depth
|
||||
self.stats.total_depth_reached = max(
|
||||
self.stats.total_depth_reached, next_depth
|
||||
)
|
||||
return child_urls
|
||||
|
||||
async def deep_crawl(
|
||||
self,
|
||||
@@ -167,13 +164,12 @@ class BFSTraversalStrategy(TraversalStrategy):
|
||||
crawl_info = active_crawls.pop(result.url, None)
|
||||
|
||||
if crawl_info and result.success:
|
||||
child_urls = await self._process_links(
|
||||
await self._process_links(
|
||||
result, result.url, queue, visited, depths
|
||||
)
|
||||
result.depth = crawl_info["depth"]
|
||||
result.score = crawl_info["score"]
|
||||
result.parent_url = crawl_info["parent_url"]
|
||||
result.child_urls = child_urls
|
||||
yield result
|
||||
else:
|
||||
self.logger.warning(
|
||||
|
||||
Reference in New Issue
Block a user