From 858c18df393b64475ada3fbf578cb9d5cd56bcec Mon Sep 17 00:00:00 2001 From: Aravind Karnam Date: Wed, 29 Jan 2025 18:08:34 +0530 Subject: [PATCH] fix: removed child_urls from CrawlResult --- crawl4ai/models.py | 2 -- crawl4ai/traversal/bfs_traversal_strategy.py | 6 +----- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/crawl4ai/models.py b/crawl4ai/models.py index 8bb1402c..1da85582 100644 --- a/crawl4ai/models.py +++ b/crawl4ai/models.py @@ -135,9 +135,7 @@ class CrawlResult(BaseModel): # Attributes for position depth: Optional[int] = None score: Optional[float] = -inf - # For referencing children and parents from a flattened list of CrawlResult elements parent_url: Optional[str] = None - child_urls: Optional[List[str]] = None class Config: arbitrary_types_allowed = True diff --git a/crawl4ai/traversal/bfs_traversal_strategy.py b/crawl4ai/traversal/bfs_traversal_strategy.py index a809aa4e..f613033e 100644 --- a/crawl4ai/traversal/bfs_traversal_strategy.py +++ b/crawl4ai/traversal/bfs_traversal_strategy.py @@ -84,7 +84,6 @@ class BFSTraversalStrategy(TraversalStrategy): links_to_process = result.links["internal"] if self.process_external_links: links_to_process += result.links["external"] - child_urls = [] for link in links_to_process: url = link["href"] if url in visited: @@ -93,13 +92,11 @@ class BFSTraversalStrategy(TraversalStrategy): self.stats.urls_skipped += 1 continue score = self.url_scorer.score(url) if self.url_scorer else 0 - child_urls.append(url) await queue.put((score, next_depth, url, source_url)) depths[url] = next_depth self.stats.total_depth_reached = max( self.stats.total_depth_reached, next_depth ) - return child_urls async def deep_crawl( self, @@ -167,13 +164,12 @@ class BFSTraversalStrategy(TraversalStrategy): crawl_info = active_crawls.pop(result.url, None) if crawl_info and result.success: - child_urls = await self._process_links( + await self._process_links( result, result.url, queue, visited, depths ) result.depth = crawl_info["depth"] result.score = crawl_info["score"] result.parent_url = crawl_info["parent_url"] - result.child_urls = child_urls yield result else: self.logger.warning(