fix: removed child_urls from CrawlResult
This commit is contained in:
@@ -135,9 +135,7 @@ class CrawlResult(BaseModel):
|
|||||||
# Attributes for position
|
# Attributes for position
|
||||||
depth: Optional[int] = None
|
depth: Optional[int] = None
|
||||||
score: Optional[float] = -inf
|
score: Optional[float] = -inf
|
||||||
# For referencing children and parents from a flattened list of CrawlResult elements
|
|
||||||
parent_url: Optional[str] = None
|
parent_url: Optional[str] = None
|
||||||
child_urls: Optional[List[str]] = None
|
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
arbitrary_types_allowed = True
|
arbitrary_types_allowed = True
|
||||||
|
|||||||
@@ -84,7 +84,6 @@ class BFSTraversalStrategy(TraversalStrategy):
|
|||||||
links_to_process = result.links["internal"]
|
links_to_process = result.links["internal"]
|
||||||
if self.process_external_links:
|
if self.process_external_links:
|
||||||
links_to_process += result.links["external"]
|
links_to_process += result.links["external"]
|
||||||
child_urls = []
|
|
||||||
for link in links_to_process:
|
for link in links_to_process:
|
||||||
url = link["href"]
|
url = link["href"]
|
||||||
if url in visited:
|
if url in visited:
|
||||||
@@ -93,13 +92,11 @@ class BFSTraversalStrategy(TraversalStrategy):
|
|||||||
self.stats.urls_skipped += 1
|
self.stats.urls_skipped += 1
|
||||||
continue
|
continue
|
||||||
score = self.url_scorer.score(url) if self.url_scorer else 0
|
score = self.url_scorer.score(url) if self.url_scorer else 0
|
||||||
child_urls.append(url)
|
|
||||||
await queue.put((score, next_depth, url, source_url))
|
await queue.put((score, next_depth, url, source_url))
|
||||||
depths[url] = next_depth
|
depths[url] = next_depth
|
||||||
self.stats.total_depth_reached = max(
|
self.stats.total_depth_reached = max(
|
||||||
self.stats.total_depth_reached, next_depth
|
self.stats.total_depth_reached, next_depth
|
||||||
)
|
)
|
||||||
return child_urls
|
|
||||||
|
|
||||||
async def deep_crawl(
|
async def deep_crawl(
|
||||||
self,
|
self,
|
||||||
@@ -167,13 +164,12 @@ class BFSTraversalStrategy(TraversalStrategy):
|
|||||||
crawl_info = active_crawls.pop(result.url, None)
|
crawl_info = active_crawls.pop(result.url, None)
|
||||||
|
|
||||||
if crawl_info and result.success:
|
if crawl_info and result.success:
|
||||||
child_urls = await self._process_links(
|
await self._process_links(
|
||||||
result, result.url, queue, visited, depths
|
result, result.url, queue, visited, depths
|
||||||
)
|
)
|
||||||
result.depth = crawl_info["depth"]
|
result.depth = crawl_info["depth"]
|
||||||
result.score = crawl_info["score"]
|
result.score = crawl_info["score"]
|
||||||
result.parent_url = crawl_info["parent_url"]
|
result.parent_url = crawl_info["parent_url"]
|
||||||
result.child_urls = child_urls
|
|
||||||
yield result
|
yield result
|
||||||
else:
|
else:
|
||||||
self.logger.warning(
|
self.logger.warning(
|
||||||
|
|||||||
Reference in New Issue
Block a user