Refactor: remove the old deep_crawl method
This commit is contained in:
@@ -65,6 +65,7 @@ DeepCrawlManyReturn = Union[
|
|||||||
AsyncGenerator[CrawlResultT, None],
|
AsyncGenerator[CrawlResultT, None],
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
class AsyncWebCrawler:
|
class AsyncWebCrawler:
|
||||||
"""
|
"""
|
||||||
Asynchronous web crawler with flexible caching capabilities.
|
Asynchronous web crawler with flexible caching capabilities.
|
||||||
@@ -850,40 +851,6 @@ class AsyncWebCrawler:
|
|||||||
_results = await dispatcher.run_urls(crawler=self, urls=urls, config=config)
|
_results = await dispatcher.run_urls(crawler=self, urls=urls, config=config)
|
||||||
return [transform_result(res) for res in _results]
|
return [transform_result(res) for res in _results]
|
||||||
|
|
||||||
async def adeep_crawl(
    self,
    url: str,
    strategy: DeepCrawlStrategy,
    crawler_run_config: Optional[CrawlerRunConfig] = None,
    stream: Optional[bool] = False,
) -> Union[AsyncGenerator[CrawlResult, None], List[CrawlResult]]:
    """
    Traverse child URLs starting from the given URL, based on a traversal strategy.

    Args:
        url: Starting URL handed to the strategy.
        strategy: Traversal strategy to use. Its ``deep_crawl`` is called with
            ``url``, this crawler (``crawler=self``) and ``crawler_run_config``.
        crawler_run_config: Optional configuration object forwarded unchanged
            to ``strategy.deep_crawl``. Defaults to None.
        stream (bool, optional): Whether to stream the results. Defaults to False.

    Returns:
        If ``stream`` is truthy, the object returned by ``strategy.deep_crawl``
        itself, not yet iterated. Otherwise, a list holding every result
        produced by iterating that object to exhaustion.

    Raises:
        Exception: Any exception raised while calling ``strategy.deep_crawl``
            or while collecting results in non-streaming mode is logged via
            ``self.logger.error`` and re-raised unchanged.
    """
    try:
        result_generator = strategy.deep_crawl(
            url, crawler=self, crawler_run_config=crawler_run_config
        )
        if stream:
            # Returned without being consumed: errors raised later, while the
            # caller iterates, happen outside this try block and are not
            # logged here.
            return result_generator
        # Non-streaming mode: drain the async iterator into a list.
        return [result async for result in result_generator]
    except Exception as e:
        # NOTE(review): the message says "streaming", but this handler also
        # covers the non-streaming collection path. The text is left as-is
        # because it is emitted at runtime.
        self.logger.error(f"Error in streaming Deep Crawl: {str(e)}")
        raise
|
|
||||||
|
|
||||||
async def aclear_cache(self):
|
async def aclear_cache(self):
|
||||||
"""Clear the cache database."""
|
"""Clear the cache database."""
|
||||||
await async_db_manager.cleanup()
|
await async_db_manager.cleanup()
|
||||||
|
|||||||
Reference in New Issue
Block a user