feat(crawler): add deep crawling capabilities with BFS strategy
Implements deep crawling functionality with a new BreadthFirstSearch strategy:

- Add DeepCrawlStrategy base class and BFS implementation
- Integrate deep crawling with AsyncWebCrawler via decorator pattern
- Update CrawlerRunConfig to support deep crawling parameters
- Add pagination support for Google Search crawler

BREAKING CHANGE: AsyncWebCrawler.arun and arun_many return types now include deep crawl results
This commit is contained in:
@@ -16,7 +16,7 @@ from .extraction_strategy import (
|
||||
JsonXPathExtractionStrategy
|
||||
)
|
||||
|
||||
|
||||
from .async_deep_crawl import DeepCrawlStrategy, BreadthFirstSearchStrategy
|
||||
from .chunking_strategy import ChunkingStrategy, RegexChunking
|
||||
from .markdown_generation_strategy import DefaultMarkdownGenerator
|
||||
from .content_filter_strategy import PruningContentFilter, BM25ContentFilter, LLMContentFilter, RelevantContentFilter
|
||||
@@ -33,6 +33,8 @@ from .docker_client import Crawl4aiDockerClient
|
||||
from .hub import CrawlerHub
|
||||
|
||||
__all__ = [
|
||||
"DeepCrawlStrategy",
|
||||
"BreadthFirstSearchStrategy",
|
||||
"AsyncWebCrawler",
|
||||
"CrawlResult",
|
||||
"CrawlerHub",
|
||||
|
||||
Reference in New Issue
Block a user