refactor(deep-crawling): reorganize deep crawling strategies and add new implementations
Split deep crawling code into separate strategy files for better organization and maintainability. Added new BFF (Best First) and DFS crawling strategies. Introduced base strategy class and common types. BREAKING CHANGE: Deep crawling implementation has been split into multiple files. Import paths for deep crawling strategies have changed.
This commit is contained in:
@@ -2,8 +2,10 @@ import asyncio
|
||||
import time
|
||||
|
||||
|
||||
from crawl4ai import CrawlerRunConfig, AsyncWebCrawler, CacheMode
|
||||
from crawl4ai.deep_crawling.bfs_strategy import BFSDeepCrawlStrategy
|
||||
from crawl4ai import CrawlerRunConfig, AsyncWebCrawler, CacheMode
|
||||
from crawl4ai.content_scraping_strategy import LXMLWebScrapingStrategy
|
||||
from crawl4ai.deep_crawling import BFSDeepCrawlStrategy
|
||||
# from crawl4ai.deep_crawling import BFSDeepCrawlStrategy, BestFirstCrawlingStrategy
|
||||
|
||||
|
||||
async def main():
|
||||
@@ -15,7 +17,8 @@ async def main():
|
||||
),
|
||||
stream=False,
|
||||
verbose=True,
|
||||
cache_mode=CacheMode.BYPASS
|
||||
cache_mode=CacheMode.BYPASS,
|
||||
scraping_strategy=LXMLWebScrapingStrategy()
|
||||
)
|
||||
|
||||
async with AsyncWebCrawler() as crawler:
|
||||
|
||||
Reference in New Issue
Block a user