refactor(crawling): improve type hints and code cleanup
- Added proper return type hints for DeepCrawlStrategy.arun method - Added __call__ method to DeepCrawlStrategy for easier usage - Removed redundant comments and imports - Cleaned up type hints in DFS strategy - Removed empty docker_client.py and .continuerules - Added .private/ to gitignore BREAKING CHANGE: DeepCrawlStrategy.arun now returns Union[CrawlResultT, List[CrawlResultT], AsyncGenerator[CrawlResultT, None]]
This commit is contained in:
@@ -2,13 +2,8 @@
|
||||
from typing import AsyncGenerator, Optional, Set, Dict, List, Tuple
|
||||
|
||||
from ..models import CrawlResult
|
||||
from .bfs_strategy import BFSDeepCrawlStrategy # Inherit common logic: can_process_url, link_discovery, etc.
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
if TYPE_CHECKING:
|
||||
from ..async_configs import CrawlerRunConfig
|
||||
from ..async_webcrawler import AsyncWebCrawler
|
||||
|
||||
from .bfs_strategy import BFSDeepCrawlStrategy # noqa
|
||||
from ..types import AsyncWebCrawler, CrawlerRunConfig
|
||||
|
||||
class DFSDeepCrawlStrategy(BFSDeepCrawlStrategy):
|
||||
"""
|
||||
@@ -20,8 +15,8 @@ class DFSDeepCrawlStrategy(BFSDeepCrawlStrategy):
|
||||
async def _arun_batch(
|
||||
self,
|
||||
start_url: str,
|
||||
crawler: "AsyncWebCrawler",
|
||||
config: "CrawlerRunConfig",
|
||||
crawler: AsyncWebCrawler,
|
||||
config: CrawlerRunConfig,
|
||||
) -> List[CrawlResult]:
|
||||
"""
|
||||
Batch (non-streaming) DFS mode.
|
||||
@@ -61,8 +56,8 @@ class DFSDeepCrawlStrategy(BFSDeepCrawlStrategy):
|
||||
async def _arun_stream(
|
||||
self,
|
||||
start_url: str,
|
||||
crawler: "AsyncWebCrawler",
|
||||
config: "CrawlerRunConfig",
|
||||
crawler: AsyncWebCrawler,
|
||||
config: CrawlerRunConfig,
|
||||
) -> AsyncGenerator[CrawlResult, None]:
|
||||
"""
|
||||
Streaming DFS mode.
|
||||
|
||||
Reference in New Issue
Block a user