refactor(crawling): improve type hints and code cleanup

- Added proper return type hints for DeepCrawlStrategy.arun method
- Added __call__ method to DeepCrawlStrategy for easier usage
- Removed redundant comments and imports
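- Cleaned up type hints in DFS strategy: replaced the TYPE_CHECKING-guarded imports and quoted forward references ("AsyncWebCrawler", "CrawlerRunConfig") with direct imports from ..types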
- Removed empty docker_client.py and .continuerules
- Added .private/ to gitignore

BREAKING CHANGE: DeepCrawlStrategy.arun now returns Union[CrawlResultT, List[CrawlResultT], AsyncGenerator[CrawlResultT, None]]
This commit is contained in:
UncleCode
2025-02-07 19:01:59 +08:00
parent 926beee832
commit 91073c1244
7 changed files with 12 additions and 22 deletions

View File

@@ -2,13 +2,8 @@
from typing import AsyncGenerator, Optional, Set, Dict, List, Tuple
from ..models import CrawlResult
from .bfs_strategy import BFSDeepCrawlStrategy # Inherit common logic: can_process_url, link_discovery, etc.
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from ..async_configs import CrawlerRunConfig
from ..async_webcrawler import AsyncWebCrawler
from .bfs_strategy import BFSDeepCrawlStrategy # noqa
from ..types import AsyncWebCrawler, CrawlerRunConfig
class DFSDeepCrawlStrategy(BFSDeepCrawlStrategy):
"""
@@ -20,8 +15,8 @@ class DFSDeepCrawlStrategy(BFSDeepCrawlStrategy):
async def _arun_batch(
self,
start_url: str,
crawler: "AsyncWebCrawler",
config: "CrawlerRunConfig",
crawler: AsyncWebCrawler,
config: CrawlerRunConfig,
) -> List[CrawlResult]:
"""
Batch (non-streaming) DFS mode.
@@ -61,8 +56,8 @@ class DFSDeepCrawlStrategy(BFSDeepCrawlStrategy):
async def _arun_stream(
self,
start_url: str,
crawler: "AsyncWebCrawler",
config: "CrawlerRunConfig",
crawler: AsyncWebCrawler,
config: CrawlerRunConfig,
) -> AsyncGenerator[CrawlResult, None]:
"""
Streaming DFS mode.