refactor(core): reorganize project structure and remove legacy code

Major reorganization of the project structure:
- Moved legacy synchronous crawler code to legacy folder
- Removed deprecated CLI and docs manager
- Consolidated version manager into utils.py
- Added CrawlerHub to __init__.py exports
- Fixed type hints in async_webcrawler.py
- Fixed minor bugs in chunking and crawler strategies

BREAKING CHANGE: Removed synchronous WebCrawler, CLI, and docs management functionality. Users should migrate to AsyncWebCrawler.
This commit is contained in:
UncleCode
2025-01-30 19:35:06 +08:00
parent 31938fb922
commit f81712eb91
23 changed files with 425 additions and 4 deletions

View File

@@ -49,6 +49,12 @@ from collections.abc import AsyncGenerator
# Generic result type: any subtype of CrawlResult may flow through these aliases.
CrawlResultT = TypeVar('CrawlResultT', bound=CrawlResult)
# Return shape for multi-URL runs: either a fully-materialized list of results
# or an async generator that streams results one at a time.
# NOTE(review): which form is returned is presumably selected by run config —
# confirm at the call sites / dispatcher.
RunManyReturn = Union[List[CrawlResultT], AsyncGenerator[CrawlResultT, None]]
# Deep-crawl of a single source — same two shapes as RunManyReturn
# (batch list, or streamed results).
DeepCrawlSingleReturn = Union[List[CrawlResultT], AsyncGenerator[CrawlResultT, None]]
# Deep-crawl of many sources: batch mode returns one result list per source
# (list of lists); streaming mode collapses to a single flat async generator.
DeepCrawlManyReturn = Union[
    List[List[CrawlResultT]],
    AsyncGenerator[CrawlResultT, None],
]
# Package version re-exported under a distinct name for runtime use.
from .__version__ import __version__ as crawl4ai_version
@@ -282,7 +288,7 @@ class AsyncWebCrawler:
user_agent: str = None,
verbose=True,
**kwargs,
) -> CrawlResult:
) -> Union[CrawlResult, DeepCrawlSingleReturn]:
"""
Runs the crawler for a single source: URL (web, local file, or raw HTML).
@@ -709,7 +715,7 @@ class AsyncWebCrawler:
user_agent: str = None,
verbose=True,
**kwargs
) -> RunManyReturn:
) -> Union[RunManyReturn, DeepCrawlManyReturn]:
"""
Runs the crawler for multiple URLs concurrently using a configurable dispatcher strategy.