From 467be9ac76eda7886ca8e4ddc3aee7e4c19ef6ce Mon Sep 17 00:00:00 2001
From: UncleCode
Date: Sun, 9 Feb 2025 20:23:40 +0800
Subject: [PATCH] feat(deep-crawling): add DFS strategy and update exports;
 refactor CLI entry point

---
 crawl4ai/__init__.py               | 3 ++-
 crawl4ai/cli.py                    | 0
 crawl4ai/deep_crawling/__init__.py | 2 ++
 crawl4ai/deep_crawling/filters.py  | 1 +
 pyproject.toml                     | 6 ++++--
 5 files changed, 9 insertions(+), 3 deletions(-)
 create mode 100644 crawl4ai/cli.py

diff --git a/crawl4ai/__init__.py b/crawl4ai/__init__.py
index ea7c2191..f4aa60b1 100644
--- a/crawl4ai/__init__.py
+++ b/crawl4ai/__init__.py
@@ -33,9 +33,10 @@ from .async_dispatcher import (
 )
 from .docker_client import Crawl4aiDockerClient
 from .hub import CrawlerHub
-
+from .deep_crawling import DeepCrawlStrategy
 __all__ = [
     "AsyncWebCrawler",
+    "DeepCrawlStrategy",
     "CrawlResult",
     "CrawlerHub",
     "CacheMode",
diff --git a/crawl4ai/cli.py b/crawl4ai/cli.py
new file mode 100644
index 00000000..e69de29b
diff --git a/crawl4ai/deep_crawling/__init__.py b/crawl4ai/deep_crawling/__init__.py
index bbe27eff..8ebdb58b 100644
--- a/crawl4ai/deep_crawling/__init__.py
+++ b/crawl4ai/deep_crawling/__init__.py
@@ -2,10 +2,12 @@
 from .base_strategy import DeepCrawlDecorator, DeepCrawlStrategy
 from .bfs_strategy import BFSDeepCrawlStrategy
 from .bff_strategy import BestFirstCrawlingStrategy
+from .dfs_strategy import DFSDeepCrawlStrategy
 
 __all__ = [
     "DeepCrawlDecorator",
     "DeepCrawlStrategy",
     "BFSDeepCrawlStrategy",
     "BestFirstCrawlingStrategy",
+    "DFSDeepCrawlStrategy",
 ]
\ No newline at end of file
diff --git a/crawl4ai/deep_crawling/filters.py b/crawl4ai/deep_crawling/filters.py
index f74b1c61..68472e9d 100644
--- a/crawl4ai/deep_crawling/filters.py
+++ b/crawl4ai/deep_crawling/filters.py
@@ -224,6 +224,7 @@ def create_common_filter_chain() -> FilterChain:
 
 
 # Use __slots__ and array for maximum memory/speed efficiency
+@dataclass
 class FastFilterStats:
     __slots__ = ("_counters",)
 
diff --git a/pyproject.toml b/pyproject.toml
index 38e1f89f..ea6c5494 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,7 +37,9 @@ dependencies = [
     "rich>=13.9.4",
     "cssselect>=1.2.0",
     "httpx==0.27.2",
-    "fake-useragent>=2.0.3"
+    "fake-useragent>=2.0.3",
+    "click>=8.1.7",
+    "pyperclip>=1.8.2"
 ]
 classifiers = [
     "Development Status :: 4 - Beta",
@@ -70,7 +72,7 @@ crawl4ai-download-models = "crawl4ai.model_loader:main"
 crawl4ai-migrate = "crawl4ai.migrations:main"
 crawl4ai-setup = "crawl4ai.install:post_install"
 crawl4ai-doctor = "crawl4ai.install:doctor"
-crawl = "crawl4ai.cli:cli"
+crwl = "crawl4ai.cli:cli"
 
 [tool.setuptools]
 packages = {find = {where = ["."], include = ["crawl4ai*"]}}