feat(deep-crawling): add DFS strategy and update exports; refactor CLI entry point
This commit is contained in:
@@ -33,9 +33,10 @@ from .async_dispatcher import (
|
|||||||
)
|
)
|
||||||
from .docker_client import Crawl4aiDockerClient
|
from .docker_client import Crawl4aiDockerClient
|
||||||
from .hub import CrawlerHub
|
from .hub import CrawlerHub
|
||||||
|
from .deep_crawling import DeepCrawlStrategy
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"AsyncWebCrawler",
|
"AsyncWebCrawler",
|
||||||
|
"DeepCrawlStrategy",
|
||||||
"CrawlResult",
|
"CrawlResult",
|
||||||
"CrawlerHub",
|
"CrawlerHub",
|
||||||
"CacheMode",
|
"CacheMode",
|
||||||
|
|||||||
0
crawl4ai/cli.py
Normal file
0
crawl4ai/cli.py
Normal file
@@ -2,10 +2,12 @@
|
|||||||
from .base_strategy import DeepCrawlDecorator, DeepCrawlStrategy
|
from .base_strategy import DeepCrawlDecorator, DeepCrawlStrategy
|
||||||
from .bfs_strategy import BFSDeepCrawlStrategy
|
from .bfs_strategy import BFSDeepCrawlStrategy
|
||||||
from .bff_strategy import BestFirstCrawlingStrategy
|
from .bff_strategy import BestFirstCrawlingStrategy
|
||||||
|
from .dfs_strategy import DFSDeepCrawlStrategy
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"DeepCrawlDecorator",
|
"DeepCrawlDecorator",
|
||||||
"DeepCrawlStrategy",
|
"DeepCrawlStrategy",
|
||||||
"BFSDeepCrawlStrategy",
|
"BFSDeepCrawlStrategy",
|
||||||
"BestFirstCrawlingStrategy",
|
"BestFirstCrawlingStrategy",
|
||||||
|
"DFSDeepCrawlStrategy",
|
||||||
]
|
]
|
||||||
@@ -224,6 +224,7 @@ def create_common_filter_chain() -> FilterChain:
|
|||||||
|
|
||||||
|
|
||||||
# Use __slots__ and array for maximum memory/speed efficiency
|
# Use __slots__ and array for maximum memory/speed efficiency
|
||||||
|
@dataclass
|
||||||
class FastFilterStats:
|
class FastFilterStats:
|
||||||
__slots__ = ("_counters",)
|
__slots__ = ("_counters",)
|
||||||
|
|
||||||
|
|||||||
@@ -37,7 +37,9 @@ dependencies = [
|
|||||||
"rich>=13.9.4",
|
"rich>=13.9.4",
|
||||||
"cssselect>=1.2.0",
|
"cssselect>=1.2.0",
|
||||||
"httpx==0.27.2",
|
"httpx==0.27.2",
|
||||||
"fake-useragent>=2.0.3"
|
"fake-useragent>=2.0.3",
|
||||||
|
"click>=8.1.7",
|
||||||
|
"pyperclip>=1.8.2"
|
||||||
]
|
]
|
||||||
classifiers = [
|
classifiers = [
|
||||||
"Development Status :: 4 - Beta",
|
"Development Status :: 4 - Beta",
|
||||||
@@ -70,7 +72,7 @@ crawl4ai-download-models = "crawl4ai.model_loader:main"
|
|||||||
crawl4ai-migrate = "crawl4ai.migrations:main"
|
crawl4ai-migrate = "crawl4ai.migrations:main"
|
||||||
crawl4ai-setup = "crawl4ai.install:post_install"
|
crawl4ai-setup = "crawl4ai.install:post_install"
|
||||||
crawl4ai-doctor = "crawl4ai.install:doctor"
|
crawl4ai-doctor = "crawl4ai.install:doctor"
|
||||||
crawl = "crawl4ai.cli:cli"
|
crwl = "crawl4ai.cli:cli"
|
||||||
|
|
||||||
[tool.setuptools]
|
[tool.setuptools]
|
||||||
packages = {find = {where = ["."], include = ["crawl4ai*"]}}
|
packages = {find = {where = ["."], include = ["crawl4ai*"]}}
|
||||||
|
|||||||
Reference in New Issue
Block a user