feat: add stealth mode and enhance undetected browser support

- Add playwright-stealth integration with enable_stealth parameter in BrowserConfig
- Merge undetected browser strategy into main async_crawler_strategy.py using adapter pattern
- Add browser adapters (BrowserAdapter, PlaywrightAdapter, UndetectedAdapter) for flexible browser switching
- Update install.py to install both playwright and patchright browsers automatically
- Add comprehensive documentation for anti-bot features (stealth mode + undetected browser)
- Create examples demonstrating stealth mode usage and comparison tests
- Update pyproject.toml and requirements.txt with patchright>=1.49.0 and other dependencies
- Remove duplicate/unused dependencies (alphashape, cssselect, pyperclip, shapely, selenium)
- Add dependency checker tool in tests/check_dependencies.py

Breaking changes: None - all existing functionality preserved

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
unclecode
2025-07-17 16:59:10 +08:00
parent 5c33cbcca2
commit 6a728cbe5b
27 changed files with 2833 additions and 460 deletions

View File

@@ -13,40 +13,38 @@ authors = [
{name = "Unclecode", email = "unclecode@kidocode.com"}
]
dependencies = [
"aiofiles>=24.1.0",
"aiohttp>=3.11.11",
"aiosqlite~=0.20",
"anyio>=4.0.0",
"lxml~=5.3",
"litellm>=1.53.1",
"numpy>=1.26.0,<3",
"pillow>=10.4",
"playwright>=1.49.0",
"patchright>=1.49.0",
"python-dotenv~=1.0",
"requests~=2.26",
"beautifulsoup4~=4.12",
"tf-playwright-stealth>=1.1.0",
"xxhash~=3.4",
"rank-bm25~=0.2",
"aiofiles>=24.1.0",
"snowballstemmer~=2.2",
"pydantic>=2.10",
"pyOpenSSL>=24.3.0",
"psutil>=6.1.1",
"PyYAML>=6.0",
"nltk>=3.9.1",
"playwright",
"rich>=13.9.4",
"cssselect>=1.2.0",
"httpx>=0.27.2",
"httpx[http2]>=0.27.2",
"fake-useragent>=2.0.3",
"click>=8.1.7",
"pyperclip>=1.8.2",
"chardet>=5.2.0",
"aiohttp>=3.11.11",
"brotli>=1.1.0",
"humanize>=4.10.0",
"lark>=1.2.2",
"sentence-transformers>=2.2.0",
"alphashape>=1.3.1",
"shapely>=2.0.0"
"sentence-transformers>=2.2.0"
]
classifiers = [
"Development Status :: 4 - Beta",
@@ -60,20 +58,17 @@ classifiers = [
]
[project.optional-dependencies]
pdf = ["PyPDF2"]
torch = ["torch", "nltk", "scikit-learn"]
transformer = ["transformers", "tokenizers"]
cosine = ["torch", "transformers", "nltk"]
sync = ["selenium"]
pdf = ["PyPDF2>=3.0.0"]
torch = ["torch>=2.0.0", "nltk>=3.9.1", "scikit-learn>=1.3.0"]
transformer = ["transformers>=4.30.0", "tokenizers>=0.13.0"]
cosine = ["torch>=2.0.0", "transformers>=4.30.0", "nltk>=3.9.1"]
all = [
"PyPDF2",
"torch",
"nltk",
"scikit-learn",
"transformers",
"tokenizers",
"selenium",
"PyPDF2"
"PyPDF2>=3.0.0",
"torch>=2.0.0",
"nltk>=3.9.1",
"scikit-learn>=1.3.0",
"transformers>=4.30.0",
"tokenizers>=0.13.0"
]
[project.scripts]