feat(crawler): add HTTP crawler strategy for lightweight web scraping

Implements a new AsyncHTTPCrawlerStrategy class that provides a fast, memory-efficient alternative to browser-based crawling. Features include:

- Support for HTTP/HTTPS requests with configurable methods, headers, and timeouts
- File and raw content handling capabilities
- Streaming response processing for large files
- Customizable request/response hooks
- Comprehensive error handling

Also refactors browser management code into a separate module for better organization.
2025-02-15 19:26:30 +08:00
parent 063df572b0
commit 8bb799068e
7 changed files with 1353 additions and 851 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -39,7 +39,9 @@ dependencies = [
    "httpx==0.27.2",
    "fake-useragent>=2.0.3",
    "click>=8.1.7",
-    "pyperclip>=1.8.2"
+    "pyperclip>=1.8.2",
+    "cchardet>=2.1.7",
+    "aiohttp>=3.11.11"
 ]
 classifiers = [
    "Development Status :: 4 - Beta",