refactor(server): migrate to pool-based crawler management

Replace crawler_manager.py with simpler crawler_pool.py implementation:
- Add global page semaphore for hard concurrency cap
- Implement browser pool with idle cleanup
- Add playground UI for testing and stress testing
- Update API handlers to use pooled crawlers
- Enhance logging levels and symbols

BREAKING CHANGE: Removes CrawlerManager class in favor of simpler pool-based approach
This commit is contained in:
UncleCode
2025-04-20 20:14:26 +08:00
parent 16b2318242
commit a58c8000aa
14 changed files with 1447 additions and 1435 deletions

View File

@@ -0,0 +1,35 @@
#!/usr/bin/env python3
"""
Quick sanitycheck for /config/dump endpoint.
Usage:
python test_config_dump.py [http://localhost:8020]
If the server isnt running, start it first:
uvicorn deploy.docker.server:app --port 8020
"""
import sys, json, textwrap, requests
BASE = sys.argv[1] if len(sys.argv) > 1 else "http://localhost:8020"
URL = f"{BASE.rstrip('/')}/config/dump"
CASES = [
# --- CrawlRunConfig variants ---
"CrawlerRunConfig()",
"CrawlerRunConfig(stream=True, cache_mode=CacheMode.BYPASS)",
"CrawlerRunConfig(js_only=True, wait_until='networkidle')",
# --- BrowserConfig variants ---
"BrowserConfig()",
"BrowserConfig(headless=False, extra_args=['--disable-gpu'])",
"BrowserConfig(browser_mode='builtin', proxy='http://1.2.3.4:8080')",
]
for code in CASES:
print("\n=== POST:", code)
resp = requests.post(URL, json={"code": code}, timeout=15)
if resp.ok:
print(json.dumps(resp.json(), indent=2)[:400] + "...")
else:
print("ERROR", resp.status_code, resp.text[:200])