refactor(server): migrate to pool-based crawler management

Replace crawler_manager.py with a simpler crawler_pool.py implementation:
- Add global page semaphore for hard concurrency cap
- Implement browser pool with idle cleanup
- Add playground UI for testing and stress testing
- Update API handlers to use pooled crawlers
- Enhance logging levels and symbols

BREAKING CHANGE: Removes the CrawlerManager class in favor of a simpler pool-based approach
This commit is contained in:
UncleCode
2025-04-20 20:14:26 +08:00
parent 16b2318242
commit a58c8000aa
14 changed files with 1447 additions and 1435 deletions

View File

@@ -24,13 +24,13 @@ from rich.panel import Panel
from rich.syntax import Syntax
# --- Constants ---
# DEFAULT_API_URL = "http://localhost:11235" # Default port
DEFAULT_API_URL = "http://localhost:11235" # Default port
DEFAULT_API_URL = "http://localhost:8020" # Default port
DEFAULT_URL_COUNT = 1000
DEFAULT_MAX_CONCURRENT_REQUESTS = 5
DEFAULT_URL_COUNT = 100
DEFAULT_MAX_CONCURRENT_REQUESTS = 1
DEFAULT_CHUNK_SIZE = 10
DEFAULT_REPORT_PATH = "reports_api"
DEFAULT_STREAM_MODE = False
DEFAULT_STREAM_MODE = True
REQUEST_TIMEOUT = 180.0
# Initialize Rich console
@@ -77,6 +77,10 @@ class ApiStressTest:
self.report_path = pathlib.Path(report_path)
self.report_path.mkdir(parents=True, exist_ok=True)
self.stream_mode = stream_mode
# Ignore repo path and set it to the resolved directory containing this file
self.repo_path = pathlib.Path(__file__).parent.resolve()
self.test_id = time.strftime("%Y%m%d_%H%M%S")
self.results_summary = {