refactor(server): migrate to pool-based crawler management
Replace crawler_manager.py with simpler crawler_pool.py implementation:

- Add global page semaphore for hard concurrency cap
- Implement browser pool with idle cleanup
- Add playground UI for testing and stress testing
- Update API handlers to use pooled crawlers
- Enhance logging levels and symbols

BREAKING CHANGE: Removes CrawlerManager class in favor of simpler pool-based approach
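For context, a minimal sketch of the mechanism the stress test below exercises. Only the name GLOBAL_SEM appears in the test; MAX_PAGES, IDLE_TTL, the POOL dict, and the arun()/close() calls are illustrative assumptions, not the actual crawler_pool.py API:

import asyncio, time

MAX_PAGES = 4                               # hypothetical hard cap
GLOBAL_SEM = asyncio.Semaphore(MAX_PAGES)   # one permit per in-flight page

POOL: dict[str, tuple[object, float]] = {}  # config signature -> (crawler, last_used)
IDLE_TTL = 30.0                             # seconds a pooled browser may sit idle

async def crawl_one(crawler, url: str):
    # Every page acquisition goes through the global semaphore, so no matter
    # how many API calls arrive, at most MAX_PAGES pages are live at once.
    async with GLOBAL_SEM:
        return await crawler.arun(url)      # assumed crawler interface

async def idle_janitor():
    # Periodically close and evict browsers that have sat idle too long.
    while True:
        await asyncio.sleep(IDLE_TTL)
        now = time.monotonic()
        for sig, (crawler, last_used) in list(POOL.items()):
            if now - last_used > IDLE_TTL:
                await crawler.close()       # assumed crawler interface
                del POOL[sig]

Requests above the cap queue on the semaphore instead of opening extra pages, which is what bounds the server's peak memory.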
tests/memory/cap_test.py (new file, 34 lines)
@@ -0,0 +1,34 @@
#!/usr/bin/env python3
"""
Hammer /crawl with many concurrent requests to prove GLOBAL_SEM works.
"""

import asyncio, httpx, json, uuid, argparse

API = "http://localhost:8020/crawl"
URLS_PER_CALL = 1       # keep it minimal so each arun() == 1 page
CONCURRENT_CALLS = 20   # way above your cap

payload_template = {
    "browser_config": {"type": "BrowserConfig", "params": {"headless": True}},
    "crawler_config": {
        "type": "CrawlerRunConfig",
        "params": {"cache_mode": "BYPASS", "verbose": False},
    }
}

async def one_call(client):
    payload = payload_template.copy()
    payload["urls"] = [f"https://httpbin.org/anything/{uuid.uuid4()}"]
    r = await client.post(API, json=payload)
    r.raise_for_status()
    return r.json()["server_peak_memory_mb"]

async def main():
    async with httpx.AsyncClient(timeout=60) as client:
        tasks = [asyncio.create_task(one_call(client)) for _ in range(CONCURRENT_CALLS)]
        mem_usages = await asyncio.gather(*tasks)
        print("Calls finished OK, server peaks reported:", mem_usages)

if __name__ == "__main__":
    asyncio.run(main())
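With the server listening on localhost:8020 (the API constant above), the test runs as: python tests/memory/cap_test.py. If the global semaphore holds, all 20 concurrent calls should complete without errors, and the reported server_peak_memory_mb values should plateau around the level set by the page cap rather than growing with the number of calls.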