Files
crawl4ai/tests/memory/cap_test.py
UncleCode a58c8000aa refactor(server): migrate to pool-based crawler management
Replace crawler_manager.py with simpler crawler_pool.py implementation:
- Add global page semaphore for hard concurrency cap
- Implement browser pool with idle cleanup
- Add playground UI for testing and stress testing
- Update API handlers to use pooled crawlers
- Enhance logging levels and symbols

BREAKING CHANGE: Removes CrawlerManager class in favor of simpler pool-based approach
2025-04-20 20:14:26 +08:00

35 lines
1.1 KiB
Python

#!/usr/bin/env python3
"""
Hammer /crawl with many concurrent requests to prove GLOBAL_SEM works.
"""
import asyncio, httpx, json, uuid, argparse
# Endpoint of the locally running crawl server that this script hammers.
API = "http://localhost:8020/crawl"

# Kept minimal so that each arun() on the server corresponds to one page.
URLS_PER_CALL = 1

# Intentionally far above the server's concurrency cap.
CONCURRENT_CALLS = 20

# Base request body shared by all calls; the per-request "urls" list is
# added in one_call().
payload_template = {
    "browser_config": {
        "type": "BrowserConfig",
        "params": {"headless": True},
    },
    "crawler_config": {
        "type": "CrawlerRunConfig",
        "params": {"cache_mode": "BYPASS", "verbose": False},
    },
}
async def one_call(client):
    """Send one /crawl request and return the server-reported peak memory.

    Args:
        client: HTTP client exposing an awaitable ``post(url, json=...)``;
            ``main`` passes an ``httpx.AsyncClient``.

    Returns:
        The ``server_peak_memory_mb`` value from the JSON response body.

    Raises:
        Whatever ``raise_for_status()`` raises for an error response
        (``httpx.HTTPStatusError`` when called with an httpx client), and
        ``KeyError`` if the body has no ``server_peak_memory_mb`` field.
    """
    # Honor URLS_PER_CALL instead of hard-coding a single URL; a fresh UUID
    # per URL keeps every requested address distinct.
    urls = [
        f"https://httpbin.org/anything/{uuid.uuid4()}"
        for _ in range(URLS_PER_CALL)
    ]
    # Build a new top-level dict so the shared template never gains "urls".
    payload = {**payload_template, "urls": urls}
    response = await client.post(API, json=payload)
    response.raise_for_status()
    return response.json()["server_peak_memory_mb"]
async def main():
    """Launch CONCURRENT_CALLS overlapping one_call() requests and print results."""
    async with httpx.AsyncClient(timeout=60) as client:
        # Schedule every request up front so they are all in flight together.
        in_flight = []
        for _ in range(CONCURRENT_CALLS):
            in_flight.append(asyncio.create_task(one_call(client)))
        # gather() yields results in submission order and propagates the
        # first exception raised by any call.
        mem_usages = await asyncio.gather(*in_flight)
        print("Calls finished OK, server peaks reported:", mem_usages)
# Run the stress test only when executed as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(main())