Files
crawl4ai/deploy/docker/tests/test_1_basic.py
unclecode b97eaeea4c feat(docker): implement smart browser pool with 10x memory efficiency
Major refactoring to eliminate memory leaks and enable high-scale crawling:

- **Smart 3-Tier Browser Pool**:
  - Permanent browser (always-ready default config)
  - Hot pool (configs used 3+ times, longer TTL)
  - Cold pool (new/rare configs, short TTL)
  - Auto-promotion: cold → hot after 3 uses
  - 100% pool reuse achieved in tests

- **Container-Aware Memory Detection**:
  - Read cgroup v1/v2 memory limits (not host metrics)
  - Accurate memory pressure detection in Docker
  - Memory-based browser creation blocking

- **Adaptive Janitor**:
  - Dynamic cleanup intervals (10s/30s/60s based on memory)
  - Tiered TTLs: cold 30-300s, hot 120-600s
  - Aggressive cleanup at high memory pressure

- **Unified Pool Usage**:
  - All endpoints now use pool (/html, /screenshot, /pdf, /execute_js, /md, /llm)
  - Fixed config signature mismatch (permanent browser matches endpoints)
  - get_default_browser_config() helper for consistency

- **Configuration**:
  - Reduced idle_ttl: 1800s → 300s (30min → 5min)
  - Fixed port: 11234 → 11235 (match Gunicorn)

**Performance Results** (from stress tests):
- Memory: 10x reduction (500-700MB × N → 270MB permanent)
- Latency: 30-50x faster (<100ms pool hits vs 3-5s startup)
- Reuse: 100% for default config, 60%+ for variants
- Capacity: 100+ concurrent requests (vs ~20 before)
- Leak: 0 MB/cycle (stable across tests)

**Test Infrastructure**:
- 7-phase sequential test suite (tests/)
- Docker stats integration + log analysis
- Pool promotion verification
- Memory leak detection
- Full endpoint coverage

Fixes memory issues reported in production deployments.
2025-10-17 20:38:39 +08:00

139 lines
4.2 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Test 1: Basic Container Health + Single Endpoint
- Starts container
- Hits /health endpoint 10 times
- Reports success rate and basic latency
"""
import asyncio
import time
import docker
import httpx
# Config
# Docker image the test container is started from.
IMAGE = "crawl4ai-local:latest"
# Name given to the test container; start_container() stops and removes any
# existing container with this name before starting a new one.
CONTAINER_NAME = "crawl4ai-test"
# Container TCP port, published on the same host port and used to build the
# http://localhost:<PORT>/health URL.
PORT = 11235
# Number of sequential /health requests issued by main().
REQUESTS = 10
async def test_endpoint(url: str, count: int):
    """Send ``count`` sequential GET requests to ``url`` and collect stats.

    Args:
        url: Endpoint to request.
        count: Number of requests to issue, one after another.

    Returns:
        A list with one dict per request. Every dict has ``success`` (bool)
        and ``latency_ms`` (float, or ``None`` when the request raised).
        Answered requests also carry ``status`` (the HTTP status code);
        requests that raised carry ``error`` (the exception text) instead.
    """
    results = []
    async with httpx.AsyncClient(timeout=30.0) as client:
        for i in range(count):
            # perf_counter() is monotonic, so the measured interval cannot be
            # skewed (or go negative) if the wall clock is adjusted mid-run.
            start = time.perf_counter()
            try:
                resp = await client.get(url)
            except Exception as e:
                # Best-effort probe: record the failure and keep going.
                results.append({
                    "success": False,
                    "latency_ms": None,
                    "error": str(e)
                })
                print(f" [{i+1}/{count}] ✗ Error: {e}")
                continue

            elapsed = (time.perf_counter() - start) * 1000  # ms
            ok = resp.status_code == 200
            results.append({
                "success": ok,
                "latency_ms": elapsed,
                "status": resp.status_code
            })
            # Only show the check mark when the request is actually counted
            # as a success; non-200 answers are failures.
            mark = "✓" if ok else "✗"
            print(f" [{i+1}/{count}] {mark} {resp.status_code} - {elapsed:.0f}ms")
    return results
def start_container(client, image: str, name: str, port: int):
    """Start a fresh container and wait until its /health endpoint answers 200.

    Any existing container called ``name`` is stopped and removed first. The
    new container is started detached with ``port`` published on the same
    host port, then polled once per second for up to 30 rounds.

    Args:
        client: Docker client exposing ``containers.get`` / ``containers.run``.
        image: Image to run.
        name: Name to give the container.
        port: Container TCP port, also used as the host port.

    Returns:
        The running container object once ``/health`` returned HTTP 200.

    Raises:
        TimeoutError: If the container did not become healthy. The container
            is removed before raising so it does not linger.
    """
    # Imported once here rather than on every polling round; a missing
    # 'requests' package now fails immediately instead of being swallowed
    # and surfacing as a misleading timeout.
    import requests

    # Clean up existing
    try:
        old = client.containers.get(name)
    except docker.errors.NotFound:
        pass
    else:
        print(f"🧹 Stopping existing container '{name}'...")
        old.stop()
        old.remove()

    print(f"🚀 Starting container '{name}' from image '{image}'...")
    container = client.containers.run(
        image,
        name=name,
        ports={f"{port}/tcp": port},
        detach=True,
        shm_size="1g",
        environment={"PYTHON_ENV": "production"},
    )

    # Wait for health
    print("⏳ Waiting for container to be healthy...")
    health_url = f"http://localhost:{port}/health"
    for _ in range(30):  # 30 polling rounds, 1s apart
        time.sleep(1)
        container.reload()
        if container.status in ("exited", "dead"):
            # The container already terminated; further polling is pointless.
            break
        if container.status != "running":
            continue
        try:
            resp = requests.get(health_url, timeout=2)
        except requests.RequestException:
            # Server inside the container is not accepting connections yet.
            continue
        if resp.status_code == 200:
            print("✅ Container healthy!")
            return container

    # The caller never receives the container on failure, so it must be
    # cleaned up here or it would be leaked.
    try:
        container.remove(force=True)
    except docker.errors.APIError as cleanup_error:
        print(f"⚠️ Could not remove failed container: {cleanup_error}")
    raise TimeoutError("Container failed to start")
def stop_container(container):
    """Shut the given container down, delete it, and log both steps."""
    print("🛑 Stopping container...")
    container.stop()
    container.remove()
    print("✅ Container removed")
async def main():
    """Run test 1: start the container, probe /health, and report the outcome.

    Starts the container, sends ``REQUESTS`` GET requests to ``/health``,
    prints the success rate and latency statistics, and always stops and
    removes the container afterwards.

    Returns:
        0 when every request succeeded, 1 on any failed request or error.
    """
    print("="*60)
    print("TEST 1: Basic Container Health + Single Endpoint")
    print("="*60)

    client = None
    container = None
    try:
        # Created inside the try block so that an unreachable Docker daemon
        # is reported as a test error instead of an unhandled traceback.
        client = docker.from_env()

        # Start container
        container = start_container(client, IMAGE, CONTAINER_NAME, PORT)

        # Test /health endpoint
        print(f"\n📊 Testing /health endpoint ({REQUESTS} requests)...")
        url = f"http://localhost:{PORT}/health"
        results = await test_endpoint(url, REQUESTS)

        # Calculate stats
        successes = sum(1 for r in results if r["success"])
        # Guard against an empty run (REQUESTS == 0): report 0% rather than
        # failing with a division by zero.
        success_rate = (successes / len(results)) * 100 if results else 0.0
        latencies = [r["latency_ms"] for r in results if r["latency_ms"] is not None]
        avg_latency = sum(latencies) / len(latencies) if latencies else 0

        # Print results
        print(f"\n{'='*60}")
        print("RESULTS:")
        print(f" Success Rate: {success_rate:.1f}% ({successes}/{len(results)})")
        print(f" Avg Latency: {avg_latency:.0f}ms")
        if latencies:
            print(f" Min Latency: {min(latencies):.0f}ms")
            print(f" Max Latency: {max(latencies):.0f}ms")
        print(f"{'='*60}")

        # Pass/Fail
        if success_rate >= 100:
            print("✅ TEST PASSED")
            return 0
        print("❌ TEST FAILED (expected 100% success rate)")
        return 1
    except Exception as e:
        print(f"\n❌ TEST ERROR: {e}")
        return 1
    finally:
        try:
            if container:
                stop_container(container)
        finally:
            # Release the client's connections to the Docker daemon even if
            # stopping the container failed.
            if client is not None:
                client.close()
if __name__ == "__main__":
    # The built-in exit() is injected by the 'site' module for interactive
    # use and is not guaranteed to exist (e.g. under `python -S`); raising
    # SystemExit sets the process exit code without relying on it.
    raise SystemExit(asyncio.run(main()))