feat(docker): add multi-container cluster deployment with CLI management
Add comprehensive Docker cluster orchestration with horizontal scaling support. CLI Commands: - crwl server start/stop/restart/status/scale/logs - Auto-detection: Single (N=1) → Swarm (N>1) → Compose (N>1 fallback) - Support for 1-100 container replicas with zero-downtime scaling Infrastructure: - Nginx load balancing (round-robin API, sticky sessions monitoring) - Redis-based container discovery via heartbeats (30s interval) - Real-time monitoring dashboard with cluster-wide visibility - WebSocket aggregation from all containers Security & Stability Fixes (12 critical issues): - Add timeout protection to browser pool locks (prevent deadlocks) - Implement Redis retry logic with exponential backoff - Add container ID validation (prevent Redis key injection) - Add CLI input sanitization (prevent shell injection) - Add file locking for state management (prevent corruption) - Fix WebSocket resource leaks and connection cleanup - Add graceful degradation and circuit breakers Configuration: - RedisTTLConfig dataclass with environment variable support - Template-based docker-compose.yml and nginx.conf generation - Comprehensive error handling with actionable messages Documentation: - AGENT.md: Complete DevOps context for AI assistants - MULTI_CONTAINER_ARCHITECTURE.md: Technical architecture guide - Reorganized docs into deploy/docker/docs/
This commit is contained in:
@@ -5,6 +5,7 @@ import asyncio
|
||||
from typing import Dict, List, Optional
|
||||
from datetime import datetime, timezone
|
||||
from collections import deque
|
||||
from dataclasses import dataclass
|
||||
from redis import asyncio as aioredis
|
||||
from utils import get_container_memory_percent
|
||||
import psutil
|
||||
@@ -12,13 +13,49 @@ import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ========== Configuration ==========
|
||||
|
||||
@dataclass
|
||||
class RedisTTLConfig:
|
||||
"""Redis TTL configuration (in seconds).
|
||||
|
||||
Configures how long different types of monitoring data are retained in Redis.
|
||||
Adjust based on your monitoring needs and Redis memory constraints.
|
||||
"""
|
||||
active_requests: int = 300 # 5 minutes - short-lived active request data
|
||||
completed_requests: int = 3600 # 1 hour - recent completed requests
|
||||
janitor_events: int = 3600 # 1 hour - browser cleanup events
|
||||
errors: int = 3600 # 1 hour - error logs
|
||||
endpoint_stats: int = 86400 # 24 hours - aggregated endpoint statistics
|
||||
heartbeat: int = 60 # 1 minute - container heartbeat (2x the 30s interval)
|
||||
|
||||
@classmethod
|
||||
def from_env(cls) -> 'RedisTTLConfig':
|
||||
"""Load TTL configuration from environment variables."""
|
||||
import os
|
||||
return cls(
|
||||
active_requests=int(os.getenv('REDIS_TTL_ACTIVE_REQUESTS', 300)),
|
||||
completed_requests=int(os.getenv('REDIS_TTL_COMPLETED_REQUESTS', 3600)),
|
||||
janitor_events=int(os.getenv('REDIS_TTL_JANITOR_EVENTS', 3600)),
|
||||
errors=int(os.getenv('REDIS_TTL_ERRORS', 3600)),
|
||||
endpoint_stats=int(os.getenv('REDIS_TTL_ENDPOINT_STATS', 86400)),
|
||||
heartbeat=int(os.getenv('REDIS_TTL_HEARTBEAT', 60)),
|
||||
)
|
||||
|
||||
|
||||
class MonitorStats:
|
||||
"""Tracks real-time server stats with Redis persistence."""
|
||||
|
||||
def __init__(self, redis: aioredis.Redis):
|
||||
def __init__(self, redis: aioredis.Redis, ttl_config: Optional[RedisTTLConfig] = None):
|
||||
self.redis = redis
|
||||
self.ttl = ttl_config or RedisTTLConfig.from_env()
|
||||
self.start_time = time.time()
|
||||
|
||||
# Get container ID for Redis keys
|
||||
from utils import get_container_id
|
||||
self.container_id = get_container_id()
|
||||
|
||||
# In-memory queues (fast reads, Redis backup)
|
||||
self.active_requests: Dict[str, Dict] = {} # id -> request info
|
||||
self.completed_requests: deque = deque(maxlen=100) # Last 100
|
||||
@@ -32,6 +69,9 @@ class MonitorStats:
|
||||
self._persist_queue: asyncio.Queue = asyncio.Queue(maxsize=10)
|
||||
self._persist_worker_task: Optional[asyncio.Task] = None
|
||||
|
||||
# Heartbeat task for container discovery
|
||||
self._heartbeat_task: Optional[asyncio.Task] = None
|
||||
|
||||
# Timeline data (5min window, 5s resolution = 60 points)
|
||||
self.memory_timeline: deque = deque(maxlen=60)
|
||||
self.requests_timeline: deque = deque(maxlen=60)
|
||||
@@ -45,10 +85,14 @@ class MonitorStats:
|
||||
"url": url[:100], # Truncate long URLs
|
||||
"start_time": time.time(),
|
||||
"config_sig": config.get("sig", "default") if config else "default",
|
||||
"mem_start": psutil.Process().memory_info().rss / (1024 * 1024)
|
||||
"mem_start": psutil.Process().memory_info().rss / (1024 * 1024),
|
||||
"container_id": self.container_id
|
||||
}
|
||||
self.active_requests[request_id] = req_info
|
||||
|
||||
# Persist to Redis
|
||||
await self._persist_active_requests()
|
||||
|
||||
# Increment endpoint counter
|
||||
if endpoint not in self.endpoint_stats:
|
||||
self.endpoint_stats[endpoint] = {
|
||||
@@ -95,19 +139,29 @@ class MonitorStats:
|
||||
"success": success,
|
||||
"error": error,
|
||||
"status_code": status_code,
|
||||
"pool_hit": pool_hit
|
||||
"pool_hit": pool_hit,
|
||||
"container_id": self.container_id
|
||||
}
|
||||
self.completed_requests.append(completed)
|
||||
|
||||
# Persist to Redis
|
||||
await self._persist_completed_requests()
|
||||
await self._persist_active_requests() # Update active (removed this request)
|
||||
|
||||
# Track errors
|
||||
if not success and error:
|
||||
self.errors.append({
|
||||
error_entry = {
|
||||
"timestamp": end_time,
|
||||
"endpoint": endpoint,
|
||||
"url": req_info["url"],
|
||||
"error": error,
|
||||
"request_id": request_id
|
||||
})
|
||||
"request_id": request_id,
|
||||
"message": error,
|
||||
"level": "ERROR",
|
||||
"container_id": self.container_id
|
||||
}
|
||||
self.errors.append(error_entry)
|
||||
await self._persist_errors()
|
||||
|
||||
await self._persist_endpoint_stats()
|
||||
|
||||
@@ -117,8 +171,10 @@ class MonitorStats:
|
||||
"timestamp": time.time(),
|
||||
"type": event_type, # "close_cold", "close_hot", "promote"
|
||||
"sig": sig[:8],
|
||||
"details": details
|
||||
"details": details,
|
||||
"container_id": self.container_id
|
||||
})
|
||||
await self._persist_janitor_events()
|
||||
|
||||
def _cleanup_old_entries(self, max_age_seconds: int = 300):
|
||||
"""Remove entries older than max_age_seconds (default 5min)."""
|
||||
@@ -149,13 +205,23 @@ class MonitorStats:
|
||||
recent_reqs = sum(1 for req in self.completed_requests
|
||||
if now - req.get("end_time", 0) < 5)
|
||||
|
||||
# Browser counts (acquire lock to prevent race conditions)
|
||||
# Browser counts (acquire lock with timeout to prevent deadlock)
|
||||
from crawler_pool import PERMANENT, HOT_POOL, COLD_POOL, LOCK
|
||||
async with LOCK:
|
||||
try:
|
||||
async with asyncio.timeout(2.0):
|
||||
async with LOCK:
|
||||
browser_count = {
|
||||
"permanent": 1 if PERMANENT else 0,
|
||||
"hot": len(HOT_POOL),
|
||||
"cold": len(COLD_POOL)
|
||||
}
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning("Lock acquisition timeout in update_timeline, using cached browser counts")
|
||||
# Use last known values or defaults
|
||||
browser_count = {
|
||||
"permanent": 1 if PERMANENT else 0,
|
||||
"hot": len(HOT_POOL),
|
||||
"cold": len(COLD_POOL)
|
||||
"permanent": 1,
|
||||
"hot": 0,
|
||||
"cold": 0
|
||||
}
|
||||
|
||||
self.memory_timeline.append({"time": now, "value": mem_pct})
|
||||
@@ -163,15 +229,117 @@ class MonitorStats:
|
||||
self.browser_timeline.append({"time": now, "browsers": browser_count})
|
||||
|
||||
async def _persist_endpoint_stats(self):
|
||||
"""Persist endpoint stats to Redis."""
|
||||
try:
|
||||
await self.redis.set(
|
||||
"monitor:endpoint_stats",
|
||||
json.dumps(self.endpoint_stats),
|
||||
ex=86400 # 24h TTL
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to persist endpoint stats: {e}")
|
||||
"""Persist endpoint stats to Redis with retry logic."""
|
||||
max_retries = 3
|
||||
for attempt in range(max_retries):
|
||||
try:
|
||||
await self.redis.set(
|
||||
"monitor:endpoint_stats",
|
||||
json.dumps(self.endpoint_stats),
|
||||
ex=self.ttl.endpoint_stats
|
||||
)
|
||||
return # Success
|
||||
except aioredis.ConnectionError as e:
|
||||
if attempt < max_retries - 1:
|
||||
backoff = 0.5 * (2 ** attempt) # 0.5s, 1s, 2s
|
||||
logger.warning(f"Redis connection error persisting endpoint stats (attempt {attempt + 1}/{max_retries}), retrying in {backoff}s: {e}")
|
||||
await asyncio.sleep(backoff)
|
||||
else:
|
||||
logger.error(f"Failed to persist endpoint stats after {max_retries} attempts: {e}")
|
||||
except Exception as e:
|
||||
logger.error(f"Non-retryable error persisting endpoint stats: {e}")
|
||||
break
|
||||
|
||||
async def _persist_active_requests(self):
|
||||
"""Persist active requests to Redis with retry logic."""
|
||||
max_retries = 3
|
||||
for attempt in range(max_retries):
|
||||
try:
|
||||
if self.active_requests:
|
||||
await self.redis.set(
|
||||
f"monitor:{self.container_id}:active_requests",
|
||||
json.dumps(list(self.active_requests.values())),
|
||||
ex=self.ttl.active_requests
|
||||
)
|
||||
else:
|
||||
await self.redis.delete(f"monitor:{self.container_id}:active_requests")
|
||||
return # Success
|
||||
except aioredis.ConnectionError as e:
|
||||
if attempt < max_retries - 1:
|
||||
backoff = 0.5 * (2 ** attempt) # 0.5s, 1s, 2s
|
||||
logger.warning(f"Redis connection error persisting active requests (attempt {attempt + 1}/{max_retries}), retrying in {backoff}s: {e}")
|
||||
await asyncio.sleep(backoff)
|
||||
else:
|
||||
logger.error(f"Failed to persist active requests after {max_retries} attempts: {e}")
|
||||
except Exception as e:
|
||||
logger.error(f"Non-retryable error persisting active requests: {e}")
|
||||
break
|
||||
|
||||
async def _persist_completed_requests(self):
|
||||
"""Persist completed requests to Redis with retry logic."""
|
||||
max_retries = 3
|
||||
for attempt in range(max_retries):
|
||||
try:
|
||||
await self.redis.set(
|
||||
f"monitor:{self.container_id}:completed",
|
||||
json.dumps(list(self.completed_requests)),
|
||||
ex=self.ttl.completed_requests
|
||||
)
|
||||
return # Success
|
||||
except aioredis.ConnectionError as e:
|
||||
if attempt < max_retries - 1:
|
||||
backoff = 0.5 * (2 ** attempt) # 0.5s, 1s, 2s
|
||||
logger.warning(f"Redis connection error persisting completed requests (attempt {attempt + 1}/{max_retries}), retrying in {backoff}s: {e}")
|
||||
await asyncio.sleep(backoff)
|
||||
else:
|
||||
logger.error(f"Failed to persist completed requests after {max_retries} attempts: {e}")
|
||||
except Exception as e:
|
||||
logger.error(f"Non-retryable error persisting completed requests: {e}")
|
||||
break
|
||||
|
||||
async def _persist_janitor_events(self):
|
||||
"""Persist janitor events to Redis with retry logic."""
|
||||
max_retries = 3
|
||||
for attempt in range(max_retries):
|
||||
try:
|
||||
await self.redis.set(
|
||||
f"monitor:{self.container_id}:janitor",
|
||||
json.dumps(list(self.janitor_events)),
|
||||
ex=self.ttl.janitor_events
|
||||
)
|
||||
return # Success
|
||||
except aioredis.ConnectionError as e:
|
||||
if attempt < max_retries - 1:
|
||||
backoff = 0.5 * (2 ** attempt) # 0.5s, 1s, 2s
|
||||
logger.warning(f"Redis connection error persisting janitor events (attempt {attempt + 1}/{max_retries}), retrying in {backoff}s: {e}")
|
||||
await asyncio.sleep(backoff)
|
||||
else:
|
||||
logger.error(f"Failed to persist janitor events after {max_retries} attempts: {e}")
|
||||
except Exception as e:
|
||||
logger.error(f"Non-retryable error persisting janitor events: {e}")
|
||||
break
|
||||
|
||||
async def _persist_errors(self):
|
||||
"""Persist errors to Redis with retry logic."""
|
||||
max_retries = 3
|
||||
for attempt in range(max_retries):
|
||||
try:
|
||||
await self.redis.set(
|
||||
f"monitor:{self.container_id}:errors",
|
||||
json.dumps(list(self.errors)),
|
||||
ex=self.ttl.errors
|
||||
)
|
||||
return # Success
|
||||
except aioredis.ConnectionError as e:
|
||||
if attempt < max_retries - 1:
|
||||
backoff = 0.5 * (2 ** attempt) # 0.5s, 1s, 2s
|
||||
logger.warning(f"Redis connection error persisting errors (attempt {attempt + 1}/{max_retries}), retrying in {backoff}s: {e}")
|
||||
await asyncio.sleep(backoff)
|
||||
else:
|
||||
logger.error(f"Failed to persist errors after {max_retries} attempts: {e}")
|
||||
except Exception as e:
|
||||
logger.error(f"Non-retryable error persisting errors: {e}")
|
||||
break
|
||||
|
||||
async def _persistence_worker(self):
|
||||
"""Background worker to persist stats to Redis."""
|
||||
@@ -202,25 +370,121 @@ class MonitorStats:
|
||||
self._persist_worker_task = None
|
||||
logger.info("Stopped persistence worker")
|
||||
|
||||
async def _heartbeat_worker(self):
|
||||
"""Send heartbeat to Redis every 30s with circuit breaker for failures."""
|
||||
from utils import detect_deployment_mode
|
||||
import os
|
||||
|
||||
heartbeat_failures = 0
|
||||
max_failures = 5 # Circuit breaker threshold
|
||||
|
||||
while True:
|
||||
try:
|
||||
# Get hostname/container name for friendly display
|
||||
# Try HOSTNAME env var first (set by Docker Compose), then socket.gethostname()
|
||||
import socket
|
||||
hostname = os.getenv("HOSTNAME", socket.gethostname())
|
||||
|
||||
# Register this container
|
||||
mode, containers = detect_deployment_mode()
|
||||
container_info = {
|
||||
"id": self.container_id,
|
||||
"hostname": hostname,
|
||||
"last_seen": time.time(),
|
||||
"mode": mode,
|
||||
"failure_count": heartbeat_failures
|
||||
}
|
||||
|
||||
# Set heartbeat with configured TTL
|
||||
await self.redis.setex(
|
||||
f"monitor:heartbeat:{self.container_id}",
|
||||
self.ttl.heartbeat,
|
||||
json.dumps(container_info)
|
||||
)
|
||||
|
||||
# Add to active containers set
|
||||
await self.redis.sadd("monitor:active_containers", self.container_id)
|
||||
|
||||
# Reset failure counter on success
|
||||
heartbeat_failures = 0
|
||||
|
||||
# Wait 30s before next heartbeat
|
||||
await asyncio.sleep(30)
|
||||
|
||||
except asyncio.CancelledError:
|
||||
break
|
||||
except aioredis.ConnectionError as e:
|
||||
heartbeat_failures += 1
|
||||
logger.error(
|
||||
f"Heartbeat Redis connection error (attempt {heartbeat_failures}/{max_failures}): {e}"
|
||||
)
|
||||
|
||||
if heartbeat_failures >= max_failures:
|
||||
# Circuit breaker - back off for longer
|
||||
logger.critical(
|
||||
f"Heartbeat circuit breaker triggered after {heartbeat_failures} failures. "
|
||||
f"Container will appear offline for 5 minutes."
|
||||
)
|
||||
await asyncio.sleep(300) # 5 min backoff
|
||||
heartbeat_failures = 0
|
||||
else:
|
||||
# Exponential backoff
|
||||
backoff = min(30 * (2 ** heartbeat_failures), 300)
|
||||
await asyncio.sleep(backoff)
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected heartbeat error: {e}", exc_info=True)
|
||||
await asyncio.sleep(30)
|
||||
|
||||
def start_heartbeat(self):
|
||||
"""Start the heartbeat worker."""
|
||||
if not self._heartbeat_task:
|
||||
self._heartbeat_task = asyncio.create_task(self._heartbeat_worker())
|
||||
logger.info("Started heartbeat worker")
|
||||
|
||||
async def stop_heartbeat(self):
|
||||
"""Stop the heartbeat worker and immediately deregister container."""
|
||||
if self._heartbeat_task:
|
||||
self._heartbeat_task.cancel()
|
||||
try:
|
||||
await self._heartbeat_task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
|
||||
# Immediate deregistration (no 60s wait)
|
||||
try:
|
||||
await self.redis.srem("monitor:active_containers", self.container_id)
|
||||
await self.redis.delete(f"monitor:heartbeat:{self.container_id}")
|
||||
logger.info(f"Container {self.container_id} immediately deregistered from monitoring")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to deregister container on shutdown: {e}")
|
||||
|
||||
self._heartbeat_task = None
|
||||
logger.info("Stopped heartbeat worker")
|
||||
|
||||
async def cleanup(self):
|
||||
"""Cleanup on shutdown - persist final stats and stop workers."""
|
||||
logger.info("Monitor cleanup starting...")
|
||||
try:
|
||||
# Persist final stats before shutdown
|
||||
await self._persist_endpoint_stats()
|
||||
# Stop background worker
|
||||
# Stop background workers
|
||||
await self.stop_persistence_worker()
|
||||
await self.stop_heartbeat()
|
||||
logger.info("Monitor cleanup completed")
|
||||
except Exception as e:
|
||||
logger.error(f"Monitor cleanup error: {e}")
|
||||
|
||||
async def load_from_redis(self):
|
||||
"""Load persisted stats from Redis."""
|
||||
"""Load persisted stats from Redis and start workers."""
|
||||
try:
|
||||
data = await self.redis.get("monitor:endpoint_stats")
|
||||
if data:
|
||||
self.endpoint_stats = json.loads(data)
|
||||
logger.info("Loaded endpoint stats from Redis")
|
||||
|
||||
# Start background workers
|
||||
self.start_heartbeat()
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to load from Redis: {e}")
|
||||
|
||||
@@ -232,17 +496,28 @@ class MonitorStats:
|
||||
# Network I/O (delta since last call)
|
||||
net = psutil.net_io_counters()
|
||||
|
||||
# Pool status (acquire lock to prevent race conditions)
|
||||
# Pool status (acquire lock with timeout to prevent race conditions)
|
||||
from crawler_pool import PERMANENT, HOT_POOL, COLD_POOL, LOCK
|
||||
async with LOCK:
|
||||
# TODO: Track actual browser process memory instead of estimates
|
||||
# These are conservative estimates based on typical Chromium usage
|
||||
permanent_mem = 270 if PERMANENT else 0 # Estimate: ~270MB for permanent browser
|
||||
hot_mem = len(HOT_POOL) * 180 # Estimate: ~180MB per hot pool browser
|
||||
cold_mem = len(COLD_POOL) * 180 # Estimate: ~180MB per cold pool browser
|
||||
permanent_active = PERMANENT is not None
|
||||
hot_count = len(HOT_POOL)
|
||||
cold_count = len(COLD_POOL)
|
||||
try:
|
||||
async with asyncio.timeout(2.0):
|
||||
async with LOCK:
|
||||
# TODO: Track actual browser process memory instead of estimates
|
||||
# These are conservative estimates based on typical Chromium usage
|
||||
permanent_mem = 270 if PERMANENT else 0 # Estimate: ~270MB for permanent browser
|
||||
hot_mem = len(HOT_POOL) * 180 # Estimate: ~180MB per hot pool browser
|
||||
cold_mem = len(COLD_POOL) * 180 # Estimate: ~180MB per cold pool browser
|
||||
permanent_active = PERMANENT is not None
|
||||
hot_count = len(HOT_POOL)
|
||||
cold_count = len(COLD_POOL)
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning("Lock acquisition timeout in get_health_summary, using defaults")
|
||||
# Use safe defaults when lock times out
|
||||
permanent_mem = 0
|
||||
hot_mem = 0
|
||||
cold_mem = 0
|
||||
permanent_active = False
|
||||
hot_count = 0
|
||||
cold_count = 0
|
||||
|
||||
return {
|
||||
"container": {
|
||||
@@ -286,46 +561,52 @@ class MonitorStats:
|
||||
return requests
|
||||
|
||||
async def get_browser_list(self) -> List[Dict]:
|
||||
"""Get detailed browser pool information."""
|
||||
"""Get detailed browser pool information with timeout protection."""
|
||||
from crawler_pool import PERMANENT, HOT_POOL, COLD_POOL, LAST_USED, USAGE_COUNT, DEFAULT_CONFIG_SIG, LOCK
|
||||
|
||||
browsers = []
|
||||
now = time.time()
|
||||
|
||||
# Acquire lock to prevent race conditions during iteration
|
||||
async with LOCK:
|
||||
if PERMANENT:
|
||||
browsers.append({
|
||||
"type": "permanent",
|
||||
"sig": DEFAULT_CONFIG_SIG[:8] if DEFAULT_CONFIG_SIG else "unknown",
|
||||
"age_seconds": int(now - self.start_time),
|
||||
"last_used_seconds": int(now - LAST_USED.get(DEFAULT_CONFIG_SIG, now)),
|
||||
"memory_mb": 270,
|
||||
"hits": USAGE_COUNT.get(DEFAULT_CONFIG_SIG, 0),
|
||||
"killable": False
|
||||
})
|
||||
# Acquire lock with timeout to prevent deadlock
|
||||
try:
|
||||
async with asyncio.timeout(2.0):
|
||||
async with LOCK:
|
||||
if PERMANENT:
|
||||
browsers.append({
|
||||
"type": "permanent",
|
||||
"sig": DEFAULT_CONFIG_SIG[:8] if DEFAULT_CONFIG_SIG else "unknown",
|
||||
"age_seconds": int(now - self.start_time),
|
||||
"last_used_seconds": int(now - LAST_USED.get(DEFAULT_CONFIG_SIG, now)),
|
||||
"memory_mb": 270,
|
||||
"hits": USAGE_COUNT.get(DEFAULT_CONFIG_SIG, 0),
|
||||
"killable": False
|
||||
})
|
||||
|
||||
for sig, crawler in HOT_POOL.items():
|
||||
browsers.append({
|
||||
"type": "hot",
|
||||
"sig": sig[:8],
|
||||
"age_seconds": int(now - self.start_time), # Approximation
|
||||
"last_used_seconds": int(now - LAST_USED.get(sig, now)),
|
||||
"memory_mb": 180, # Estimate
|
||||
"hits": USAGE_COUNT.get(sig, 0),
|
||||
"killable": True
|
||||
})
|
||||
for sig, crawler in HOT_POOL.items():
|
||||
browsers.append({
|
||||
"type": "hot",
|
||||
"sig": sig[:8],
|
||||
"age_seconds": int(now - self.start_time), # Approximation
|
||||
"last_used_seconds": int(now - LAST_USED.get(sig, now)),
|
||||
"memory_mb": 180, # Estimate
|
||||
"hits": USAGE_COUNT.get(sig, 0),
|
||||
"killable": True
|
||||
})
|
||||
|
||||
for sig, crawler in COLD_POOL.items():
|
||||
browsers.append({
|
||||
"type": "cold",
|
||||
"sig": sig[:8],
|
||||
"age_seconds": int(now - self.start_time),
|
||||
"last_used_seconds": int(now - LAST_USED.get(sig, now)),
|
||||
"memory_mb": 180,
|
||||
"hits": USAGE_COUNT.get(sig, 0),
|
||||
"killable": True
|
||||
})
|
||||
for sig, crawler in COLD_POOL.items():
|
||||
browsers.append({
|
||||
"type": "cold",
|
||||
"sig": sig[:8],
|
||||
"age_seconds": int(now - self.start_time),
|
||||
"last_used_seconds": int(now - LAST_USED.get(sig, now)),
|
||||
"memory_mb": 180,
|
||||
"hits": USAGE_COUNT.get(sig, 0),
|
||||
"killable": True
|
||||
})
|
||||
except asyncio.TimeoutError:
|
||||
logger.error("Browser list lock timeout - pool may be locked by janitor")
|
||||
# Return empty list when lock times out to prevent blocking
|
||||
return []
|
||||
|
||||
return browsers
|
||||
|
||||
|
||||
Reference in New Issue
Block a user