feat(docker): add multi-container cluster deployment with CLI management
Add comprehensive Docker cluster orchestration with horizontal scaling support.

CLI Commands:
- crwl server start/stop/restart/status/scale/logs
- Auto-detection: Single (N=1) → Swarm (N>1) → Compose (N>1 fallback)
- Support for 1-100 container replicas with zero-downtime scaling

Infrastructure:
- Nginx load balancing (round-robin for API, sticky sessions for monitoring)
- Redis-based container discovery via heartbeats (30s interval)
- Real-time monitoring dashboard with cluster-wide visibility
- WebSocket aggregation from all containers

Security & Stability Fixes (12 critical issues):
- Add timeout protection to browser pool locks (prevent deadlocks)
- Implement Redis retry logic with exponential backoff
- Add container ID validation (prevent Redis key injection)
- Add CLI input sanitization (prevent shell injection)
- Add file locking for state management (prevent corruption)
- Fix WebSocket resource leaks and connection cleanup
- Add graceful degradation and circuit breakers

Configuration:
- RedisTTLConfig dataclass with environment variable support
- Template-based docker-compose.yml and nginx.conf generation
- Comprehensive error handling with actionable messages

Documentation:
- AGENT.md: Complete DevOps context for AI assistants
- MULTI_CONTAINER_ARCHITECTURE.md: Technical architecture guide
- Reorganized docs into deploy/docker/docs/
This commit is contained in:
@@ -203,4 +203,51 @@ def get_container_memory_percent() -> float:
|
||||
except:
|
||||
# Non-container or unsupported: fallback to host
|
||||
import psutil
|
||||
return psutil.virtual_memory().percent
|
||||
return psutil.virtual_memory().percent
|
||||
|
||||
|
||||
def get_container_id() -> str:
    """Return the identifier used for the current container.

    The identifier is simply the local hostname reported by the ``socket``
    module (the hostname is what this module treats as the container ID
    when running in Docker).
    """
    from socket import gethostname

    container_id = gethostname()
    return container_id
|
||||
|
||||
|
||||
def detect_deployment_mode() -> tuple[str, list[dict]]:
    """Detect if running in single/swarm/compose mode and get container list.

    Detection is a local heuristic, tried in this order:

    1. Compose: the service name "crawl4ai" resolves to more than one
       distinct IP address.
    2. Swarm: the local hostname contains at least two dots (more than two
       dot-separated parts).
    3. Single: anything else, including a failed DNS lookup.

    Returns:
        (mode, containers) where mode is "single"|"swarm"|"compose" and
        containers is a list of {id, hostname, healthy} dicts.

        In compose mode the entries are placeholders numbered 1..N, one per
        resolved IP ("container-1"/"crawl4ai-1", ...); they are not looked
        up from the running containers. In the other modes the list holds
        a single entry for this host. "healthy" is always True because no
        health probe is performed here.
    """
    import socket

    my_hostname = socket.gethostname()

    # Check if we're behind nginx (Compose mode indicator):
    # in Compose, the service name resolves to multiple IPs.
    try:
        addrs = socket.getaddrinfo("crawl4ai", None)
    except OSError:
        # Resolution failures (socket.gaierror is an OSError) just mean the
        # Compose service name is not visible from here; keep going.
        # Only OSError is caught so KeyboardInterrupt/SystemExit propagate.
        addrs = []

    # The same address can be reported several times by getaddrinfo, so
    # count distinct addresses only (element 4 is the sockaddr tuple).
    unique_ips = {addr[4][0] for addr in addrs}
    if len(unique_ips) > 1:
        # Multiple IPs = Compose with replicas
        containers = [
            {"id": f"container-{n}", "hostname": f"crawl4ai-{n}", "healthy": True}
            for n in range(1, len(unique_ips) + 1)
        ]
        return "compose", containers

    # Check for Swarm mode (TODO: needs swarm-specific detection).
    # For now, rely on the hostname shape; expected Swarm hostname format:
    # service.slot.task_id
    if my_hostname.count(".") >= 2:
        return "swarm", [{"id": my_hostname, "hostname": my_hostname, "healthy": True}]

    # Default: single container
    return "single", [{"id": my_hostname, "hostname": my_hostname, "healthy": True}]
|
||||
Reference in New Issue
Block a user