# Add comprehensive Docker cluster orchestration with horizontal scaling support.
#
# CLI Commands:
# - crwl server start/stop/restart/status/scale/logs
# - Auto-detection: Single (N=1) → Swarm (N>1) → Compose (N>1 fallback)
# - Support for 1-100 container replicas with zero-downtime scaling
#
# Infrastructure:
# - Nginx load balancing (round-robin API, sticky sessions monitoring)
# - Redis-based container discovery via heartbeats (30s interval)
# - Real-time monitoring dashboard with cluster-wide visibility
# - WebSocket aggregation from all containers
#
# Security & Stability Fixes (12 critical issues):
# - Add timeout protection to browser pool locks (prevent deadlocks)
# - Implement Redis retry logic with exponential backoff
# - Add container ID validation (prevent Redis key injection)
# - Add CLI input sanitization (prevent shell injection)
# - Add file locking for state management (prevent corruption)
# - Fix WebSocket resource leaks and connection cleanup
# - Add graceful degradation and circuit breakers
#
# Configuration:
# - RedisTTLConfig dataclass with environment variable support
# - Template-based docker-compose.yml and nginx.conf generation
# - Comprehensive error handling with actionable messages
#
# Documentation:
# - AGENT.md: Complete DevOps context for AI assistants
# - MULTI_CONTAINER_ARCHITECTURE.md: Technical architecture guide
# - Reorganized docs into deploy/docker/docs/
77 lines
1.8 KiB
YAML
77 lines
1.8 KiB
YAML
# Compose definition for a multi-container deployment: a Redis instance,
# replicated crawl4ai application containers, and an nginx container that is
# the only service publishing a port to the host.
version: '3.8'

services:
  # Redis with append-only persistence, stored on the named volume redis_data.
  redis:
    image: redis:alpine
    command: redis-server --appendonly yes
    volumes:
      - redis_data:/data
    networks:
      - crawl4ai_net
    restart: unless-stopped

  # Application containers. The image defaults to unclecode/crawl4ai:latest;
  # IMAGE or TAG can be set in the environment to override it.
  crawl4ai:
    image: ${IMAGE:-unclecode/crawl4ai:${TAG:-latest}}

    # Local build config (used with --build)
    build:
      context: .
      dockerfile: Dockerfile
      args:
        INSTALL_TYPE: ${INSTALL_TYPE:-default}
        ENABLE_GPU: ${ENABLE_GPU:-false}

    # No ports exposed - access via nginx only
    env_file:
      - .llm.env
    # NOTE(review): entries under `environment` take precedence over
    # `env_file`. Each `${VAR:-}` below expands to an empty string when VAR is
    # not set where Compose runs, which would mask a value for the same key in
    # .llm.env - confirm this is intended.
    environment:
      - OPENAI_API_KEY=${OPENAI_API_KEY:-}
      - DEEPSEEK_API_KEY=${DEEPSEEK_API_KEY:-}
      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
      - GROQ_API_KEY=${GROQ_API_KEY:-}
      - TOGETHER_API_KEY=${TOGETHER_API_KEY:-}
      - MISTRAL_API_KEY=${MISTRAL_API_KEY:-}
      - GEMINI_API_TOKEN=${GEMINI_API_TOKEN:-}
      - LLM_PROVIDER=${LLM_PROVIDER:-}
      # Points at the `redis` service defined above, by service name.
      - REDIS_HOST=redis
      - REDIS_PORT=6379
    volumes:
      - /dev/shm:/dev/shm  # Chromium performance
    deploy:
      replicas: 3  # Default to 3 replicas (can override with --scale)
      resources:
        limits:
          memory: 4G
        reservations:
          memory: 1G
    restart: unless-stopped
    # Probes the /health endpoint on port 11235 inside the container.
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:11235/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    user: "appuser"
    depends_on:
      - redis
    networks:
      - crawl4ai_net

  # Front door for the crawl4ai containers: maps host port 11235 to nginx
  # port 80 and mounts the nginx configuration template read-only.
  nginx:
    image: nginx:alpine
    ports:
      - "11235:80"  # Expose port 11235 to host
    volumes:
      - ./crawl4ai/templates/nginx.conf.template:/etc/nginx/nginx.conf:ro
    depends_on:
      - crawl4ai
    networks:
      - crawl4ai_net
    restart: unless-stopped

# Single bridge network shared by all three services.
networks:
  crawl4ai_net:
    driver: bridge

# Named volume backing Redis persistence; default driver settings.
volumes:
  redis_data: {}