#1489 refactor(normalize_url): enhance URL normalization logic and add comprehensive test suite

#1489 refactor(normalize_url): improve query parameter handling and sorting
2025-09-18 18:31:07 +08:00 · 2025-09-17 18:56:45 +08:00
167 changed files with 1271 additions and 48683 deletions
--- a/.githooks/pre-commit
+++ b/.githooks/pre-commit
@@ -1,31 +0,0 @@
 #!/bin/bash
 # Pre-commit hook: Auto-sync cnode files when cnode source is modified
 #
 # When a staged change touches deploy/docker/cnode_cli.py or
 # deploy/docker/server_manager.py, this hook runs deploy/installer/sync-cnode.sh
 # and stages the synced copies under deploy/installer/cnode_pkg/.
 # The commit is aborted (exit 1) if the sync script is missing or fails.
 # Colors
 RED='\033[0;31m'
 GREEN='\033[0;32m'
 YELLOW='\033[1;33m'
 NC='\033[0m'
 # Check if cnode source files are being committed
 CNODE_FILES_CHANGED=$(git diff --cached --name-only | grep -E "deploy/docker/(cnode_cli|server_manager)\.py")
 if [ -n "$CNODE_FILES_CHANGED" ]; then
    echo -e "${YELLOW}🔄 cnode source files modified, auto-syncing to package...${NC}"
    # Run sync script
    if [ -f "deploy/installer/sync-cnode.sh" ]; then
        # Abort on a failed sync so stale package files are never staged
        # and reported as synced.
        if ! bash deploy/installer/sync-cnode.sh; then
            echo -e "${RED}❌ Error: sync-cnode.sh failed${NC}"
            exit 1
        fi
        # Stage the synced files
        git add deploy/installer/cnode_pkg/cli.py
        git add deploy/installer/cnode_pkg/server_manager.py
        echo -e "${GREEN}✅ cnode package synced and staged${NC}"
    else
        echo -e "${RED}❌ Error: sync-cnode.sh not found${NC}"
        exit 1
    fi
 fi
 exit 0
--- a/.github/workflows/docker-release.yml
+++ b/.github/workflows/docker-release.yml
@@ -1,81 +0,0 @@
 # Builds multi-architecture Docker images and pushes them to Docker Hub.
 # Runs when a GitHub release is published, or when a docker-rebuild-v* tag
 # is pushed to rebuild images for an existing version.
 name: Docker Release
 on:
  release:
    types: [published]
  push:
    tags:
      - 'docker-rebuild-v*'  # Allow manual Docker rebuilds via tags
 jobs:
  docker:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Extract version from release or tag
        id: get_version
        run: |
          if [ "${{ github.event_name }}" == "release" ]; then
            # Triggered by release event
            VERSION="${{ github.event.release.tag_name }}"
            VERSION=${VERSION#v}  # Remove 'v' prefix
          else
            # Triggered by docker-rebuild-v* tag
            VERSION=${GITHUB_REF#refs/tags/docker-rebuild-v}
          fi
          echo "VERSION=$VERSION" >> $GITHUB_OUTPUT
          echo "Building Docker images for version: $VERSION"
      - name: Extract major and minor versions
        id: versions
        run: |
          VERSION=${{ steps.get_version.outputs.VERSION }}
          MAJOR=$(echo $VERSION | cut -d. -f1)
          MINOR=$(echo $VERSION | cut -d. -f1-2)
          echo "MAJOR=$MAJOR" >> $GITHUB_OUTPUT
          echo "MINOR=$MINOR" >> $GITHUB_OUTPUT
          echo "Semantic versions - Major: $MAJOR, Minor: $MINOR"
      # QEMU lets the amd64 runner execute the linux/arm64 build steps;
      # it must be set up before Buildx for the multi-platform build below.
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Log in to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}
      # One build produces both platforms; every tag below is pushed for the same build.
      - name: Build and push Docker images
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          tags: |
            unclecode/crawl4ai:${{ steps.get_version.outputs.VERSION }}
            unclecode/crawl4ai:${{ steps.versions.outputs.MINOR }}
            unclecode/crawl4ai:${{ steps.versions.outputs.MAJOR }}
            unclecode/crawl4ai:latest
          platforms: linux/amd64,linux/arm64
          cache-from: type=gha
          cache-to: type=gha,mode=max
      - name: Summary
        run: |
          echo "## 🐳 Docker Release Complete!" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "### Published Images" >> $GITHUB_STEP_SUMMARY
          echo "- \`unclecode/crawl4ai:${{ steps.get_version.outputs.VERSION }}\`" >> $GITHUB_STEP_SUMMARY
          echo "- \`unclecode/crawl4ai:${{ steps.versions.outputs.MINOR }}\`" >> $GITHUB_STEP_SUMMARY
          echo "- \`unclecode/crawl4ai:${{ steps.versions.outputs.MAJOR }}\`" >> $GITHUB_STEP_SUMMARY
          echo "- \`unclecode/crawl4ai:latest\`" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "### Platforms" >> $GITHUB_STEP_SUMMARY
          echo "- linux/amd64" >> $GITHUB_STEP_SUMMARY
          echo "- linux/arm64" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "### 🚀 Pull Command" >> $GITHUB_STEP_SUMMARY
          echo "\`\`\`bash" >> $GITHUB_STEP_SUMMARY
          echo "docker pull unclecode/crawl4ai:${{ steps.get_version.outputs.VERSION }}" >> $GITHUB_STEP_SUMMARY
          echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
--- a/.github/workflows/docs/ARCHITECTURE.md
+++ b/.github/workflows/docs/ARCHITECTURE.md
@@ -1,917 +0,0 @@
 # Workflow Architecture Documentation
 ## Overview
 This document describes the technical architecture of the split release pipeline for Crawl4AI.
 ---
 ## Architecture Diagram
 ```
 ┌─────────────────────────────────────────────────────────────────┐
 │                         Developer                                │
 │                              │                                   │
 │                              ▼                                   │
 │                    git tag v1.2.3                               │
 │                    git push --tags                              │
 └──────────────────────────────┬──────────────────────────────────┘
                               │
                               ▼
 ┌─────────────────────────────────────────────────────────────────┐
 │                      GitHub Repository                           │
 │                                                                  │
 │  ┌────────────────────────────────────────────────────────┐   │
 │  │                  Tag Event: v1.2.3                      │   │
 │  └────────────────────────────────────────────────────────┘   │
 │                              │                                   │
 │                              ▼                                   │
 │  ┌────────────────────────────────────────────────────────┐   │
 │  │           release.yml (Release Pipeline)               │   │
 │  │  ┌──────────────────────────────────────────────┐     │   │
 │  │  │ 1. Extract Version                            │     │   │
 │  │  │    v1.2.3 → 1.2.3                            │     │   │
 │  │  └──────────────────────────────────────────────┘     │   │
 │  │  ┌──────────────────────────────────────────────┐     │   │
 │  │  │ 2. Validate Version                           │     │   │
 │  │  │    Tag == __version__.py                      │     │   │
 │  │  └──────────────────────────────────────────────┘     │   │
 │  │  ┌──────────────────────────────────────────────┐     │   │
 │  │  │ 3. Build Python Package                       │     │   │
 │  │  │    - Source dist (.tar.gz)                    │     │   │
 │  │  │    - Wheel (.whl)                             │     │   │
 │  │  └──────────────────────────────────────────────┘     │   │
 │  │  ┌──────────────────────────────────────────────┐     │   │
 │  │  │ 4. Upload to PyPI                             │     │   │
 │  │  │    - Authenticate with token                  │     │   │
 │  │  │    - Upload dist/*                            │     │   │
 │  │  └──────────────────────────────────────────────┘     │   │
 │  │  ┌──────────────────────────────────────────────┐     │   │
 │  │  │ 5. Create GitHub Release                      │     │   │
 │  │  │    - Tag: v1.2.3                              │     │   │
 │  │  │    - Body: Install instructions               │     │   │
 │  │  │    - Status: Published                        │     │   │
 │  │  └──────────────────────────────────────────────┘     │   │
 │  └────────────────────────────────────────────────────────┘   │
 │                              │                                   │
 │                              ▼                                   │
 │  ┌────────────────────────────────────────────────────────┐   │
 │  │         Release Event: published (v1.2.3)              │   │
 │  └────────────────────────────────────────────────────────┘   │
 │                              │                                   │
 │                              ▼                                   │
 │  ┌────────────────────────────────────────────────────────┐   │
 │  │         docker-release.yml (Docker Pipeline)           │   │
 │  │  ┌──────────────────────────────────────────────┐     │   │
 │  │  │ 1. Extract Version from Release               │     │   │
 │  │  │    github.event.release.tag_name → 1.2.3     │     │   │
 │  │  └──────────────────────────────────────────────┘     │   │
 │  │  ┌──────────────────────────────────────────────┐     │   │
 │  │  │ 2. Parse Semantic Versions                    │     │   │
 │  │  │    1.2.3 → Major: 1, Minor: 1.2              │     │   │
 │  │  └──────────────────────────────────────────────┘     │   │
 │  │  ┌──────────────────────────────────────────────┐     │   │
 │  │  │ 3. Setup Multi-Arch Build                     │     │   │
 │  │  │    - Docker Buildx                            │     │   │
 │  │  │    - QEMU emulation                           │     │   │
 │  │  └──────────────────────────────────────────────┘     │   │
 │  │  ┌──────────────────────────────────────────────┐     │   │
 │  │  │ 4. Authenticate Docker Hub                    │     │   │
 │  │  │    - Username: DOCKER_USERNAME                │     │   │
 │  │  │    - Token: DOCKER_TOKEN                      │     │   │
 │  │  └──────────────────────────────────────────────┘     │   │
 │  │  ┌──────────────────────────────────────────────┐     │   │
 │  │  │ 5. Build Multi-Arch Images                    │     │   │
 │  │  │    ┌────────────────┬────────────────┐       │     │   │
 │  │  │    │  linux/amd64   │  linux/arm64   │       │     │   │
 │  │  │    └────────────────┴────────────────┘       │     │   │
 │  │  │    Cache: GitHub Actions (type=gha)          │     │   │
 │  │  └──────────────────────────────────────────────┘     │   │
 │  │  ┌──────────────────────────────────────────────┐     │   │
 │  │  │ 6. Push to Docker Hub                         │     │   │
 │  │  │    Tags:                                      │     │   │
 │  │  │    - unclecode/crawl4ai:1.2.3                │     │   │
 │  │  │    - unclecode/crawl4ai:1.2                  │     │   │
 │  │  │    - unclecode/crawl4ai:1                    │     │   │
 │  │  │    - unclecode/crawl4ai:latest               │     │   │
 │  │  └──────────────────────────────────────────────┘     │   │
 │  └────────────────────────────────────────────────────────┘   │
 └─────────────────────────────────────────────────────────────────┘
                               │
                               ▼
 ┌─────────────────────────────────────────────────────────────────┐
 │                     External Services                            │
 │                                                                  │
 │  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐         │
 │  │    PyPI      │  │  Docker Hub  │  │   GitHub     │         │
 │  │              │  │              │  │              │         │
 │  │  crawl4ai    │  │ unclecode/   │  │  Releases    │         │
 │  │  1.2.3       │  │ crawl4ai     │  │  v1.2.3      │         │
 │  └──────────────┘  └──────────────┘  └──────────────┘         │
 └─────────────────────────────────────────────────────────────────┘
 ```
 ---
 ## Component Details
 ### 1. Release Pipeline (release.yml)
 #### Purpose
 Fast publication of Python package and GitHub release.
 #### Input
 - **Trigger**: Git tag matching `v*` (excluding `test-v*`)
 - **Example**: `v1.2.3`
 #### Processing Stages
 ##### Stage 1: Version Extraction
 ```bash
 Input:  refs/tags/v1.2.3
 Output: VERSION=1.2.3
 ```
 **Implementation**:
 ```bash
 TAG_VERSION=${GITHUB_REF#refs/tags/v}  # Remove 'refs/tags/v' prefix
 echo "VERSION=$TAG_VERSION" >> $GITHUB_OUTPUT
 ```
 ##### Stage 2: Version Validation
 ```bash
 Input:  TAG_VERSION=1.2.3
 Check:  crawl4ai/__version__.py contains __version__ = "1.2.3"
 Output: Pass/Fail
 ```
 **Implementation**:
 ```bash
 PACKAGE_VERSION=$(python -c "from crawl4ai.__version__ import __version__; print(__version__)")
 if [ "$TAG_VERSION" != "$PACKAGE_VERSION" ]; then
  exit 1
 fi
 ```
 ##### Stage 3: Package Build
 ```bash
 Input:  Source code + pyproject.toml
 Output: dist/crawl4ai-1.2.3.tar.gz
        dist/crawl4ai-1.2.3-py3-none-any.whl
 ```
 **Implementation**:
 ```bash
 python -m build
 # Uses build backend defined in pyproject.toml
 ```
 ##### Stage 4: PyPI Upload
 ```bash
 Input:  dist/*.{tar.gz,whl}
 Auth:   PYPI_TOKEN
 Output: Package published to PyPI
 ```
 **Implementation**:
 ```bash
 twine upload dist/*
 # Environment:
 #   TWINE_USERNAME: __token__
 #   TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
 ```
 ##### Stage 5: GitHub Release Creation
 ```bash
 Input:  Tag: v1.2.3
        Body: Markdown content
 Output: Published GitHub release
 ```
 **Implementation**:
 ```yaml
 uses: softprops/action-gh-release@v2
 with:
  tag_name: v1.2.3
  name: Release v1.2.3
  body: |
    Installation instructions and changelog
  draft: false
  prerelease: false
 ```
 #### Output
 - **PyPI Package**: https://pypi.org/project/crawl4ai/1.2.3/
 - **GitHub Release**: Published release on repository
 - **Event**: `release.published` (triggers Docker workflow)
 #### Timeline
 ```
 0:00 - Tag pushed
 0:01 - Checkout + Python setup
 0:02 - Version validation
 0:03 - Package build
 0:04 - PyPI upload starts
 0:06 - PyPI upload complete
 0:07 - GitHub release created
 0:08 - Workflow complete
 ```
 ---
 ### 2. Docker Release Pipeline (docker-release.yml)
 #### Purpose
 Build and publish multi-architecture Docker images.
 #### Inputs
 ##### Input 1: Release Event (Automatic)
 ```yaml
 Event: release.published
 Data:  github.event.release.tag_name = "v1.2.3"
 ```
 ##### Input 2: Docker Rebuild Tag (Manual)
 ```yaml
 Tag: docker-rebuild-v1.2.3
 ```
 #### Processing Stages
 ##### Stage 1: Version Detection
 ```bash
 # From release event:
 VERSION="${{ github.event.release.tag_name }}"
 VERSION=${VERSION#v}  # Remove leading 'v' only
 # Result: "1.2.3"
 # From rebuild tag:
 VERSION=${GITHUB_REF#refs/tags/docker-rebuild-v}
 # Result: "1.2.3"
 ```
 ##### Stage 2: Semantic Version Parsing
 ```bash
 Input:  VERSION=1.2.3
 Output: MAJOR=1
        MINOR=1.2
        PATCH=3 (implicit)
 ```
 **Implementation**:
 ```bash
 MAJOR=$(echo $VERSION | cut -d. -f1)    # Extract first component
 MINOR=$(echo $VERSION | cut -d. -f1-2)  # Extract first two components
 ```
 ##### Stage 3: Multi-Architecture Setup
 ```yaml
 Setup:
  - Docker Buildx (multi-platform builder)
  - QEMU (ARM emulation on x86)
 Platforms:
  - linux/amd64 (x86_64)
  - linux/arm64 (aarch64)
 ```
 **Architecture**:
 ```
 GitHub Runner (linux/amd64)
  ├─ Buildx Builder
  │   ├─ Native: Build linux/amd64 image
  │   └─ QEMU: Emulate ARM to build linux/arm64 image
  └─ Generate manifest list (points to both images)
 ```
 ##### Stage 4: Docker Hub Authentication
 ```bash
 Input:  DOCKER_USERNAME
        DOCKER_TOKEN
 Output: Authenticated Docker client
 ```
 ##### Stage 5: Build with Cache
 ```yaml
 Cache Configuration:
  cache-from: type=gha           # Read from GitHub Actions cache
  cache-to: type=gha,mode=max    # Write all layers
 Cache Key Components:
  - Workflow file path
  - Branch name
  - Architecture (amd64/arm64)
 ```
 **Cache Hierarchy**:
 ```
 Cache Entry: main/docker-release.yml/linux-amd64
  ├─ Layer: sha256:abc123... (FROM python:3.12)
  ├─ Layer: sha256:def456... (RUN apt-get update)
  ├─ Layer: sha256:ghi789... (COPY requirements.txt)
  ├─ Layer: sha256:jkl012... (RUN pip install)
  └─ Layer: sha256:mno345... (COPY . /app)
 Cache Hit/Miss Logic:
  - If layer input unchanged → cache hit → skip build
  - If layer input changed → cache miss → rebuild that layer and all subsequent layers
 ```
 ##### Stage 6: Tag Generation
 ```bash
 Input:  VERSION=1.2.3, MAJOR=1, MINOR=1.2
 Output Tags:
  - unclecode/crawl4ai:1.2.3    (exact version)
  - unclecode/crawl4ai:1.2      (minor version)
  - unclecode/crawl4ai:1        (major version)
  - unclecode/crawl4ai:latest   (latest stable)
 ```
 **Tag Strategy**:
 - All tags point to same image SHA
 - Users can pin to desired stability level
 - Pushing new version updates `1`, `1.2`, and `latest` automatically
 ##### Stage 7: Push to Registry
 ```bash
 For each tag:
  For each platform (amd64, arm64):
    Push image to Docker Hub
 Create manifest list:
  Manifest: unclecode/crawl4ai:1.2.3
    ├─ linux/amd64: sha256:abc...
    └─ linux/arm64: sha256:def...
 Docker CLI automatically selects correct platform on pull
 ```
 #### Output
 - **Docker Images**: 2 platform images (linux/amd64, linux/arm64) behind a single manifest list, published under 4 tags
 - **Docker Hub**: https://hub.docker.com/r/unclecode/crawl4ai/tags
 #### Timeline
 **Cold Cache (First Build)**:
 ```
 0:00 - Release event received
 0:01 - Checkout + Buildx setup
 0:02 - Docker Hub auth
 0:03 - Start build (amd64)
 0:08 - Complete amd64 build
 0:09 - Start build (arm64)
 0:14 - Complete arm64 build
 0:15 - Generate manifests
 0:16 - Push all tags
 0:17 - Workflow complete
 ```
 **Warm Cache (Code Change Only)**:
 ```
 0:00 - Release event received
 0:01 - Checkout + Buildx setup
 0:02 - Docker Hub auth
 0:03 - Start build (amd64) - cache hit for layers 1-4
 0:04 - Complete amd64 build (only layer 5 rebuilt)
 0:05 - Start build (arm64) - cache hit for layers 1-4
 0:06 - Complete arm64 build (only layer 5 rebuilt)
 0:07 - Generate manifests
 0:08 - Push all tags
 0:09 - Workflow complete
 ```
 ---
 ## Data Flow
 ### Version Information Flow
 ```
 Developer
  │
  ▼
 crawl4ai/__version__.py
  __version__ = "1.2.3"
  │
  ├─► Git Tag
  │     v1.2.3
  │       │
  │       ▼
  │     release.yml
  │       │
  │       ├─► Validation
  │       │     ✓ Match
  │       │
  │       ├─► PyPI Package
  │       │     crawl4ai==1.2.3
  │       │
  │       └─► GitHub Release
  │             v1.2.3
  │               │
  │               ▼
  │           docker-release.yml
  │               │
  │               └─► Docker Tags
  │                     1.2.3, 1.2, 1, latest
  │
  └─► Package Metadata
        pyproject.toml
          version = "1.2.3"
 ```
 ### Secrets Flow
 ```
 GitHub Secrets (Encrypted at Rest)
  │
  ├─► PYPI_TOKEN
  │     │
  │     ▼
  │   release.yml
  │     │
  │     ▼
  │   TWINE_PASSWORD env var (masked in logs)
  │     │
  │     ▼
  │   PyPI API (HTTPS)
  │
  ├─► DOCKER_USERNAME
  │     │
  │     ▼
  │   docker-release.yml
  │     │
  │     ▼
  │   docker/login-action (masked in logs)
  │     │
  │     ▼
  │   Docker Hub API (HTTPS)
  │
  └─► DOCKER_TOKEN
        │
        ▼
      docker-release.yml
        │
        ▼
      docker/login-action (masked in logs)
        │
        ▼
      Docker Hub API (HTTPS)
 ```
 ### Artifact Flow
 ```
 Source Code
  │
  ├─► release.yml
  │     │
  │     ▼
  │   python -m build
  │     │
  │     ├─► crawl4ai-1.2.3.tar.gz
  │     │     │
  │     │     ▼
  │     │   PyPI Storage
  │     │     │
  │     │     ▼
  │     │   pip install crawl4ai
  │     │
  │     └─► crawl4ai-1.2.3-py3-none-any.whl
  │           │
  │           ▼
  │         PyPI Storage
  │           │
  │           ▼
  │         pip install crawl4ai
  │
  └─► docker-release.yml
        │
        ▼
      docker build
        │
        ├─► Image: linux/amd64
        │     │
        │     └─► Docker Hub
         │           unclecode/crawl4ai:1.2.3 (linux/amd64)
        │
        └─► Image: linux/arm64
              │
              └─► Docker Hub
                     unclecode/crawl4ai:1.2.3 (linux/arm64)
 ```
 ---
 ## State Machines
 ### Release Pipeline State Machine
 ```
 ┌─────────┐
 │  START  │
 └────┬────┘
     │
     ▼
 ┌──────────────┐
 │ Extract      │
 │ Version      │
 └──────┬───────┘
       │
       ▼
 ┌──────────────┐      ┌─────────┐
 │ Validate     │─────►│ FAILED  │
 │ Version      │ No   │ (Exit 1)│
 └──────┬───────┘      └─────────┘
       │ Yes
       ▼
 ┌──────────────┐
 │ Build        │
 │ Package      │
 └──────┬───────┘
       │
       ▼
 ┌──────────────┐      ┌─────────┐
 │ Upload       │─────►│ FAILED  │
 │ to PyPI      │ Error│ (Exit 1)│
 └──────┬───────┘      └─────────┘
       │ Success
       ▼
 ┌──────────────┐
 │ Create       │
 │ GH Release   │
 └──────┬───────┘
       │
       ▼
 ┌──────────────┐
 │  SUCCESS     │
 │ (Emit Event) │
 └──────────────┘
 ```
 ### Docker Pipeline State Machine
 ```
 ┌─────────┐
 │  START  │
 │ (Event) │
 └────┬────┘
     │
     ▼
 ┌──────────────┐
 │ Detect       │
 │ Version      │
 │ Source       │
 └──────┬───────┘
       │
       ▼
 ┌──────────────┐
 │ Parse        │
 │ Semantic     │
 │ Versions     │
 └──────┬───────┘
       │
       ▼
 ┌──────────────┐      ┌─────────┐
 │ Authenticate │─────►│ FAILED  │
 │ Docker Hub   │ Error│ (Exit 1)│
 └──────┬───────┘      └─────────┘
       │ Success
       ▼
 ┌──────────────┐
 │ Build        │
 │ amd64        │
 └──────┬───────┘
       │
       ▼
 ┌──────────────┐      ┌─────────┐
 │ Build        │─────►│ FAILED  │
 │ arm64        │ Error│ (Exit 1)│
 └──────┬───────┘      └─────────┘
       │ Success
       ▼
 ┌──────────────┐
 │ Push All     │
 │ Tags         │
 └──────┬───────┘
       │
       ▼
 ┌──────────────┐
 │  SUCCESS     │
 └──────────────┘
 ```
 ---
 ## Security Architecture
 ### Threat Model
 #### Threats Mitigated
 1. **Secret Exposure**
   - Mitigation: GitHub Actions secret masking
   - Evidence: Secrets never appear in logs
 2. **Unauthorized Package Upload**
   - Mitigation: Scoped PyPI tokens
   - Evidence: Token limited to `crawl4ai` project
 3. **Man-in-the-Middle**
   - Mitigation: HTTPS for all API calls
   - Evidence: PyPI, Docker Hub, GitHub all use TLS
 4. **Supply Chain Tampering**
   - Mitigation: Immutable artifacts, content checksums
   - Evidence: PyPI stores SHA256, Docker uses content-addressable storage
 #### Trust Boundaries
 ```
 ┌─────────────────────────────────────────┐
 │         Trusted Zone                     │
 │  ┌────────────────────────────────┐    │
 │  │  GitHub Actions Runner         │    │
 │  │  - Ephemeral VM                │    │
 │  │  - Isolated environment        │    │
 │  │  - Access to secrets           │    │
 │  └────────────────────────────────┘    │
 │                │                         │
 │                │ HTTPS (TLS 1.2+)       │
 │                ▼                         │
 └─────────────────────────────────────────┘
                 │
    ┌────────────┼────────────┐
    │            │            │
    ▼            ▼            ▼
 ┌────────┐  ┌─────────┐  ┌──────────┐
 │  PyPI  │  │  Docker │  │  GitHub  │
 │  API   │  │  Hub    │  │  API     │
 └────────┘  └─────────┘  └──────────┘
 External     External     External
  Service      Service      Service
 ```
 ### Secret Management
 #### Secret Lifecycle
 ```
 Creation (Developer)
  │
  ├─► PyPI: Create API token (scoped to project)
  ├─► Docker Hub: Create access token (read/write)
  │
  ▼
 Storage (GitHub)
  │
  ├─► Encrypted at rest (AES-256)
  ├─► Access controlled (repo-scoped)
  │
  ▼
 Usage (Workflow)
  │
  ├─► Injected as env vars
  ├─► Masked in logs (GitHub redacts on output)
  ├─► Never persisted to disk (in-memory only)
  │
  ▼
 Transmission (API Call)
  │
  ├─► HTTPS only
  ├─► TLS 1.2+ with strong ciphers
  │
  ▼
 Rotation (Manual)
  │
  └─► Regenerate on PyPI/Docker Hub
      Update GitHub secret
 ```
 ---
 ## Performance Characteristics
 ### Release Pipeline Performance
 | Metric | Value | Notes |
 |--------|-------|-------|
 | Cold start | ~2-3 min | First run on new runner |
 | Warm start | ~2-3 min | Minimal caching benefit |
 | PyPI upload | ~30-60 sec | Network-bound |
 | Package build | ~30 sec | CPU-bound |
 | Parallelization | None | Sequential by design |
 ### Docker Pipeline Performance
 | Metric | Cold Cache | Warm Cache (code) | Warm Cache (deps) |
 |--------|-----------|-------------------|-------------------|
 | Total time | 10-15 min | 1-2 min | 3-5 min |
 | amd64 build | 5-7 min | 30-60 sec | 1-2 min |
 | arm64 build | 5-7 min | 30-60 sec | 1-2 min |
 | Push time | 1-2 min | 30 sec | 30 sec |
 | Cache hit rate | 0% | 85% | 60% |
 ### Cache Performance Model
 ```python
 def estimate_build_time(changes):
    """Estimate the Docker build duration in seconds for a set of changed files.

    A fixed setup-and-push overhead is always included. On top of that, the
    rebuild cost comes from the first matching case: ``"Dockerfile"`` in
    *changes*, then ``"requirements.txt"`` in *changes*, then any entry
    ending in ``".py"``; otherwise nothing is added.
    """
    setup_and_push = 60  # seconds (setup + push)
    if "Dockerfile" in changes:
        rebuild = 10 * 60  # Full rebuild: ~11 min total
    elif "requirements.txt" in changes:
        rebuild = 3 * 60  # Deps rebuild: ~4 min total
    else:
        touches_python = False
        for path in changes:
            if path.endswith(".py"):
                touches_python = True
                break
        # Code only: ~2 min total; no changes: ~1 min total
        rebuild = 60 if touches_python else 0
    return setup_and_push + rebuild
 ```
 ---
 ## Scalability Considerations
 ### Current Limits
 | Resource | Limit | Impact |
 |----------|-------|--------|
 | Workflow concurrency | 20 (default) | Max 20 releases in parallel |
 | Artifact storage | 500 MB/artifact | PyPI packages small (<10 MB) |
 | Cache storage | 10 GB/repo | Docker layers fit comfortably |
 | Workflow run time | 6 hours | Plenty of headroom |
 ### Scaling Strategies
 #### Horizontal Scaling (Multiple Repos)
 ```
 crawl4ai (main)
  ├─ release.yml
  └─ docker-release.yml
 crawl4ai-plugins (separate)
  ├─ release.yml
  └─ docker-release.yml
 Each repo has independent:
  - Secrets
  - Cache (10 GB each)
  - Concurrency limits (20 each)
 ```
 #### Vertical Scaling (Larger Runners)
 ```yaml
 jobs:
  docker:
    runs-on: ubuntu-latest-8-cores  # GitHub-hosted larger runner
    # 4x faster builds for CPU-bound layers
 ```
 ---
 ## Disaster Recovery
 ### Failure Scenarios
 #### Scenario 1: Release Pipeline Fails
 **Failure Point**: PyPI upload fails (network error)
 **State**:
 - ✓ Version validated
 - ✓ Package built
 - ✗ PyPI upload
 - ✗ GitHub release
 **Recovery**:
 ```bash
 # Manual upload
 twine upload dist/*
 # Retry workflow (re-run from GitHub Actions UI)
 ```
 **Prevention**: Add retry logic to PyPI upload
 #### Scenario 2: Docker Pipeline Fails
 **Failure Point**: ARM build fails (dependency issue)
 **State**:
 - ✓ PyPI published
 - ✓ GitHub release created
 - ✓ amd64 image built
 - ✗ arm64 image build
 **Recovery**:
 ```bash
 # Fix Dockerfile
 git commit -am "fix: ARM build dependency"
 # Trigger rebuild
 git tag docker-rebuild-v1.2.3
 git push origin docker-rebuild-v1.2.3
 ```
 **Impact**: PyPI package available, only Docker ARM users affected
 #### Scenario 3: Partial Release
 **Failure Point**: GitHub release creation fails
 **State**:
 - ✓ PyPI published
 - ✗ GitHub release
 - ✗ Docker images
 **Recovery**:
 ```bash
 # Create release manually
 gh release create v1.2.3 \
  --title "Release v1.2.3" \
  --notes "..."
 # This triggers docker-release.yml automatically
 ```
 ---
 ## Monitoring and Observability
 ### Metrics to Track
 #### Release Pipeline
 - Success rate (target: >99%)
 - Duration (target: <3 min)
 - PyPI upload time (target: <60 sec)
 #### Docker Pipeline
 - Success rate (target: >95%)
 - Duration (target: <15 min cold, <2 min warm)
 - Cache hit rate (target: >80% for code changes)
 ### Alerting
 **Critical Alerts**:
 - Release pipeline failure (blocks release)
 - PyPI authentication failure (expired token)
 **Warning Alerts**:
 - Docker build >15 min (performance degradation)
 - Cache hit rate <50% (cache issue)
 ### Logging
 **GitHub Actions Logs**:
 - Retention: 90 days
 - Downloadable: Yes
 - Searchable: Limited
 **Recommended External Logging**:
 ```yaml
 - name: Send logs to external service
  if: failure()
  run: |
    curl -X POST https://logs.example.com/api/v1/logs \
      -H "Content-Type: application/json" \
      -d "{\"workflow\": \"${{ github.workflow }}\", \"status\": \"failed\"}"
 ```
 ---
 ## Future Enhancements
 ### Planned Improvements
 1. **Automated Changelog Generation**
   - Use conventional commits
   - Generate CHANGELOG.md automatically
 2. **Pre-release Testing**
   - Test builds on `test-v*` tags
   - Upload to TestPyPI
 3. **Notification System**
   - Slack/Discord notifications on release
   - Email on failure
 4. **Performance Optimization**
   - Parallel Docker builds (amd64 + arm64 simultaneously)
   - Persistent runners for better caching
 5. **Enhanced Validation**
   - Smoke tests after PyPI upload
   - Container security scanning
 ---
 ## References
 - [GitHub Actions Architecture](https://docs.github.com/en/actions/learn-github-actions/understanding-github-actions)
 - [Docker Build Cache](https://docs.docker.com/build/cache/)
 - [PyPI API Documentation](https://warehouse.pypa.io/api-reference/)
 ---
 **Last Updated**: 2025-01-21
 **Version**: 2.0
--- a/.github/workflows/docs/README.md
+++ b/.github/workflows/docs/README.md
--- a/.github/workflows/docs/WORKFLOW_REFERENCE.md
+++ b/.github/workflows/docs/WORKFLOW_REFERENCE.md
@@ -1,287 +0,0 @@
 # Workflow Quick Reference
 ## Quick Commands
 ### Standard Release
 ```bash
 # 1. Update version
 vim crawl4ai/__version__.py  # Set to "1.2.3"
 # 2. Commit and tag
 git add crawl4ai/__version__.py
 git commit -m "chore: bump version to 1.2.3"
 git tag v1.2.3
 git push origin main
 git push origin v1.2.3
 # 3. Monitor
 # - PyPI: ~2-3 minutes
 # - Docker: ~1-15 minutes
 ```
 ### Docker Rebuild Only
 ```bash
 git tag docker-rebuild-v1.2.3
 git push origin docker-rebuild-v1.2.3
 ```
 ### Delete Tag (Undo Release)
 ```bash
 # Local
 git tag -d v1.2.3
 # Remote
 git push --delete origin v1.2.3
 # GitHub Release
 gh release delete v1.2.3
 ```
 ---
 ## Workflow Triggers
 ### release.yml
 | Event | Pattern | Example |
 |-------|---------|---------|
 | Tag push | `v*` | `v1.2.3` |
 | Excludes | `test-v*` | `test-v1.2.3` |
 ### docker-release.yml
 | Event | Pattern | Example |
 |-------|---------|---------|
 | Release published | `release.published` | Automatic |
 | Tag push | `docker-rebuild-v*` | `docker-rebuild-v1.2.3` |
 ---
 ## Environment Variables
 ### release.yml
 | Variable | Source | Example |
 |----------|--------|---------|
 | `VERSION` | Git tag | `1.2.3` |
 | `TWINE_USERNAME` | Static | `__token__` |
 | `TWINE_PASSWORD` | Secret | `pypi-Ag...` |
 | `GITHUB_TOKEN` | Auto | `ghp_...` |
 ### docker-release.yml
 | Variable | Source | Example |
 |----------|--------|---------|
 | `VERSION` | Release/Tag | `1.2.3` |
 | `MAJOR` | Computed | `1` |
 | `MINOR` | Computed | `1.2` |
 | `DOCKER_USERNAME` | Secret | `unclecode` |
 | `DOCKER_TOKEN` | Secret | `dckr_pat_...` |
 ---
 ## Docker Tags Generated
 | Version | Tags Created |
 |---------|-------------|
 | v1.0.0 | `1.0.0`, `1.0`, `1`, `latest` |
 | v1.1.0 | `1.1.0`, `1.1`, `1`, `latest` |
 | v1.2.3 | `1.2.3`, `1.2`, `1`, `latest` |
 | v2.0.0 | `2.0.0`, `2.0`, `2`, `latest` |
 ---
 ## Workflow Outputs
 ### release.yml
 | Output | Location | Time |
 |--------|----------|------|
 | PyPI Package | https://pypi.org/project/crawl4ai/ | ~2-3 min |
 | GitHub Release | Repository → Releases | ~2-3 min |
 | Workflow Summary | Actions → Run → Summary | Immediate |
 ### docker-release.yml
 | Output | Location | Time |
 |--------|----------|------|
 | Docker Images | https://hub.docker.com/r/unclecode/crawl4ai | ~1-15 min |
 | Workflow Summary | Actions → Run → Summary | Immediate |
 ---
 ## Common Issues
 | Issue | Solution |
 |-------|----------|
 | Version mismatch | Update `crawl4ai/__version__.py` to match tag |
 | PyPI 403 Forbidden | Check `PYPI_TOKEN` secret |
 | PyPI 400 File exists | Version already published, increment version |
 | Docker auth failed | Regenerate `DOCKER_TOKEN` |
 | Docker build timeout | Check Dockerfile, review build logs |
 | Cache not working | First build on branch always cold |
 ---
 ## Secrets Checklist
 - [ ] `PYPI_TOKEN` - PyPI API token (project or account scope)
 - [ ] `DOCKER_USERNAME` - Docker Hub username
 - [ ] `DOCKER_TOKEN` - Docker Hub access token (read/write)
 - [ ] `GITHUB_TOKEN` - Auto-provided (no action needed)
 ---
 ## Workflow Dependencies
 ### release.yml Dependencies
 ```yaml
 Python: 3.12
 Actions:
  - actions/checkout@v4
  - actions/setup-python@v5
  - softprops/action-gh-release@v2
 PyPI Packages:
  - build
  - twine
 ```
 ### docker-release.yml Dependencies
 ```yaml
 Actions:
  - actions/checkout@v4
  - docker/setup-buildx-action@v3
  - docker/login-action@v3
  - docker/build-push-action@v5
 Docker:
  - Buildx
  - QEMU (for multi-arch)
 ```
 ---
 ## Cache Information
 ### Type
 - GitHub Actions Cache (`type=gha`)
 ### Storage
 - **Limit**: 10GB per repository
 - **Retention**: 7 days for unused entries
 - **Cleanup**: Automatic LRU eviction
 ### Performance
 | Scenario | Cache Hit | Build Time |
 |----------|-----------|------------|
 | First build | 0% | 10-15 min |
 | Code change only | 85% | 1-2 min |
 | Dependency update | 60% | 3-5 min |
 | No changes | 100% | 30-60 sec |
 ---
 ## Build Platforms
 | Platform | Architecture | Devices |
 |----------|--------------|---------|
 | linux/amd64 | x86_64 | Intel/AMD servers, AWS EC2, GCP |
 | linux/arm64 | aarch64 | Apple Silicon, AWS Graviton, Raspberry Pi |
 ---
 ## Version Validation
 ### Pre-Tag Checklist
 ```bash
 # Check current version
 python -c "from crawl4ai.__version__ import __version__; print(__version__)"
 # Verify it matches intended tag
 # If tag is v1.2.3, version should be "1.2.3"
 ```
 ### Post-Release Verification
 ```bash
 # PyPI
 pip install crawl4ai==1.2.3
 python -c "import crawl4ai; print(crawl4ai.__version__)"
 # Docker
 docker pull unclecode/crawl4ai:1.2.3
 docker run unclecode/crawl4ai:1.2.3 python -c "import crawl4ai; print(crawl4ai.__version__)"
 ```
 ---
 ## Monitoring URLs
 | Service | URL |
 |---------|-----|
 | GitHub Actions | `https://github.com/{owner}/{repo}/actions` |
 | PyPI Project | `https://pypi.org/project/crawl4ai/` |
 | Docker Hub | `https://hub.docker.com/r/unclecode/crawl4ai` |
 | GitHub Releases | `https://github.com/{owner}/{repo}/releases` |
 ---
 ## Rollback Strategy
 ### PyPI (Cannot Delete)
 ```bash
 # Increment patch version
 git tag v1.2.4
 git push origin v1.2.4
 ```
 ### Docker (Can Overwrite)
 ```bash
 # Rebuild with fix
 git tag docker-rebuild-v1.2.3
 git push origin docker-rebuild-v1.2.3
 ```
 ### GitHub Release
 ```bash
 # Delete release
 gh release delete v1.2.3
 # Delete tag
 git push --delete origin v1.2.3
 ```
 ---
 ## Status Badge Markdown
 ```markdown
 [![Release Pipeline](https://github.com/{owner}/{repo}/actions/workflows/release.yml/badge.svg)](https://github.com/{owner}/{repo}/actions/workflows/release.yml)
 [![Docker Release](https://github.com/{owner}/{repo}/actions/workflows/docker-release.yml/badge.svg)](https://github.com/{owner}/{repo}/actions/workflows/docker-release.yml)
 ```
 ---
 ## Timeline Example
 ```
 0:00 - Push tag v1.2.3
 0:01 - release.yml starts
 0:02 - Version validation passes
 0:03 - Package built
 0:04 - PyPI upload starts
 0:06 - PyPI upload complete ✓
 0:07 - GitHub release created ✓
 0:08 - release.yml complete
 0:08 - docker-release.yml triggered
 0:10 - Docker build starts
 0:12 - amd64 image built (cache hit)
 0:14 - arm64 image built (cache hit)
 0:15 - Images pushed to Docker Hub ✓
 0:16 - docker-release.yml complete
 Total: ~16 minutes
 Critical path (PyPI + GitHub): ~8 minutes
 ```
 ---
 ## Contact
 For workflow issues:
 1. Check Actions tab for logs
 2. Review this reference
 3. See [README.md](./README.md) for detailed docs
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -66,6 +66,36 @@ jobs:
          twine upload dist/*
          echo "✅ Package uploaded to https://pypi.org/project/crawl4ai/"
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Log in to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}
      - name: Extract major and minor versions
        id: versions
        run: |
          VERSION=${{ steps.get_version.outputs.VERSION }}
          MAJOR=$(echo $VERSION | cut -d. -f1)
          MINOR=$(echo $VERSION | cut -d. -f1-2)
          echo "MAJOR=$MAJOR" >> $GITHUB_OUTPUT
          echo "MINOR=$MINOR" >> $GITHUB_OUTPUT
      - name: Build and push Docker images
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          tags: |
            unclecode/crawl4ai:${{ steps.get_version.outputs.VERSION }}
            unclecode/crawl4ai:${{ steps.versions.outputs.MINOR }}
            unclecode/crawl4ai:${{ steps.versions.outputs.MAJOR }}
            unclecode/crawl4ai:latest
          platforms: linux/amd64,linux/arm64
      - name: Create GitHub Release
        uses: softprops/action-gh-release@v2
        with:
@@ -87,9 +117,6 @@ jobs:
            docker pull unclecode/crawl4ai:latest
            ```
            **Note:** Docker images are being built and will be available shortly.
            Check the [Docker Release workflow](https://github.com/${{ github.repository }}/actions/workflows/docker-release.yml) for build status.
            ### 📝 What's Changed
            See [CHANGELOG.md](https://github.com/${{ github.repository }}/blob/main/CHANGELOG.md) for details.
          draft: false
@@ -105,9 +132,11 @@ jobs:
          echo "- URL: https://pypi.org/project/crawl4ai/" >> $GITHUB_STEP_SUMMARY
          echo "- Install: \`pip install crawl4ai==${{ steps.get_version.outputs.VERSION }}\`" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "### 📋 GitHub Release" >> $GITHUB_STEP_SUMMARY
          echo "- https://github.com/${{ github.repository }}/releases/tag/v${{ steps.get_version.outputs.VERSION }}" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "### 🐳 Docker Images" >> $GITHUB_STEP_SUMMARY
-          echo "Docker images are being built in a separate workflow." >> $GITHUB_STEP_SUMMARY
+          echo "- \`unclecode/crawl4ai:${{ steps.get_version.outputs.VERSION }}\`" >> $GITHUB_STEP_SUMMARY
-          echo "Check: https://github.com/${{ github.repository }}/actions/workflows/docker-release.yml" >> $GITHUB_STEP_SUMMARY
+          echo "- \`unclecode/crawl4ai:${{ steps.versions.outputs.MINOR }}\`" >> $GITHUB_STEP_SUMMARY
          echo "- \`unclecode/crawl4ai:${{ steps.versions.outputs.MAJOR }}\`" >> $GITHUB_STEP_SUMMARY
          echo "- \`unclecode/crawl4ai:latest\`" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "### 📋 GitHub Release" >> $GITHUB_STEP_SUMMARY
          echo "https://github.com/${{ github.repository }}/releases/tag/v${{ steps.get_version.outputs.VERSION }}" >> $GITHUB_STEP_SUMMARY
--- a/.github/workflows/release.yml.backup
+++ b/.github/workflows/release.yml.backup
@@ -1,142 +0,0 @@
 name: Release Pipeline
 on:
  push:
    tags:
      - 'v*'
      - '!test-v*'  # Exclude test tags
 jobs:
  release:
    runs-on: ubuntu-latest
    permissions:
      contents: write  # Required for creating releases
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'
      - name: Extract version from tag
        id: get_version
        run: |
          TAG_VERSION=${GITHUB_REF#refs/tags/v}
          echo "VERSION=$TAG_VERSION" >> $GITHUB_OUTPUT
          echo "Releasing version: $TAG_VERSION"
      - name: Install package dependencies
        run: |
          pip install -e .
      - name: Check version consistency
        run: |
          TAG_VERSION=${{ steps.get_version.outputs.VERSION }}
          PACKAGE_VERSION=$(python -c "from crawl4ai.__version__ import __version__; print(__version__)")
          echo "Tag version: $TAG_VERSION"
          echo "Package version: $PACKAGE_VERSION"
          if [ "$TAG_VERSION" != "$PACKAGE_VERSION" ]; then
            echo "❌ Version mismatch! Tag: $TAG_VERSION, Package: $PACKAGE_VERSION"
            echo "Please update crawl4ai/__version__.py to match the tag version"
            exit 1
          fi
          echo "✅ Version check passed: $TAG_VERSION"
      - name: Install build dependencies
        run: |
          python -m pip install --upgrade pip
          pip install build twine
      - name: Build package
        run: python -m build
      - name: Check package
        run: twine check dist/*
      - name: Upload to PyPI
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
        run: |
          echo "📦 Uploading to PyPI..."
          twine upload dist/*
          echo "✅ Package uploaded to https://pypi.org/project/crawl4ai/"
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Log in to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}
      - name: Extract major and minor versions
        id: versions
        run: |
          VERSION=${{ steps.get_version.outputs.VERSION }}
          MAJOR=$(echo $VERSION | cut -d. -f1)
          MINOR=$(echo $VERSION | cut -d. -f1-2)
          echo "MAJOR=$MAJOR" >> $GITHUB_OUTPUT
          echo "MINOR=$MINOR" >> $GITHUB_OUTPUT
      - name: Build and push Docker images
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          tags: |
            unclecode/crawl4ai:${{ steps.get_version.outputs.VERSION }}
            unclecode/crawl4ai:${{ steps.versions.outputs.MINOR }}
            unclecode/crawl4ai:${{ steps.versions.outputs.MAJOR }}
            unclecode/crawl4ai:latest
          platforms: linux/amd64,linux/arm64
      - name: Create GitHub Release
        uses: softprops/action-gh-release@v2
        with:
          tag_name: v${{ steps.get_version.outputs.VERSION }}
          name: Release v${{ steps.get_version.outputs.VERSION }}
          body: |
            ## 🎉 Crawl4AI v${{ steps.get_version.outputs.VERSION }} Released!
            ### 📦 Installation
            **PyPI:**
            ```bash
            pip install crawl4ai==${{ steps.get_version.outputs.VERSION }}
            ```
            **Docker:**
            ```bash
            docker pull unclecode/crawl4ai:${{ steps.get_version.outputs.VERSION }}
            docker pull unclecode/crawl4ai:latest
            ```
            ### 📝 What's Changed
            See [CHANGELOG.md](https://github.com/${{ github.repository }}/blob/main/CHANGELOG.md) for details.
          draft: false
          prerelease: false
          token: ${{ secrets.GITHUB_TOKEN }}
      - name: Summary
        run: |
          echo "## 🚀 Release Complete!" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "### 📦 PyPI Package" >> $GITHUB_STEP_SUMMARY
          echo "- Version: ${{ steps.get_version.outputs.VERSION }}" >> $GITHUB_STEP_SUMMARY
          echo "- URL: https://pypi.org/project/crawl4ai/" >> $GITHUB_STEP_SUMMARY
          echo "- Install: \`pip install crawl4ai==${{ steps.get_version.outputs.VERSION }}\`" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "### 🐳 Docker Images" >> $GITHUB_STEP_SUMMARY
          echo "- \`unclecode/crawl4ai:${{ steps.get_version.outputs.VERSION }}\`" >> $GITHUB_STEP_SUMMARY
          echo "- \`unclecode/crawl4ai:${{ steps.versions.outputs.MINOR }}\`" >> $GITHUB_STEP_SUMMARY
          echo "- \`unclecode/crawl4ai:${{ steps.versions.outputs.MAJOR }}\`" >> $GITHUB_STEP_SUMMARY
          echo "- \`unclecode/crawl4ai:latest\`" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "### 📋 GitHub Release" >> $GITHUB_STEP_SUMMARY
          echo "https://github.com/${{ github.repository }}/releases/tag/v${{ steps.get_version.outputs.VERSION }}" >> $GITHUB_STEP_SUMMARY
--- a/.gitignore
+++ b/.gitignore
@@ -1,13 +1,6 @@
 # Scripts folder (private tools)
 .scripts/
 # Database files
 *.db
 # Environment files
 .env
 .env.local
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
@@ -185,8 +178,7 @@ Crawl4AI.egg-info/
 requirements0.txt
 a.txt
-# Ignore shell scripts globally, but allow test scripts
+*.sh
 # *.sh
 .idea
 docs/examples/.chainlit/
 docs/examples/.chainlit/*
@@ -267,8 +259,6 @@ continue_config.json
 .llm.env
 .private/
 .claude/
 CLAUDE_MONITOR.md
 CLAUDE.md
@@ -281,17 +271,3 @@ docs/**/data
 docs/apps/linkdin/debug*/
 docs/apps/linkdin/samples/insights/*
 scripts/
 # Database files
 *.sqlite3
 *.sqlite3-journal
 *.db-journal
 *.db-wal
 *.db-shm
 *.db
 *.rdb
 *.ldb
 .context/
--- a/2
+++ b/2
@@ -1,7 +1,7 @@
 FROM python:3.12-slim-bookworm AS build
 # C4ai version
-ARG C4AI_VER=0.7.6
+ARG C4AI_VER=0.7.0-r1
 ENV C4AI_VERSION=$C4AI_VER
 LABEL c4ai.version=$C4AI_VER
--- a/README.md
+++ b/README.md
@@ -27,13 +27,11 @@
 Crawl4AI turns the web into clean, LLM ready Markdown for RAG, agents, and data pipelines. Fast, controllable, battle tested by a 50k+ star community.
-[✨ Check out latest update v0.7.6](#-recent-updates)
+[✨ Check out latest update v0.7.4](#-recent-updates)
-✨ **New in v0.7.6**: Complete Webhook Infrastructure for Docker Job Queue API! Real-time notifications for both `/crawl/job` and `/llm/job` endpoints with exponential backoff retry, custom headers, and flexible delivery modes. No more polling! [Release notes →](https://github.com/unclecode/crawl4ai/blob/main/docs/blog/release-v0.7.6.md)
+✨ New in v0.7.4: Revolutionary LLM Table Extraction with intelligent chunking, enhanced concurrency fixes, memory management refactor, and critical stability improvements. [Release notes →](https://github.com/unclecode/crawl4ai/blob/main/docs/blog/release-v0.7.4.md)
-✨ Recent v0.7.5: Docker Hooks System with function-based API for pipeline customization, Enhanced LLM Integration with custom providers, HTTPS Preservation, and multiple community-reported bug fixes. [Release notes →](https://github.com/unclecode/crawl4ai/blob/main/docs/blog/release-v0.7.5.md)
+✨ Recent v0.7.3: Undetected Browser Support, Multi-URL Configurations, Memory Monitoring, Enhanced Table Extraction, GitHub Sponsors. [Release notes →](https://github.com/unclecode/crawl4ai/blob/main/docs/blog/release-v0.7.3.md)
 ✨ Previous v0.7.4: Revolutionary LLM Table Extraction with intelligent chunking, enhanced concurrency fixes, memory management refactor, and critical stability improvements. [Release notes →](https://github.com/unclecode/crawl4ai/blob/main/docs/blog/release-v0.7.4.md)
 <details>
  <summary>🤓 <strong>My Personal Story</strong></summary>
@@ -179,7 +177,7 @@ No rate-limited APIs. No lock-in. Build and own your data pipeline with direct g
 - 📸 **Screenshots**: Capture page screenshots during crawling for debugging or analysis.
 - 📂 **Raw Data Crawling**: Directly process raw HTML (`raw:`) or local files (`file://`).
 - 🔗 **Comprehensive Link Extraction**: Extracts internal, external links, and embedded iframe content.
- 🛠️ **Customizable Hooks**: Define hooks at every step to customize crawling behavior (supports both string and function-based APIs).
+- 🛠️ **Customizable Hooks**: Define hooks at every step to customize crawling behavior.
 - 💾 **Caching**: Cache data for improved speed and to avoid redundant fetches.
 - 📄 **Metadata Extraction**: Retrieve structured metadata from web pages.
 - 📡 **IFrame Content Extraction**: Seamless extraction from embedded iframe content.
@@ -546,54 +544,6 @@ async def test_news_crawl():
 ## ✨ Recent Updates
 <details>
 <summary><strong>Version 0.7.5 Release Highlights - The Docker Hooks & Security Update</strong></summary>
 - **🔧 Docker Hooks System**: Complete pipeline customization with user-provided Python functions at 8 key points
 - **✨ Function-Based Hooks API (NEW)**: Write hooks as regular Python functions with full IDE support:
  ```python
  from crawl4ai import hooks_to_string
  from crawl4ai.docker_client import Crawl4aiDockerClient
  # Define hooks as regular Python functions
  async def on_page_context_created(page, context, **kwargs):
      """Block images to speed up crawling"""
      await context.route("**/*.{png,jpg,jpeg,gif,webp}", lambda route: route.abort())
      await page.set_viewport_size({"width": 1920, "height": 1080})
      return page
  async def before_goto(page, context, url, **kwargs):
      """Add custom headers"""
      await page.set_extra_http_headers({'X-Crawl4AI': 'v0.7.5'})
      return page
  # Option 1: Use hooks_to_string() utility for REST API
  hooks_code = hooks_to_string({
      "on_page_context_created": on_page_context_created,
      "before_goto": before_goto
  })
  # Option 2: Docker client with automatic conversion (Recommended)
  client = Crawl4aiDockerClient(base_url="http://localhost:11235")
  results = await client.crawl(
      urls=["https://httpbin.org/html"],
      hooks={
          "on_page_context_created": on_page_context_created,
          "before_goto": before_goto
      }
  )
  # ✓ Full IDE support, type checking, and reusability!
  ```
 - **🤖 Enhanced LLM Integration**: Custom providers with temperature control and base_url configuration
 - **🔒 HTTPS Preservation**: Secure internal link handling with `preserve_https_for_internal_links=True`
 - **🐍 Python 3.10+ Support**: Modern language features and enhanced performance
 - **🛠️ Bug Fixes**: Resolved multiple community-reported issues including URL processing, JWT authentication, and proxy configuration
 [Full v0.7.5 Release Notes →](https://github.com/unclecode/crawl4ai/blob/main/docs/blog/release-v0.7.5.md)
 </details>
 <details>
 <summary><strong>Version 0.7.4 Release Highlights - The Intelligent Table Extraction & Performance Update</strong></summary>
@@ -969,36 +919,6 @@ We envision a future where AI is powered by real human knowledge, ensuring data
 For more details, see our [full mission statement](./MISSION.md).
 </details>
 ## 🌟 Current Sponsors
 ### 🏢 Enterprise Sponsors & Partners
 Our enterprise sponsors and technology partners help scale Crawl4AI to power production-grade data pipelines.
 | Company | About | Sponsorship Tier |
 |------|------|----------------------------|
 | <a href="https://dashboard.capsolver.com/passport/register?inviteCode=ESVSECTX5Q23" target="_blank"><picture><source width="120" media="(prefers-color-scheme: dark)" srcset="https://docs.crawl4ai.com/uploads/sponsors/20251013045338_72a71fa4ee4d2f40.png"><source width="120" media="(prefers-color-scheme: light)" srcset="https://www.capsolver.com/assets/images/logo-text.png"><img alt="Capsolver" src="https://www.capsolver.com/assets/images/logo-text.png"></picture></a> | AI-powered Captcha solving service. Supports all major Captcha types, including reCAPTCHA, Cloudflare, and more | 🥈 Silver |
 | <a href="https://kipo.ai" target="_blank"><img src="https://docs.crawl4ai.com/uploads/sponsors/20251013045751_2d54f57f117c651e.png" alt="DataSync" width="120"/></a> | Helps engineers and buyers find, compare, and source electronic & industrial parts in seconds, with specs, pricing, lead times & alternatives.| 🥇 Gold |
 | <a href="https://www.kidocode.com/" target="_blank"><img src="https://docs.crawl4ai.com/uploads/sponsors/20251013045045_bb8dace3f0440d65.svg" alt="Kidocode" width="120"/><p align="center">KidoCode</p></a> | Kidocode is a hybrid technology and entrepreneurship school for kids aged 5–18, offering both online and on-campus education. | 🥇 Gold |
 | <a href="https://www.alephnull.sg/" target="_blank"><img src="https://docs.crawl4ai.com/uploads/sponsors/20251013050323_a9e8e8c4c3650421.svg" alt="Aleph Null" width="120"/></a> | Singapore-based Aleph Null is Asia’s leading edtech hub, dedicated to student-centric, AI-driven education—empowering learners with the tools to thrive in a fast-changing world. | 🥇 Gold |
 ### 🧑‍🤝 Individual Sponsors
 A heartfelt thanks to our individual supporters! Every contribution helps us keep our open-source mission alive and thriving!
 <p align="left">
  <a href="https://github.com/hafezparast"><img src="https://avatars.githubusercontent.com/u/14273305?s=60&v=4" style="border-radius:50%;" width="64px;"/></a>
  <a href="https://github.com/ntohidi"><img src="https://avatars.githubusercontent.com/u/17140097?s=60&v=4" style="border-radius:50%;" width="64px;"/></a>
  <a href="https://github.com/Sjoeborg"><img src="https://avatars.githubusercontent.com/u/17451310?s=60&v=4" style="border-radius:50%;" width="64px;"/></a>
  <a href="https://github.com/romek-rozen"><img src="https://avatars.githubusercontent.com/u/30595969?s=60&v=4" style="border-radius:50%;" width="64px;"/></a>
  <a href="https://github.com/Kourosh-Kiyani"><img src="https://avatars.githubusercontent.com/u/34105600?s=60&v=4" style="border-radius:50%;" width="64px;"/></a>
  <a href="https://github.com/Etherdrake"><img src="https://avatars.githubusercontent.com/u/67021215?s=60&v=4" style="border-radius:50%;" width="64px;"/></a>
  <a href="https://github.com/shaman247"><img src="https://avatars.githubusercontent.com/u/211010067?s=60&v=4" style="border-radius:50%;" width="64px;"/></a>
  <a href="https://github.com/work-flow-manager"><img src="https://avatars.githubusercontent.com/u/217665461?s=60&v=4" style="border-radius:50%;" width="64px;"/></a>
 </p>
 > Want to join them? [Sponsor Crawl4AI →](https://github.com/sponsors/unclecode)
 ## Star History
 [![Star History Chart](https://api.star-history.com/svg?repos=unclecode/crawl4ai&type=Date)](https://star-history.com/#unclecode/crawl4ai&Date)
--- a/crawl4ai/init.py
+++ b/crawl4ai/init.py
@@ -103,8 +103,7 @@ from .browser_adapter import (
 from .utils import (
    start_colab_display_server,
-    setup_colab_environment,
+    setup_colab_environment
    hooks_to_string
 )
 __all__ = [
@@ -184,7 +183,6 @@ __all__ = [
    "ProxyConfig",
    "start_colab_display_server",
    "setup_colab_environment",
    "hooks_to_string",
    # C4A Script additions
    "c4a_compile",
    "c4a_validate", 
--- a/crawl4ai/version.py
+++ b/crawl4ai/version.py
@@ -1,7 +1,7 @@
 # crawl4ai/__version__.py
 # This is the version that will be used for stable releases
-__version__ = "0.7.6"
+__version__ = "0.7.4"
 # For nightly builds, this gets set during build process
 __nightly_version__ = None
--- a/crawl4ai/async_dispatcher.py
+++ b/crawl4ai/async_dispatcher.py
@@ -456,6 +456,8 @@ class MemoryAdaptiveDispatcher(BaseDispatcher):
                # Update priorities for waiting tasks if needed
                await self._update_queue_priorities()
            return results
        except Exception as e:
            if self.monitor:
                self.monitor.update_memory_status(f"QUEUE_ERROR: {str(e)}")                
@@ -465,7 +467,6 @@ class MemoryAdaptiveDispatcher(BaseDispatcher):
            memory_monitor.cancel()
            if self.monitor:
                self.monitor.stop()
            return results
    async def _update_queue_priorities(self):
        """Periodically update priorities of items in the queue to prevent starvation"""
--- a/crawl4ai/browser_adapter.py
+++ b/crawl4ai/browser_adapter.py
@@ -148,134 +148,6 @@ class PlaywrightAdapter(BrowserAdapter):
        return Page, Error, PlaywrightTimeoutError
 class StealthAdapter(BrowserAdapter):
    """Adapter for Playwright with stealth features using playwright_stealth"""
    def __init__(self):
        self._console_script_injected = {}
        self._stealth_available = self._check_stealth_availability()
    def _check_stealth_availability(self) -> bool:
        """Check if playwright_stealth is available and get the correct function"""
        try:
            from playwright_stealth import stealth_async
            self._stealth_function = stealth_async
            return True
        except ImportError:
            try:
                from playwright_stealth import stealth_sync
                self._stealth_function = stealth_sync
                return True
            except ImportError:
                self._stealth_function = None
                return False
    async def apply_stealth(self, page: Page):
        """Apply stealth to a page if available"""
        if self._stealth_available and self._stealth_function:
            try:
                if hasattr(self._stealth_function, '__call__'):
                    if 'async' in getattr(self._stealth_function, '__name__', ''):
                        await self._stealth_function(page)
                    else:
                        self._stealth_function(page)
            except Exception as e:
                # Fail silently or log error depending on requirements
                pass
    async def evaluate(self, page: Page, expression: str, arg: Any = None) -> Any:
        """Standard Playwright evaluate with stealth applied"""
        if arg is not None:
            return await page.evaluate(expression, arg)
        return await page.evaluate(expression)
    async def setup_console_capture(self, page: Page, captured_console: List[Dict]) -> Optional[Callable]:
        """Setup console capture using Playwright's event system with stealth"""
        # Apply stealth to the page first
        await self.apply_stealth(page)
        def handle_console_capture(msg):
            try:
                message_type = "unknown"
                try:
                    message_type = msg.type
                except:
                    pass
                message_text = "unknown"
                try:
                    message_text = msg.text
                except:
                    pass
                entry = {
                    "type": message_type,
                    "text": message_text,
                    "timestamp": time.time()
                }
                captured_console.append(entry)
            except Exception as e:
                captured_console.append({
                    "type": "console_capture_error",
                    "error": str(e),
                    "timestamp": time.time()
                })
        page.on("console", handle_console_capture)
        return handle_console_capture
    async def setup_error_capture(self, page: Page, captured_console: List[Dict]) -> Optional[Callable]:
        """Setup error capture using Playwright's event system"""
        def handle_pageerror_capture(err):
            try:
                error_message = "Unknown error"
                try:
                    error_message = err.message
                except:
                    pass
                error_stack = ""
                try:
                    error_stack = err.stack
                except:
                    pass
                captured_console.append({
                    "type": "error",
                    "text": error_message,
                    "stack": error_stack,
                    "timestamp": time.time()
                })
            except Exception as e:
                captured_console.append({
                    "type": "pageerror_capture_error",
                    "error": str(e),
                    "timestamp": time.time()
                })
        page.on("pageerror", handle_pageerror_capture)
        return handle_pageerror_capture
    async def retrieve_console_messages(self, page: Page) -> List[Dict]:
        """Not needed for Playwright - messages are captured via events"""
        return []
    async def cleanup_console_capture(self, page: Page, handle_console: Optional[Callable], handle_error: Optional[Callable]):
        """Remove event listeners"""
        if handle_console:
            page.remove_listener("console", handle_console)
        if handle_error:
            page.remove_listener("pageerror", handle_error)
    def get_imports(self) -> tuple:
        """Return Playwright imports"""
        from playwright.async_api import Page, Error
        from playwright.async_api import TimeoutError as PlaywrightTimeoutError
        return Page, Error, PlaywrightTimeoutError
 class UndetectedAdapter(BrowserAdapter):
    """Adapter for undetected browser automation with stealth features"""
--- a/crawl4ai/browser_manager.py
+++ b/crawl4ai/browser_manager.py
@@ -614,11 +614,9 @@ class BrowserManager:
        # for all racers). Prevents 'Target page/context closed' errors.
        self._page_lock = asyncio.Lock()
-        # Stealth adapter for stealth mode
+        # Stealth-related attributes
-        self._stealth_adapter = None
+        self._stealth_instance = None
-        if self.config.enable_stealth and not self.use_undetected:
+        self._stealth_cm = None 
            from .browser_adapter import StealthAdapter
            self._stealth_adapter = StealthAdapter()
        # Initialize ManagedBrowser if needed
        if self.config.use_managed_browser:
@@ -652,8 +650,16 @@ class BrowserManager:
        else:
            from playwright.async_api import async_playwright
-        # Initialize playwright
+        # Initialize playwright with or without stealth
-        self.playwright = await async_playwright().start()
+        if self.config.enable_stealth and not self.use_undetected:
            # Import stealth only when needed
            from playwright_stealth import Stealth
            # Use the recommended stealth wrapper approach
            self._stealth_instance = Stealth()
            self._stealth_cm = self._stealth_instance.use_async(async_playwright())
            self.playwright = await self._stealth_cm.__aenter__()
        else:
            self.playwright = await async_playwright().start()
        if self.config.cdp_url or self.config.use_managed_browser:
            self.config.use_managed_browser = True
@@ -1003,19 +1009,6 @@ class BrowserManager:
        signature_hash = hashlib.sha256(signature_json.encode("utf-8")).hexdigest()
        return signature_hash
    async def _apply_stealth_to_page(self, page):
        """Apply stealth to a page if stealth mode is enabled"""
        if self._stealth_adapter:
            try:
                await self._stealth_adapter.apply_stealth(page)
            except Exception as e:
                if self.logger:
                    self.logger.warning(
                        message="Failed to apply stealth to page: {error}",
                        tag="STEALTH",
                        params={"error": str(e)}
                    )
    async def get_page(self, crawlerRunConfig: CrawlerRunConfig):
        """
        Get a page for the given session ID, creating a new one if needed.
@@ -1045,7 +1038,6 @@ class BrowserManager:
                # See GH-1198: context.pages can be empty under races
                async with self._page_lock:
                    page = await ctx.new_page()
                await self._apply_stealth_to_page(page)
            else:
                context = self.default_context
                pages = context.pages
@@ -1062,7 +1054,6 @@ class BrowserManager:
                                page = pages[0]
                            else:
                                page = await context.new_page()
                                await self._apply_stealth_to_page(page)
        else:
            # Otherwise, check if we have an existing context for this config
            config_signature = self._make_config_signature(crawlerRunConfig)
@@ -1078,7 +1069,6 @@ class BrowserManager:
            # Create a new page from the chosen context
            page = await context.new_page()
            await self._apply_stealth_to_page(page)
        # If a session_id is specified, store this session so we can reuse later
        if crawlerRunConfig.session_id:
@@ -1145,5 +1135,19 @@ class BrowserManager:
            self.managed_browser = None
        if self.playwright:
-            await self.playwright.stop()
+            # Handle stealth context manager cleanup if it exists
            if hasattr(self, '_stealth_cm') and self._stealth_cm is not None:
                try:
                    await self._stealth_cm.__aexit__(None, None, None)
                except Exception as e:
                    if self.logger:
                        self.logger.error(
                            message="Error closing stealth context: {error}",
                            tag="ERROR", 
                            params={"error": str(e)}
                        )
                self._stealth_cm = None
                self._stealth_instance = None
            else:
                await self.playwright.stop()
            self.playwright = None
--- a/crawl4ai/cli.py
+++ b/crawl4ai/cli.py
@@ -2,8 +2,6 @@ import click
 import os
 import sys
 import time
 import subprocess
 import shutil
 import humanize
 from typing import Dict, Any, Optional, List
@@ -627,76 +625,6 @@ def cli():
    pass
 # Register server command group (Docker orchestration)
 # Redirect to standalone 'cnode' CLI
@cli.command("server", context_settings=dict(
    ignore_unknown_options=True,
    allow_extra_args=True,
    allow_interspersed_args=False
))
@click.pass_context
def server_cmd(ctx):
    """Manage Crawl4AI Docker server instances (deprecated - use 'cnode')
    This command has been moved to a standalone CLI called 'cnode'.
    For new installations, use:
        curl -sSL https://crawl4ai.com/deploy.sh | bash
    This redirect allows existing scripts to continue working.
    Available commands: start, stop, status, scale, logs
    Use 'crwl server <command> --help' for command-specific help.
    """
    # Bind `sys` once, on every path. An `import sys` placed only inside the
    # fallback branch makes `sys` a function-local name that is unbound when
    # cnode IS installed, so `sys.exit(...)` there raises UnboundLocalError.
    import sys

    # Check if cnode is installed
    cnode_path = shutil.which("cnode")
    # Everything after `server` (subcommand + options) is forwarded untouched
    args = ctx.args

    if not cnode_path:
        console.print(Panel(
            "[yellow]The 'crwl server' command has been moved to a standalone CLI.[/yellow]\n\n"
            "Please install 'cnode' (Crawl4AI Node Manager):\n"
            "[cyan]curl -sSL https://crawl4ai.com/deploy.sh | bash[/cyan]\n\n"
            "After installation, use:\n"
            "[green]cnode <command>[/green] instead of [dim]crwl server <command>[/dim]\n\n"
            "For backward compatibility, we're using the local version for now.",
            title="Server Command Moved",
            border_style="yellow"
        ))
        # Fall back to the copy of the CLI that lives in deploy/docker
        try:
            deploy_path = str(Path(__file__).parent.parent / 'deploy' / 'docker')
            if deploy_path not in sys.path:
                sys.path.insert(0, deploy_path)
            from cnode_cli import cli as cnode_cli
            # The local CLI reads its arguments from sys.argv
            sys.argv = ['cnode'] + args
            cnode_cli(standalone_mode=False)
            sys.exit(0)
        except SystemExit as e:
            # Normal exit from click (or the sys.exit(0) above): keep its code
            sys.exit(e.code)
        except Exception as e:
            console.print(f"[red]Error: Could not find cnode or local server CLI: {e}[/red]")
            console.print(f"[dim]Details: {e}[/dim]")
            import traceback
            console.print(f"[dim]{traceback.format_exc()}[/dim]")
            sys.exit(1)

    # cnode is installed - forward everything to it and mirror its exit status.
    # SystemExit is not an Exception subclass, so it passes through the handler.
    try:
        result = subprocess.run([cnode_path] + args, check=False)
        sys.exit(result.returncode)
    except Exception as e:
        console.print(f"[red]Error running cnode: {e}[/red]")
        sys.exit(1)
@cli.group("browser")
 def browser_cmd():
    """Manage browser instances for Crawl4AI
@@ -1534,15 +1462,9 @@ def default(url: str, example: bool, browser_config: str, crawler_config: str, f
 def main():
    import sys
-    # Don't auto-insert 'crawl' if the command is recognized
+    if len(sys.argv) < 2 or sys.argv[1] not in cli.commands:
    if len(sys.argv) >= 2 and sys.argv[1] in cli.commands:
        cli()
    elif len(sys.argv) < 2:
        cli()
    else:
        # Unknown command - insert 'crawl' for backward compat
        sys.argv.insert(1, "crawl")
-        cli()
+    cli()
 if __name__ == "__main__":
    main()
--- a/crawl4ai/docker_client.py
+++ b/crawl4ai/docker_client.py
@@ -1,4 +1,4 @@
-from typing import List, Optional, Union, AsyncGenerator, Dict, Any, Callable
+from typing import List, Optional, Union, AsyncGenerator, Dict, Any
 import httpx
 import json
 from urllib.parse import urljoin
@@ -7,7 +7,6 @@ import asyncio
 from .async_configs import BrowserConfig, CrawlerRunConfig
 from .models import CrawlResult
 from .async_logger import AsyncLogger, LogLevel
 from .utils import hooks_to_string
 class Crawl4aiClientError(Exception):
@@ -71,41 +70,17 @@ class Crawl4aiDockerClient:
            self.logger.error(f"Server unreachable: {str(e)}", tag="ERROR")
            raise ConnectionError(f"Cannot connect to server: {str(e)}")
-    def _prepare_request(
+    def _prepare_request(self, urls: List[str], browser_config: Optional[BrowserConfig] = None, 
-        self,
+                       crawler_config: Optional[CrawlerRunConfig] = None) -> Dict[str, Any]:
        urls: List[str],
        browser_config: Optional[BrowserConfig] = None,
        crawler_config: Optional[CrawlerRunConfig] = None,
        hooks: Optional[Union[Dict[str, Callable], Dict[str, str]]] = None,
        hooks_timeout: int = 30
    ) -> Dict[str, Any]:
        """Prepare request data from configs."""
        if self._token:
            self._http_client.headers["Authorization"] = f"Bearer {self._token}"
-
+        return {
        request_data = {
            "urls": urls,
            "browser_config": browser_config.dump() if browser_config else {},
            "crawler_config": crawler_config.dump() if crawler_config else {}
        }
        # Handle hooks if provided
        if hooks:
            # Check if hooks are already strings or need conversion
            if any(callable(v) for v in hooks.values()):
                # Convert function objects to strings
                hooks_code = hooks_to_string(hooks)
            else:
                # Already in string format
                hooks_code = hooks
            request_data["hooks"] = {
                "code": hooks_code,
                "timeout": hooks_timeout
            }
        return request_data
    async def _request(self, method: str, endpoint: str, **kwargs) -> httpx.Response:
        """Make an HTTP request with error handling."""
        url = urljoin(self.base_url, endpoint)
@@ -127,38 +102,12 @@ class Crawl4aiDockerClient:
        self,
        urls: List[str],
        browser_config: Optional[BrowserConfig] = None,
-        crawler_config: Optional[CrawlerRunConfig] = None,
+        crawler_config: Optional[CrawlerRunConfig] = None
        hooks: Optional[Union[Dict[str, Callable], Dict[str, str]]] = None,
        hooks_timeout: int = 30
    ) -> Union[CrawlResult, List[CrawlResult], AsyncGenerator[CrawlResult, None]]:
-        """
+        """Execute a crawl operation."""
        Execute a crawl operation.
        Args:
            urls: List of URLs to crawl
            browser_config: Browser configuration
            crawler_config: Crawler configuration
            hooks: Optional hooks - can be either:
                   - Dict[str, Callable]: Function objects that will be converted to strings
                   - Dict[str, str]: Already stringified hook code
            hooks_timeout: Timeout in seconds for each hook execution (1-120)
        Returns:
            Single CrawlResult, list of results, or async generator for streaming
        Example with function hooks:
            >>> async def my_hook(page, context, **kwargs):
            ...     await page.set_viewport_size({"width": 1920, "height": 1080})
            ...     return page
            >>>
            >>> result = await client.crawl(
            ...     ["https://example.com"],
            ...     hooks={"on_page_context_created": my_hook}
            ... )
        """
        await self._check_server()
-        data = self._prepare_request(urls, browser_config, crawler_config, hooks, hooks_timeout)
+        data = self._prepare_request(urls, browser_config, crawler_config)
        is_streaming = crawler_config and crawler_config.stream
        self.logger.info(f"Crawling {len(urls)} URLs {'(streaming)' if is_streaming else ''}", tag="CRAWL")
--- a/crawl4ai/server_cli.py
+++ b/crawl4ai/server_cli.py
@@ -1,479 +0,0 @@
 """
 Crawl4AI Server CLI Commands
 Provides `crwl server` command group for Docker orchestration.
 """
 import click
 import anyio
 from rich.console import Console
 from rich.table import Table
 from rich.panel import Panel
 from rich.prompt import Confirm
 from crawl4ai.server_manager import ServerManager
 console = Console()
# Click group named "server"; its docstring is the help text shown to users.
# The body is intentionally empty.
@click.group("server")
 def server_cmd():
    """Manage Crawl4AI Docker server instances
    One-command deployment with automatic scaling:
    - Single container for development (N=1)
    - Docker Swarm for production with built-in load balancing (N>1)
    - Docker Compose + Nginx as fallback (N>1)
    Examples:
        crwl server start                    # Single container on port 11235
        crwl server start --replicas 3       # Auto-detect Swarm or Compose
        crwl server start -r 5 --port 8080   # 5 replicas on custom port
        crwl server status                   # Check current deployment
        crwl server scale 10                 # Scale to 10 replicas
        crwl server stop                     # Stop and cleanup
    """
    pass
@server_cmd.command("start")
@click.option(
    "--replicas", "-r",
    type=int,
    default=1,
    help="Number of container replicas (default: 1)"
)
@click.option(
    "--mode",
    type=click.Choice(["auto", "single", "swarm", "compose"]),
    default="auto",
    help="Deployment mode (default: auto-detect)"
)
@click.option(
    "--port", "-p",
    type=int,
    default=11235,
    help="External port to expose (default: 11235)"
)
@click.option(
    "--env-file",
    type=click.Path(exists=True),
    help="Path to environment file"
)
@click.option(
    "--image",
    default="unclecode/crawl4ai:latest",
    help="Docker image to use (default: unclecode/crawl4ai:latest)"
)
def start_cmd(replicas: int, mode: str, port: int, env_file: str, image: str):
    """Start Crawl4AI server with automatic orchestration.
    Deployment modes:
    - auto: Automatically choose best mode (default)
    - single: Single container (N=1 only)
    - swarm: Docker Swarm with built-in load balancing
    - compose: Docker Compose + Nginx reverse proxy
    The server will:
    1. Check if Docker is running
    2. Validate port availability
    3. Pull image if needed
    4. Start container(s) with health checks
    5. Save state for management
    Examples:
        # Development: single container
        crwl server start
        # Production: 5 replicas with Swarm
        crwl server start --replicas 5
        # Custom configuration
        crwl server start -r 3 --port 8080 --env-file .env.prod
    """
    manager = ServerManager()

    # Echo the requested deployment before any work starts.
    banner = (
        "[cyan]Starting Crawl4AI Server[/cyan]\n\n"
        f"Replicas: [yellow]{replicas}[/yellow]\n"
        f"Mode: [yellow]{mode}[/yellow]\n"
        f"Port: [yellow]{port}[/yellow]\n"
        f"Image: [yellow]{image}[/yellow]"
    )
    console.print(Panel(banner, title="Server Start", border_style="cyan"))

    async def _launch():
        return await manager.start(
            replicas=replicas,
            mode=mode,
            port=port,
            env_file=env_file,
            image=image
        )

    with console.status("[cyan]Starting server..."):
        outcome = anyio.run(_launch)

    if not outcome["success"]:
        # Prefer "error", then "message", then a generic fallback.
        failure = outcome.get("error", outcome.get("message", "Unknown error"))
        console.print(Panel(
            f"[red]✗ Failed to start server[/red]\n\n{failure}",
            title="Error",
            border_style="red"
        ))
        if "already running" in failure.lower():
            console.print("\n[yellow]Hint: Use 'crwl server status' to check current deployment[/yellow]")
            console.print("[yellow]      Use 'crwl server stop' to stop existing server[/yellow]")
        return

    # Report the mode stored in the returned state, else the requested one.
    effective_mode = outcome.get('state_data', {}).get('mode', mode)
    summary = (
        "[green]✓ Server started successfully![/green]\n\n"
        f"Mode: [cyan]{effective_mode}[/cyan]\n"
        f"URL: [bold]http://localhost:{port}[/bold]\n"
        f"Health: [bold]http://localhost:{port}/health[/bold]\n"
        f"Monitor: [bold]http://localhost:{port}/monitor[/bold]"
    )
    console.print(Panel(summary, title="Server Running", border_style="green"))
@server_cmd.command("status")
def status_cmd():
    """Show current server status and deployment info.
    Displays:
    - Running state (up/down)
    - Deployment mode (single/swarm/compose)
    - Number of replicas
    - Port mapping
    - Uptime
    - Image version
    Example:
        crwl server status
    """
    manager = ServerManager()

    async def _query():
        return await manager.status()

    info = anyio.run(_query)

    # Nothing running: show the hint panel and stop.
    if not info["running"]:
        console.print(Panel(
            "[yellow]No server is currently running[/yellow]\n\n"
            "Use 'crwl server start' to launch a server",
            title="Server Status",
            border_style="yellow"
        ))
        return

    port = info.get("port", 11235)
    rows = (
        ("Status", "🟢 Running"),
        ("Mode", info["mode"]),
        ("Replicas", str(info.get("replicas", 1))),
        ("Port", str(port)),
        ("Image", info.get("image", "unknown")),
        ("Uptime", info.get("uptime", "unknown")),
        ("Started", info.get("started_at", "unknown")),
    )

    table = Table(title="Crawl4AI Server Status", border_style="green")
    table.add_column("Property", style="cyan")
    table.add_column("Value", style="green")
    for label, value in rows:
        table.add_row(label, value)

    console.print(table)
    console.print("\n[green]✓ Server is healthy[/green]")
    console.print(f"[dim]Access: http://localhost:{port}[/dim]")
@server_cmd.command("stop")
@click.option(
    "--remove-volumes",
    is_flag=True,
    help="Remove associated volumes (WARNING: deletes data)"
)
def stop_cmd(remove_volumes: bool):
    """Stop running Crawl4AI server and cleanup resources.
    This will:
    1. Stop all running containers/services
    2. Remove containers
    3. Optionally remove volumes (--remove-volumes)
    4. Clean up state files
    WARNING: Use --remove-volumes with caution as it will delete
    persistent data including Redis databases and logs.
    Examples:
        # Stop server, keep volumes
        crwl server stop
        # Stop and remove all data
        crwl server stop --remove-volumes
    """
    manager = ServerManager()

    # Removing volumes needs an explicit yes; the prompt is only shown then.
    if remove_volumes and not Confirm.ask(
        "[red]⚠️  This will delete all server data including Redis databases. Continue?[/red]"
    ):
        console.print("[yellow]Cancelled[/yellow]")
        return

    async def _shutdown():
        return await manager.stop(remove_volumes=remove_volumes)

    with console.status("[cyan]Stopping server..."):
        outcome = anyio.run(_shutdown)

    if outcome["success"]:
        detail = outcome.get('message', 'All resources cleaned up')
        console.print(Panel(
            f"[green]✓ Server stopped successfully[/green]\n\n{detail}",
            title="Server Stopped",
            border_style="green"
        ))
        return

    problem = outcome.get('error', outcome.get('message', 'Unknown error'))
    console.print(Panel(
        f"[red]✗ Error stopping server[/red]\n\n{problem}",
        title="Error",
        border_style="red"
    ))
@server_cmd.command("scale")
@click.argument("replicas", type=int)
def scale_cmd(replicas: int):
    """Scale server to specified number of replicas.
    Only works with Swarm or Compose modes. Single container
    mode cannot be scaled (must stop and restart with --replicas).
    Scaling is live and does not require downtime. The load
    balancer will automatically distribute traffic to new replicas.
    Examples:
        # Scale up to 10 replicas
        crwl server scale 10
        # Scale down to 2 replicas
        crwl server scale 2
        # Scale to 1 (minimum)
        crwl server scale 1
    """
    # Reject invalid counts before a manager is even created.
    if replicas < 1:
        console.print("[red]Error: Replicas must be at least 1[/red]")
        return

    manager = ServerManager()

    async def _resize():
        return await manager.scale(replicas=replicas)

    with console.status(f"[cyan]Scaling to {replicas} replicas..."):
        outcome = anyio.run(_resize)

    if outcome["success"]:
        console.print(Panel(
            "[green]✓ Scaled successfully[/green]\n\n"
            f"New replica count: [bold]{replicas}[/bold]\n"
            f"Mode: [cyan]{outcome.get('mode')}[/cyan]",
            title="Scaling Complete",
            border_style="green"
        ))
        return

    failure = outcome.get("error", outcome.get("message", "Unknown error"))
    console.print(Panel(
        f"[red]✗ Scaling failed[/red]\n\n{failure}",
        title="Error",
        border_style="red"
    ))
    # A single-container deployment has to be recreated rather than scaled.
    if "single container" in failure.lower():
        console.print("\n[yellow]Hint: For single container mode:[/yellow]")
        console.print("[yellow]  1. crwl server stop[/yellow]")
        console.print(f"[yellow]  2. crwl server start --replicas {replicas}[/yellow]")
@server_cmd.command("logs")
@click.option(
    "--follow", "-f",
    is_flag=True,
    help="Follow log output (like tail -f)"
)
@click.option(
    "--tail",
    type=int,
    default=100,
    help="Number of lines to show (default: 100)"
)
def logs_cmd(follow: bool, tail: int):
    """View server logs.
    Shows logs from running containers/services. Use --follow
    to stream logs in real-time.
    Examples:
        # Show last 100 lines
        crwl server logs
        # Show last 500 lines
        crwl server logs --tail 500
        # Follow logs in real-time
        crwl server logs --follow
        # Combine options
        crwl server logs -f --tail 50
    """
    manager = ServerManager()

    async def _fetch():
        return await manager.logs(follow=follow, tail=tail)

    # Whatever the manager returns is printed as-is.
    console.print(anyio.run(_fetch))
@server_cmd.command("cleanup")
@click.option(
    "--force",
    is_flag=True,
    help="Force cleanup even if state file doesn't exist"
)
def cleanup_cmd(force: bool):
    """Force cleanup of all Crawl4AI Docker resources.
    Stops and removes all containers, networks, and optionally volumes.
    Useful when server is stuck or state is corrupted.
    Examples:
        # Clean up everything
        crwl server cleanup
        # Force cleanup (ignore state file)
        crwl server cleanup --force
    """
    manager = ServerManager()

    warning = (
        "[yellow]⚠️  Cleaning up Crawl4AI Docker resources[/yellow]\n\n"
        "This will stop and remove:\n"
        "- All Crawl4AI containers\n"
        "- Nginx load balancer\n"
        "- Redis instance\n"
        "- Docker networks\n"
        "- State files"
    )
    console.print(Panel(warning, title="Cleanup", border_style="yellow"))

    # --force skips the interactive confirmation entirely.
    confirmed = force or Confirm.ask("[yellow]Continue with cleanup?[/yellow]")
    if not confirmed:
        console.print("[yellow]Cancelled[/yellow]")
        return

    async def _purge():
        return await manager.cleanup(force=force)

    with console.status("[cyan]Cleaning up resources..."):
        outcome = anyio.run(_purge)

    if outcome["success"]:
        console.print(Panel(
            "[green]✓ Cleanup completed successfully[/green]\n\n"
            f"Removed: {outcome.get('removed', 0)} containers\n"
            f"{outcome.get('message', 'All resources cleaned up')}",
            title="Cleanup Complete",
            border_style="green"
        ))
        return

    console.print(Panel(
        "[yellow]⚠️  Partial cleanup[/yellow]\n\n"
        f"{outcome.get('message', 'Some resources may still exist')}",
        title="Cleanup Status",
        border_style="yellow"
    ))
@server_cmd.command("restart")
@click.option(
    "--replicas", "-r",
    type=int,
    help="New replica count (optional)"
)
def restart_cmd(replicas: int):
    """Restart server (stop then start with same config).
    Preserves existing configuration unless overridden with options.
    Useful for applying image updates or recovering from errors.
    Examples:
        # Restart with same configuration
        crwl server restart
        # Restart and change replica count
        crwl server restart --replicas 5
    """
    # Validate the override before touching anything: otherwise the running
    # server would already be stopped by the time a bad count is noticed.
    # Same rule and message as the `scale` command.
    if replicas is not None and replicas < 1:
        console.print("[red]Error: Replicas must be at least 1[/red]")
        return

    manager = ServerManager()

    # Get current state
    async def _get_status():
        return await manager.status()
    current = anyio.run(_get_status)
    if not current["running"]:
        console.print("[yellow]No server is running. Use 'crwl server start' instead.[/yellow]")
        return

    # Extract current config (defaults mirror the `start` command's defaults)
    current_replicas = current.get("replicas", 1)
    current_port = current.get("port", 11235)
    current_image = current.get("image", "unclecode/crawl4ai:latest")
    current_mode = current.get("mode", "auto")

    # Override with CLI args; an omitted --replicas arrives as None
    new_replicas = replicas if replicas is not None else current_replicas

    console.print(Panel(
        f"[cyan]Restarting Crawl4AI Server[/cyan]\n\n"
        f"Replicas: [yellow]{current_replicas}[/yellow] → [green]{new_replicas}[/green]\n"
        f"Port: [yellow]{current_port}[/yellow]\n"
        f"Mode: [yellow]{current_mode}[/yellow]",
        title="Server Restart",
        border_style="cyan"
    ))

    # Stop current (volumes are kept so data survives the restart)
    with console.status("[cyan]Stopping current server..."):
        async def _stop_server():
            return await manager.stop(remove_volumes=False)
        stop_result = anyio.run(_stop_server)
    if not stop_result["success"]:
        # Same error -> message -> default fallback as the other commands,
        # so a missing "error" key never prints "None".
        stop_error = stop_result.get("error", stop_result.get("message", "Unknown error"))
        console.print(f"[red]Failed to stop server: {stop_error}[/red]")
        return

    # Start new. The mode is passed as "auto" rather than current_mode, so it
    # is chosen again by the manager for the (possibly changed) replica count.
    with console.status("[cyan]Starting server..."):
        async def _start_server():
            return await manager.start(
                replicas=new_replicas,
                mode="auto",
                port=current_port,
                image=current_image
            )
        start_result = anyio.run(_start_server)

    if start_result["success"]:
        console.print(Panel(
            f"[green]✓ Server restarted successfully![/green]\n\n"
            f"URL: [bold]http://localhost:{current_port}[/bold]",
            title="Restart Complete",
            border_style="green"
        ))
    else:
        start_error = start_result.get("error", start_result.get("message", "Unknown error"))
        console.print(Panel(
            f"[red]✗ Failed to restart server[/red]\n\n"
            f"{start_error}",
            title="Error",
            border_style="red"
        ))
--- a/crawl4ai/server_manager.py
+++ b/crawl4ai/server_manager.py
--- a/crawl4ai/templates/docker-compose.template.yml
+++ b/crawl4ai/templates/docker-compose.template.yml
@@ -1,52 +0,0 @@
 version: '3.8'
 services:
  redis:
    image: redis:alpine
    command: redis-server --appendonly yes
    volumes:
      - redis_data:/data
    networks:
      - crawl4ai_net
    restart: unless-stopped
  crawl4ai:
    image: ${IMAGE}
    deploy:
      replicas: ${REPLICAS}
      resources:
        limits:
          memory: 4G
    shm_size: 1g
    environment:
      - REDIS_HOST=redis
      - REDIS_PORT=6379
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:11235/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    depends_on:
      - redis
    networks:
      - crawl4ai_net
  nginx:
    image: nginx:alpine
    ports:
      - "${PORT}:80"
    volumes:
      - ${NGINX_CONF}:/etc/nginx/nginx.conf:ro
    depends_on:
      - crawl4ai
    networks:
      - crawl4ai_net
    restart: unless-stopped
 networks:
  crawl4ai_net:
    driver: bridge
 volumes:
  redis_data:
--- a/crawl4ai/templates/nginx.conf.template
+++ b/crawl4ai/templates/nginx.conf.template
@@ -1,75 +0,0 @@
 events {
    worker_connections 1024;
 }
 http {
    upstream crawl4ai_backend {
        # DNS-based load balancing to Docker Compose service
        # Docker Compose provides DNS resolution for service name
        server crawl4ai:11235 max_fails=3 fail_timeout=30s;
        # Keep connections alive
        keepalive 32;
    }
    # Sticky sessions for monitoring (same IP always goes to same container)
    upstream crawl4ai_monitor {
        ip_hash;  # Sticky sessions based on client IP
        server crawl4ai:11235 max_fails=3 fail_timeout=30s;
        keepalive 32;
    }
    server {
        listen 80;
        server_name _;
        # Increase timeouts for long-running crawl operations
        proxy_connect_timeout 300;
        proxy_send_timeout 300;
        proxy_read_timeout 300;
        send_timeout 300;
        # WebSocket endpoint for real-time monitoring (exact match)
        location = /monitor/ws {
            proxy_pass http://crawl4ai_monitor/monitor/ws;
            proxy_http_version 1.1;
            proxy_set_header Upgrade $http_upgrade;
            proxy_set_header Connection "upgrade";
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            # WebSocket timeouts
            proxy_connect_timeout 7d;
            proxy_send_timeout 7d;
            proxy_read_timeout 7d;
        }
        # Monitor and dashboard with sticky sessions (regex location)
        location ~ ^/(monitor|dashboard) {
            proxy_pass http://crawl4ai_monitor;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
        }
        # HTTP endpoints (load balanced)
        location / {
            proxy_pass http://crawl4ai_backend;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            # Support large request bodies (for batch operations)
            client_max_body_size 10M;
        }
        # Health check endpoint (bypass load balancer)
        location /health {
            proxy_pass http://crawl4ai_backend/health;
            access_log off;
        }
    }
 }
--- a/crawl4ai/utils.py
+++ b/crawl4ai/utils.py
@@ -47,7 +47,6 @@ from urllib.parse import (
    urljoin, urlparse, urlunparse,
    parse_qsl, urlencode, quote, unquote
 )
 import inspect
 # Monkey patch to fix wildcard handling in urllib.robotparser
@@ -2178,7 +2177,7 @@ def normalize_url(
    str | None
        A clean, canonical URL or None if href is empty/None.
    """
-    if not href:
+    if not href or not href.strip():
        return None
    # Resolve relative paths first
@@ -2201,6 +2200,14 @@ def normalize_url(
    # ── netloc ──
    netloc = parsed.netloc.lower()
    # Remove default ports
    if ':' in netloc:
        host, port = netloc.rsplit(':', 1)
        if (parsed.scheme == 'http' and port == '80') or (parsed.scheme == 'https' and port == '443'):
            netloc = host
        else:
            netloc = f"{host}:{port}"
    # ── path ──
    # Strip duplicate slashes and trailing "/" (except root)
    # IMPORTANT: Don't use quote(unquote()) as it mangles + signs in URLs
@@ -2213,21 +2220,25 @@ def normalize_url(
    query = parsed.query
    if query:
        # explode, mutate, then rebuild
-        params = [(k.lower(), v) for k, v in parse_qsl(query, keep_blank_values=True)]
+        params = list(parse_qsl(query, keep_blank_values=True)) # Parse query string into key-value pairs, preserving blank values
        if drop_query_tracking:
            # Define default tracking parameters to remove for cleaner URLs
            default_tracking = {
                'utm_source', 'utm_medium', 'utm_campaign', 'utm_term',
                'utm_content', 'gclid', 'fbclid', 'ref', 'ref_src'
            }
            if extra_drop_params:
-                default_tracking |= {p.lower() for p in extra_drop_params}
+                default_tracking |= {p.lower() for p in extra_drop_params} # Add any extra parameters to drop, case-insensitive
-            params = [(k, v) for k, v in params if k not in default_tracking]
+            params = [(k, v) for k, v in params if k not in default_tracking] # Filter out tracking parameters
        # NOTE(review): identity copy - keys are not normalized (lowercased) here
        params = [(k, v) for k, v in params]
        if sort_query:
-            params.sort(key=lambda kv: kv[0])
+            params.sort(key=lambda kv: kv[0]) # Sort parameters by key (case-sensitive: keys are no longer lowercased above)
-        query = urlencode(params, doseq=True) if params else ''
+        query = urlencode(params, doseq=True) if params else '' # Rebuild query string, handling sequences properly
    # ── fragment ──
    fragment = parsed.fragment if keep_fragment else ''
@@ -3531,51 +3542,3 @@ def get_memory_stats() -> Tuple[float, float, float]:
    used_percent = get_true_memory_usage_percent()
    return used_percent, available_gb, total_gb
 # Hook utilities for Docker API
 def hooks_to_string(hooks: Dict[str, Callable]) -> Dict[str, str]:
    """
    Convert hook function objects to source-code strings for the Docker API.
    Each callable in ``hooks`` is replaced by the text of its definition, as
    returned by ``inspect.getsource`` and dedented with ``textwrap.dedent``.
    Args:
        hooks: Dictionary mapping hook point names to Python function objects.
               Functions should be async and follow hook signature requirements.
    Returns:
        Dictionary mapping hook point names to string representations of the functions.
        An empty input dictionary yields an empty result.
    Example:
        >>> async def my_hook(page, context, **kwargs):
        ...     await page.set_viewport_size({"width": 1920, "height": 1080})
        ...     return page
        >>>
        >>> hooks_dict = {"on_page_context_created": my_hook}
        >>> api_hooks = hooks_to_string(hooks_dict)
        >>> # api_hooks is now ready to use with Docker API
    Raises:
        ValueError: If a hook is not callable or its source cannot be extracted.
            In the latter case the underlying OSError/TypeError is attached
            as ``__cause__``.
    """
    result = {}
    for hook_name, hook_func in hooks.items():
        if not callable(hook_func):
            raise ValueError(f"Hook '{hook_name}' must be a callable function, got {type(hook_func)}")
        try:
            # getsource returns the text exactly as written in the file, including
            # any enclosing indentation; dedent so the snippet stands on its own.
            result[hook_name] = textwrap.dedent(inspect.getsource(hook_func))
        except (OSError, TypeError) as e:
            # Chain the original error so the traceback shows why extraction failed
            # (e.g. built-ins or functions defined in an interactive session).
            raise ValueError(
                f"Cannot extract source code for hook '{hook_name}'. "
                f"Make sure the function is defined in a file (not interactively). Error: {e}"
            ) from e
    return result
--- a/deploy/docker/AGENT.md
+++ b/deploy/docker/AGENT.md
@@ -1,402 +0,0 @@
 # Crawl4AI DevOps Agent Context
 ## Service Overview
 **Crawl4AI**: Browser-based web crawling service with AI extraction. Docker deployment with horizontal scaling (1-N containers), Redis coordination, Nginx load balancing.
 ## Architecture Quick Reference
 ```
 Client → Nginx:11235 → [crawl4ai-1, crawl4ai-2, ...crawl4ai-N] ← Redis
                              ↓
                         Monitor Dashboard
 ```
 **Components:**
 - **Nginx**: Load balancer (round-robin API, sticky monitoring)
 - **Crawl4AI containers**: FastAPI + Playwright browsers
 - **Redis**: Container discovery (heartbeats 30s), monitoring data aggregation
 - **Monitor**: Real-time dashboard at `/dashboard`
 ## CLI Commands
 ### Start/Stop
 ```bash
 crwl server start [-r N] [--port P] [--mode auto|single|swarm|compose] [--env-file F] [--image I]
 crwl server stop [--remove-volumes]
 crwl server restart [-r N]
 ```
 ### Management
 ```bash
 crwl server status        # Show mode, replicas, port, uptime
 crwl server scale N       # Live scaling (Swarm/Compose only)
 crwl server logs [-f] [--tail N]
 ```
 **Defaults**: replicas=1, port=11235, mode=auto, image=unclecode/crawl4ai:latest
 ## Deployment Modes
 | Replicas | Mode | Load Balancer | Use Case |
 |----------|------|---------------|----------|
 | N=1 | single | None | Dev/testing |
 | N>1 | swarm | Built-in | Production (if `docker swarm init` done) |
 | N>1 | compose | Nginx | Production (fallback) |
 **Mode Detection** (when mode=auto):
 1. If N=1 → single
 2. If N>1 & Swarm active → swarm
 3. If N>1 & Swarm inactive → compose
 ## File Locations
 ```
 ~/.crawl4ai/server/
 ├── state.json              # Current deployment state
 ├── docker-compose.yml      # Generated compose file
 └── nginx.conf              # Generated nginx config
 /app/                       # Inside container
 ├── deploy/docker/server.py
 ├── deploy/docker/monitor.py
 ├── deploy/docker/static/monitor/index.html
 └── crawler_pool.py         # Browser pool (PERMANENT, HOT_POOL, COLD_POOL)
 ```
 ## Monitoring & Troubleshooting
 ### Health Checks
 ```bash
 curl http://localhost:11235/health              # Service health
 curl http://localhost:11235/monitor/containers  # Container discovery
 curl http://localhost:11235/monitor/requests    # Aggregated requests
 ```
 ### Dashboard
 - URL: `http://localhost:11235/dashboard/`
 - Features: Container filtering (All/C-1/C-2/C-3), real-time WebSocket, timeline charts
 - WebSocket: `/monitor/ws` (sticky sessions)
 ### Common Issues
 **No containers showing in dashboard:**
 ```bash
 docker exec <redis-container> redis-cli SMEMBERS monitor:active_containers
 docker exec <redis-container> redis-cli KEYS "monitor:heartbeat:*"
 ```
 Wait 30s for heartbeat registration.
 **Load balancing not working:**
 ```bash
 docker exec <nginx-container> cat /etc/nginx/nginx.conf | grep upstream
 docker logs <nginx-container> | grep error
 ```
 Check that the Nginx upstream used for API endpoints does not set `ip_hash`.
 **Redis connection errors:**
 ```bash
 docker logs <crawl4ai-container> | grep -i redis
 docker exec <crawl4ai-container> ping redis
 ```
 Verify REDIS_HOST=redis, REDIS_PORT=6379.
 **Containers not scaling:**
 ```bash
 # Swarm
 docker service ls
 docker service ps crawl4ai
 # Compose
 docker compose -f ~/.crawl4ai/server/docker-compose.yml ps
 docker compose -f ~/.crawl4ai/server/docker-compose.yml up -d --scale crawl4ai=N
 ```
 ### Redis Data Structure
 ```
 monitor:active_containers              # SET: {container_ids}
 monitor:heartbeat:{cid}                # STRING: {id, hostname, last_seen} TTL=60s
 monitor:{cid}:active_requests          # STRING: JSON list, TTL=5min
 monitor:{cid}:completed                # STRING: JSON list, TTL=1h
 monitor:{cid}:janitor                  # STRING: JSON list, TTL=1h
 monitor:{cid}:errors                   # STRING: JSON list, TTL=1h
 monitor:endpoint_stats                 # STRING: JSON aggregate, TTL=24h
 ```
 ## Environment Variables
 ### Required for Multi-LLM
 ```bash
 OPENAI_API_KEY=sk-...
 ANTHROPIC_API_KEY=sk-ant-...
 DEEPSEEK_API_KEY=...
 GROQ_API_KEY=...
 TOGETHER_API_KEY=...
 MISTRAL_API_KEY=...
 GEMINI_API_TOKEN=...
 ```
 ### Redis Configuration (Optional)
 ```bash
 REDIS_HOST=redis                       # Default: redis
 REDIS_PORT=6379                        # Default: 6379
 REDIS_TTL_ACTIVE_REQUESTS=300          # Default: 5min
 REDIS_TTL_COMPLETED_REQUESTS=3600      # Default: 1h
 REDIS_TTL_JANITOR_EVENTS=3600          # Default: 1h
 REDIS_TTL_ERRORS=3600                  # Default: 1h
 REDIS_TTL_ENDPOINT_STATS=86400         # Default: 24h
 REDIS_TTL_HEARTBEAT=60                 # Default: 1min
 ```
 ## API Endpoints
 ### Core API
 - `POST /crawl` - Crawl URL (load-balanced)
 - `POST /batch` - Batch crawl (load-balanced)
 - `GET /health` - Health check (load-balanced)
 ### Monitor API (Aggregated from all containers)
 - `GET /monitor/health` - Local container health
 - `GET /monitor/containers` - All active containers
 - `GET /monitor/requests` - All requests (active + completed)
 - `GET /monitor/browsers` - Browser pool status (local only)
 - `GET /monitor/logs/janitor` - Janitor cleanup events
 - `GET /monitor/logs/errors` - Error logs
 - `GET /monitor/endpoints/stats` - Endpoint analytics
 - `WS /monitor/ws` - Real-time updates (aggregated)
 ### Control Actions
 - `POST /monitor/actions/cleanup` - Force browser cleanup
 - `POST /monitor/actions/kill_browser` - Kill specific browser
 - `POST /monitor/actions/restart_browser` - Restart browser
 - `POST /monitor/stats/reset` - Reset endpoint counters
 ## Docker Commands Reference
 ### Inspection
 ```bash
 # List containers
 docker ps --filter "name=crawl4ai"
 # Container logs
 docker logs <container-id> -f --tail 100
 # Redis CLI
 docker exec -it <redis-container> redis-cli
 KEYS monitor:*
 SMEMBERS monitor:active_containers
 GET monitor:<cid>:completed
 TTL monitor:heartbeat:<cid>
 # Nginx config
 docker exec <nginx-container> cat /etc/nginx/nginx.conf
 # Container stats
 docker stats --no-stream --format "table {{.Name}}\t{{.CPUPerc}}\t{{.MemUsage}}"
 ```
 ### Compose Operations
 ```bash
 # Scale
 docker compose -f ~/.crawl4ai/server/docker-compose.yml up -d --scale crawl4ai=5
 # Restart service
 docker compose -f ~/.crawl4ai/server/docker-compose.yml restart crawl4ai
 # View services
 docker compose -f ~/.crawl4ai/server/docker-compose.yml ps
 ```
 ### Swarm Operations
 ```bash
 # Initialize Swarm
 docker swarm init
 # Scale service
 docker service scale crawl4ai=5
 # Service info
 docker service ls
 docker service ps crawl4ai --no-trunc
 # Service logs
 docker service logs crawl4ai --tail 100 -f
 ```
 ## Performance & Scaling
 ### Resource Recommendations
 | Containers | Memory/Container | Total Memory | Use Case |
 |------------|-----------------|--------------|----------|
 | 1 | 4GB | 4GB | Development |
 | 3 | 4GB | 12GB | Small prod |
 | 5 | 4GB | 20GB | Medium prod |
 | 10 | 4GB | 40GB | Large prod |
 **Expected Throughput**: ~10 req/min per container (depends on crawl complexity)
 ### Scaling Guidelines
 - **Horizontal**: Add replicas (`crwl server scale N`)
 - **Vertical**: Adjust `--memory 8G --cpus 4` in kwargs
 - **Browser Pool**: Permanent (1) + Hot pool (adaptive) + Cold pool (cleanup by janitor)
 ### Redis Memory Usage
 - **Per container**: ~110KB (requests + events + errors + heartbeat)
 - **10 containers**: ~1.1MB
 - **Recommendation**: 256MB Redis is sufficient for <100 containers
 ## Security Notes
 ### Input Validation
 All CLI inputs validated:
 - Image name: alphanumeric + `.-/:_@` only, max 256 chars
 - Port: 1-65535
 - Replicas: 1-100
 - Env file: must exist and be readable
 - Container IDs: alphanumeric + `-_` only (prevents Redis injection)
 ### Network Security
 - Nginx forwards to internal `crawl4ai` service (Docker network)
 - Monitor endpoints have NO authentication (add MONITOR_TOKEN env for security)
 - Redis is internal-only (no external port)
 ### Recommended Production Setup
 ```bash
 # Add authentication
 export MONITOR_TOKEN="your-secret-token"
 # Use Redis password
 redis:
  command: redis-server --requirepass ${REDIS_PASSWORD}
 # Enable rate limiting in Nginx
 limit_req_zone $binary_remote_addr zone=api:10m rate=10r/s;
 ```
 ## Common User Scenarios
 ### Scenario 1: Fresh Deployment
 ```bash
 crwl server start --replicas 3 --env-file .env
 # Wait for health check, then access http://localhost:11235/health
 ```
 ### Scenario 2: Scaling Under Load
 ```bash
 crwl server scale 10
 # Live scaling, no downtime
 ```
 ### Scenario 3: Debugging Slow Requests
 ```bash
 # Check dashboard
 open http://localhost:11235/dashboard/
 # Check container logs
 docker logs <slowest-container-id> --tail 100
 # Check browser pool
 curl http://localhost:11235/monitor/browsers | jq
 ```
 ### Scenario 4: Redis Connection Issues
 ```bash
 # Check Redis connectivity
 docker exec <crawl4ai-container> nc -zv redis 6379
 # Check Redis logs
 docker logs <redis-container>
 # Restart containers (triggers reconnect with retry logic)
 crwl server restart
 ```
 ### Scenario 5: Container Not Appearing in Dashboard
 ```bash
 # Wait 30s for heartbeat
 sleep 30
 # Check Redis
 docker exec <redis-container> redis-cli SMEMBERS monitor:active_containers
 # Check container logs for heartbeat errors
 docker logs <missing-container> | grep -i heartbeat
 ```
 ## Code Context for Advanced Debugging
 ### Key Classes
 - `MonitorStats` (monitor.py): Tracks stats, Redis persistence, heartbeat worker
 - `ServerManager` (server_manager.py): CLI orchestration, mode detection
 - Browser pool globals: `PERMANENT`, `HOT_POOL`, `COLD_POOL`, `LOCK` (crawler_pool.py)
 ### Critical Timeouts
 - Browser pool lock: 2s timeout (prevents deadlock)
 - WebSocket connection: 5s timeout
 - Health check: 30-60s timeout
 - Heartbeat interval: 30s, TTL: 60s
 - Redis retry: 3 attempts, backoff: 0.5s/1s/2s
 - Circuit breaker: 5 failures → 5min backoff
 ### State Transitions
 ```
 NOT_RUNNING → STARTING → HEALTHY → RUNNING
                ↓           ↓
            FAILED      UNHEALTHY → STOPPED
 ```
 State file: `~/.crawl4ai/server/state.json` (atomic writes, fcntl locking)
 ## Quick Diagnostic Commands
 ```bash
 # Full system check
 crwl server status
 docker ps
 curl http://localhost:11235/health
 curl http://localhost:11235/monitor/containers | jq
 # Redis check
 docker exec <redis-container> redis-cli PING
 docker exec <redis-container> redis-cli INFO stats
 # Network check
 docker network ls
 docker network inspect <network-name>
 # Logs check
 docker logs <nginx-container> --tail 50
 docker logs <redis-container> --tail 50
 docker compose -f ~/.crawl4ai/server/docker-compose.yml logs --tail 100
 ```
 ## Agent Decision Tree
 **User reports slow crawling:**
 1. Check dashboard for stuck active requests → kill browser if stuck >5min
 2. Check browser pool status → cleanup if hot/cold pool >10
 3. Check container CPU/memory → scale up if >80%
 4. Check Redis latency → restart Redis if >100ms
 **User reports missing containers:**
 1. Wait 30s for heartbeat
 2. Check `docker ps` vs dashboard count
 3. Check Redis SMEMBERS monitor:active_containers
 4. Check container logs for Redis connection errors
 5. Verify REDIS_HOST/PORT env vars
 **User reports 502/503 errors:**
 1. Check Nginx logs for upstream errors
 2. Check container health: `curl http://localhost:11235/health`
 3. Check if all containers are healthy: `docker ps`
 4. Restart Nginx: `docker restart <nginx-container>`
 **User wants to update image:**
 1. `crwl server stop`
 2. `docker pull unclecode/crawl4ai:latest`
 3. `crwl server start --replicas <previous-count>`
 ---
 **Version**: Crawl4AI v0.7.4+
 **Last Updated**: 2025-01-20
 **AI Agent Note**: All commands, file paths, and Redis keys verified against codebase. Use exact syntax shown. For user-facing responses, translate technical details to plain language.
--- a/deploy/docker/ARCHITECTURE.md
+++ b/deploy/docker/ARCHITECTURE.md
@@ -1,822 +0,0 @@
 # Crawl4AI Docker Architecture - AI Context Map
 **Purpose:** Dense technical reference for AI agents to understand complete system architecture.
 **Format:** Symbolic, compressed, high-information-density documentation.
 ---
 ## System Overview
 ```
 ┌─────────────────────────────────────────────────────────────┐
 │ CRAWL4AI DOCKER ORCHESTRATION SYSTEM                        │
 ├─────────────────────────────────────────────────────────────┤
 │ Modes: Single (N=1) | Swarm (N>1) | Compose+Nginx (N>1)     │
 │ Entry: cnode CLI → deploy/docker/cnode_cli.py               │
 │ Core: deploy/docker/server_manager.py                       │
 │ Server: deploy/docker/server.py (FastAPI)                   │
 │ API: deploy/docker/api.py (crawl endpoints)                 │
 │ Monitor: deploy/docker/monitor.py + monitor_routes.py       │
 └─────────────────────────────────────────────────────────────┘
 ```
 ---
 ## Directory Structure & File Map
 ```
 deploy/
 ├── docker/                          # Server runtime & orchestration
 │   ├── server.py                    # FastAPI app entry [CRITICAL]
 │   ├── api.py                       # /crawl, /screenshot, /pdf endpoints
 │   ├── server_manager.py            # Docker orchestration logic [CORE]
 │   ├── cnode_cli.py                 # CLI interface (Click-based)
 │   ├── monitor.py                   # Real-time metrics collector
 │   ├── monitor_routes.py            # /monitor dashboard routes
 │   ├── crawler_pool.py              # Browser pool management
 │   ├── hook_manager.py              # Pre/post crawl hooks
 │   ├── job.py                       # Job queue schema
 │   ├── utils.py                     # Helpers (port check, health)
 │   ├── auth.py                      # API key authentication
 │   ├── schemas.py                   # Pydantic models
 │   ├── mcp_bridge.py                # MCP protocol bridge
 │   ├── supervisord.conf             # Process manager config
 │   ├── config.yml                   # Server config template
 │   ├── requirements.txt             # Python deps
 │   ├── static/                      # Web assets
 │   │   ├── monitor/                 # Dashboard UI
 │   │   └── playground/              # API playground
 │   └── tests/                       # Test suite
 │
 └── installer/                       # User-facing installation
    ├── cnode_pkg/                   # Standalone package
    │   ├── cli.py                   # Copy of cnode_cli.py
    │   ├── server_manager.py        # Copy of server_manager.py
    │   └── requirements.txt         # click, rich, anyio, pyyaml
    ├── install-cnode.sh             # Remote installer (git sparse-checkout)
    ├── sync-cnode.sh                # Dev tool (source→pkg sync)
    ├── USER_GUIDE.md                # Human-readable guide
    ├── README.md                    # Developer documentation
    └── QUICKSTART.md                # Cheat sheet
 ```
 ---
 ## Core Components Deep Dive
 ### 1. `server_manager.py` - Orchestration Engine
 **Role:** Manages Docker container lifecycle, auto-detects deployment mode.
 **Key Classes:**
 - `ServerManager` - Main orchestrator
  - `start(replicas, mode, port, env_file, image)` → Deploy server
  - `stop(remove_volumes)` → Teardown
  - `status()` → Health check
  - `scale(replicas)` → Live scaling
  - `logs(follow, tail)` → Stream logs
  - `cleanup(force)` → Emergency cleanup
 **State Management:**
 - File: `~/.crawl4ai/server_state.yml`
 - Schema: `{mode, replicas, port, image, started_at, containers[]}`
 - Atomic writes with lock file
 **Deployment Modes:**
 ```python
 if replicas == 1:
    mode = "single"  # docker run
 elif swarm_available():
    mode = "swarm"   # docker stack deploy
 else:
    mode = "compose" # docker-compose + nginx
 ```
 **Container Naming:**
 - Single: `crawl4ai-server`
 - Swarm: `crawl4ai-stack_crawl4ai`
 - Compose: `crawl4ai-server-{1..N}`, `crawl4ai-nginx`
 **Networks:**
 - `crawl4ai-network` (bridge mode for all)
 **Volumes:**
 - `crawl4ai-redis-data` - Persistent queue
 - `crawl4ai-profiles` - Browser profiles
 **Health Checks:**
 - Endpoint: `http://localhost:{port}/health`
 - Timeout: 30s startup
 - Retry: 3 attempts
 ---
 ### 2. `server.py` - FastAPI Application
 **Role:** HTTP server exposing crawl API + monitoring.
 **Startup Flow:**
 ```python
 app = FastAPI()
@app.on_event("startup")
 async def startup():
    init_crawler_pool()      # Pre-warm browsers
    init_redis_connection()  # Job queue
    start_monitor_collector() # Metrics
 ```
 **Key Endpoints:**
 ```
 POST /crawl          → api.py:crawl_endpoint()
 POST /crawl/stream   → api.py:crawl_stream_endpoint()
 POST /screenshot     → api.py:screenshot_endpoint()
 POST /pdf            → api.py:pdf_endpoint()
 GET  /health         → server.py:health_check()
 GET  /monitor        → monitor_routes.py:dashboard()
 WS   /monitor/ws     → monitor_routes.py:websocket_endpoint()
 GET  /playground     → static/playground/index.html
 ```
 **Process Manager:**
 - Uses `supervisord` to manage:
  - FastAPI server (port 11235)
  - Redis (port 6379)
  - Background workers
 **Environment:**
 ```bash
 CRAWL4AI_PORT=11235
 REDIS_URL=redis://localhost:6379
 MAX_CONCURRENT_CRAWLS=5
 BROWSER_POOL_SIZE=3
 ```
 ---
 ### 3. `api.py` - Crawl Endpoints
 **Main Endpoint:** `POST /crawl`
 **Request Schema:**
 ```json
 {
  "urls": ["https://example.com"],
  "priority": 10,
  "browser_config": {
    "type": "BrowserConfig",
    "params": {"headless": true, "viewport_width": 1920}
  },
  "crawler_config": {
    "type": "CrawlerRunConfig",
    "params": {"cache_mode": "bypass", "extraction_strategy": {...}}
  }
 }
 ```
 **Processing Flow:**
 ```
 1. Validate request (Pydantic)
 2. Queue job → Redis
 3. Get browser from pool → crawler_pool.py
 4. Execute crawl → AsyncWebCrawler
 5. Apply hooks → hook_manager.py
 6. Return result (JSON)
 7. Release browser to pool
 ```
 **Memory Management:**
 - Browser pool: Max 3 instances
 - LRU eviction when pool full
 - Explicit cleanup: `browser.close()` in finally block
 - Redis TTL: 1 hour for completed jobs
 **Error Handling:**
 ```python
 try:
    result = await crawler.arun(url, config)
 except PlaywrightError as e:
    # Browser crash - release & recreate
    await pool.invalidate(browser_id)
 except TimeoutError as e:
    # Timeout - kill & retry
    await crawler.kill()
 except Exception as e:
    # Unknown - log & fail gracefully
    logger.error(f"Crawl failed: {e}")
 ```
 ---
 ### 4. `crawler_pool.py` - Browser Pool Manager
 **Role:** Manage persistent browser instances to avoid startup overhead.
 **Class:** `CrawlerPool`
 - `get_crawler()` → Lease browser (async with context manager)
 - `release_crawler(id)` → Return to pool
 - `warm_up(count)` → Pre-launch browsers
 - `cleanup()` → Close all browsers
 **Pool Strategy:**
 ```python
 pool = {
    "browser_1": {"crawler": AsyncWebCrawler(), "in_use": False},
    "browser_2": {"crawler": AsyncWebCrawler(), "in_use": False},
    "browser_3": {"crawler": AsyncWebCrawler(), "in_use": False},
 }
 async with pool.get_crawler() as crawler:
    result = await crawler.arun(url)
    # Auto-released on context exit
 ```
 **Anti-Leak Mechanisms:**
 1. Context managers enforce cleanup
 2. Watchdog thread kills stale browsers (>10min idle)
 3. Max lifetime: 1 hour per browser
 4. Force GC after browser close
 ---
 ### 5. `monitor.py` + `monitor_routes.py` - Real-time Dashboard
 **Architecture:**
 ```
 [Browser] <--WebSocket--> [monitor_routes.py] <--Events--> [monitor.py]
                              ↓
                          [Redis Pub/Sub]
                              ↓
                       [Metrics Collector]
 ```
 **Metrics Collected:**
 - Requests/sec (sliding window)
 - Active crawls (real-time count)
 - Response times (p50, p95, p99)
 - Error rate (5min rolling)
 - Memory usage (RSS, heap)
 - Browser pool utilization
 **WebSocket Protocol:**
 ```json
 // Server → Client
 {
  "type": "metrics",
  "data": {
    "rps": 45.3,
    "active_crawls": 12,
    "p95_latency": 1234,
    "error_rate": 0.02
  }
 }
 // Client → Server
 {
  "type": "subscribe",
  "channels": ["metrics", "logs"]
 }
 ```
 **Dashboard Route:** `/monitor`
 - Real-time graphs (Chart.js)
 - Request log stream
 - Container health status
 - Resource utilization
 ---
 ### 6. `cnode_cli.py` - CLI Interface
 **Framework:** Click (Python CLI framework)
 **Command Structure:**
 ```
 cnode
 ├── start [--replicas N] [--port P] [--mode M] [--image I]
 ├── stop [--remove-volumes]
 ├── status
 ├── scale N
 ├── logs [--follow] [--tail N]
 ├── restart [--replicas N]
 └── cleanup [--force]
 ```
 **Execution Flow:**
 ```python
@cli.command("start")
 def start_cmd(replicas, mode, port, env_file, image):
    manager = ServerManager()
    result = anyio.run(manager.start(...))  # Async bridge
    if result["success"]:
        console.print(success_panel)
 ```
 **User Feedback:**
 - Rich library for colors/tables
 - Progress spinners during operations
 - Error messages with hints
 - Status tables with health indicators
 **State Persistence:**
 - Saves deployment config to `~/.crawl4ai/server_state.yml`
 - Enables stateless commands (status, scale, restart)
 ---
 ### 7. Docker Orchestration Details
 **Single Container Mode (N=1):**
 ```bash
 docker run -d \
  --name crawl4ai-server \
  --network crawl4ai-network \
  -p 11235:11235 \
  -v crawl4ai-redis-data:/data \
  unclecode/crawl4ai:latest
 ```
 **Docker Swarm Mode (N>1, Swarm available):**
 ```yaml
 # docker-compose.swarm.yml
 version: '3.8'
 services:
  crawl4ai:
    image: unclecode/crawl4ai:latest
    deploy:
      replicas: 5
      update_config:
        parallelism: 2
        delay: 10s
      restart_policy:
        condition: on-failure
    ports:
      - "11235:11235"
    networks:
      - crawl4ai-network
 ```
 Deploy: `docker stack deploy -c docker-compose.swarm.yml crawl4ai-stack`
 **Docker Compose + Nginx Mode (N>1, fallback):**
 ```yaml
 # docker-compose.yml
 services:
  crawl4ai-1:
    image: unclecode/crawl4ai:latest
    networks: [crawl4ai-network]
  crawl4ai-2:
    image: unclecode/crawl4ai:latest
    networks: [crawl4ai-network]
  nginx:
    image: nginx:alpine
    ports: ["11235:80"]
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf
    networks: [crawl4ai-network]
 ```
 Nginx config (round-robin load balancing):
 ```nginx
 upstream crawl4ai_backend {
    server crawl4ai-1:11235;
    server crawl4ai-2:11235;
    server crawl4ai-3:11235;
 }
 server {
    listen 80;
    location / {
        proxy_pass http://crawl4ai_backend;
        proxy_set_header Host $host;
    }
 }
 ```
 ---
 ## Memory Leak Prevention Strategy
 ### Problem Areas & Solutions
 **1. Browser Instances**
 ```python
 # ❌ BAD - Leak risk
 crawler = AsyncWebCrawler()
 result = await crawler.arun(url)
 # Browser never closed!
 # ✅ GOOD - Guaranteed cleanup
 async with AsyncWebCrawler() as crawler:
    result = await crawler.arun(url)
    # Auto-closed on exit
 ```
 **2. WebSocket Connections**
 ```python
 # monitor_routes.py
 active_connections = set()
@app.websocket("/monitor/ws")
 async def websocket_endpoint(websocket):
    await websocket.accept()
    active_connections.add(websocket)
    try:
        while True:
            await websocket.send_json(get_metrics())
    finally:
        active_connections.remove(websocket)  # Critical!
 ```
 **3. Redis Connections**
 ```python
 # Use connection pooling
 redis_pool = aioredis.ConnectionPool(
    host="localhost",
    port=6379,
    max_connections=10,
    decode_responses=True
 )
 # Reuse connections
 async def get_job(job_id):
    async with redis_pool.get_connection() as conn:
        data = await conn.get(f"job:{job_id}")
    # Connection auto-returned to pool
 ```
 **4. Async Task Cleanup**
 ```python
 # Track background tasks
 background_tasks = set()
 async def crawl_task(url):
    try:
        result = await crawl(url)
    finally:
        background_tasks.discard(asyncio.current_task())
 # On shutdown
 async def shutdown():
    tasks = list(background_tasks)
    for task in tasks:
        task.cancel()
    await asyncio.gather(*tasks, return_exceptions=True)
 ```
 **5. File Descriptor Leaks**
 ```python
 # Use context managers for files
 async def save_screenshot(url):
    async with aiofiles.open(f"{job_id}.png", "wb") as f:
        await f.write(screenshot_bytes)
    # File auto-closed
 ```
 ---
 ## Installation & Distribution
 ### User Installation Flow
 **Script:** `deploy/installer/install-cnode.sh`
 **Steps:**
 1. Check Python 3.8+ exists
 2. Check pip available
 3. Check Docker installed (warn if missing)
 4. Create temp dir: `mktemp -d`
 5. Git sparse-checkout:
   ```bash
   git init
   git remote add origin https://github.com/unclecode/crawl4ai.git
   git config core.sparseCheckout true
   echo "deploy/installer/cnode_pkg/*" > .git/info/sparse-checkout
   git pull --depth=1 origin main
   ```
 6. Install deps: `pip install click rich anyio pyyaml`
 7. Copy package: `cnode_pkg/ → /usr/local/lib/cnode/`
 8. Create wrapper: `/usr/local/bin/cnode`
   ```bash
   #!/usr/bin/env bash
   export PYTHONPATH="/usr/local/lib/cnode:$PYTHONPATH"
   exec python3 -m cnode_pkg.cli "$@"
   ```
 9. Cleanup temp dir
 **Result:**
 - Binary-like experience (fast startup: ~0.1s)
 - No need for PyInstaller (49x faster)
 - Platform-independent (any OS with Python)
 ---
 ## Development Workflow
 ### Source Code Sync (Auto)
 **Git Hook:** `.githooks/pre-commit`
 **Trigger:** When committing `deploy/docker/cnode_cli.py` or `server_manager.py`
 **Action:**
 ```bash
 1. Diff source vs package
 2. If different:
   - Run sync-cnode.sh
   - Copy cnode_cli.py → cnode_pkg/cli.py
   - Fix imports: s/deploy.docker/cnode_pkg/g
   - Copy server_manager.py → cnode_pkg/
   - Stage synced files
 3. Continue commit
 ```
 **Setup:** `./setup-hooks.sh` (configures `git config core.hooksPath .githooks`)
 **Smart Behavior:**
 - Silent when no sync needed
 - Only syncs if content differs
 - Minimal output: `✓ cnode synced`
 ---
 ## API Request/Response Flow
 ### Example: POST /crawl
 **Request:**
 ```bash
 curl -X POST http://localhost:11235/crawl \
  -H "Content-Type: application/json" \
  -d '{
    "urls": ["https://example.com"],
    "browser_config": {
      "type": "BrowserConfig",
      "params": {"headless": true}
    },
    "crawler_config": {
      "type": "CrawlerRunConfig",
      "params": {"cache_mode": "bypass"}
    }
  }'
 ```
 **Processing:**
 ```
 1. FastAPI receives request → api.py:crawl_endpoint()
 2. Validate schema → Pydantic models in schemas.py
 3. Create job → job.py:Job(id=uuid4(), urls=[...])
 4. Queue to Redis → LPUSH crawl_queue {job_json}
 5. Get browser from pool → crawler_pool.py:get_crawler()
 6. Execute crawl:
   a. Launch page → browser.new_page()
   b. Navigate → page.goto(url)
   c. Extract → extraction_strategy.extract()
   d. Generate markdown → markdown_generator.generate()
 7. Store result → Redis SETEX result:{job_id} 3600 {result_json}
 8. Release browser → pool.release(browser_id)
 9. Return response:
   {
     "success": true,
     "result": {
       "url": "https://example.com",
       "markdown": "# Example Domain...",
       "metadata": {"title": "Example Domain"},
       "extracted_content": {...}
     }
   }
 ```
 **Error Cases:**
 - 400: Invalid request schema
 - 429: Rate limit exceeded
 - 500: Internal error (browser crash, timeout)
 - 503: Service unavailable (all browsers busy)
 ---
 ## Scaling Behavior
 ### Scale-Up (1 → 10 replicas)
 **Command:** `cnode scale 10`
 **Swarm Mode:**
 ```bash
 docker service scale crawl4ai-stack_crawl4ai=10
 # Docker handles:
 # - Container creation
 # - Network attachment
 # - Load balancer update
 # - Rolling deployment
 ```
 **Compose Mode:**
 ```bash
 # Update docker-compose.yml
 # Change replica count in all service definitions
 docker-compose up -d --scale crawl4ai=10
 # Regenerate nginx.conf with 10 upstreams
 docker exec nginx nginx -s reload
 ```
 **Load Distribution:**
 - Swarm: Built-in ingress network (VIP-based round-robin)
 - Compose: Nginx upstream (round-robin, can configure least_conn)
 **Zero-Downtime:**
 - Swarm: Yes (rolling update, parallelism=2)
 - Compose: Partial (nginx reload is graceful, but brief spike)
 ---
 ## Configuration Files
 ### `config.yml` - Server Configuration
 ```yaml
 server:
  port: 11235
  host: "0.0.0.0"
  workers: 4
 crawler:
  max_concurrent: 5
  timeout: 30
  retries: 3
 browser:
  pool_size: 3
  headless: true
  args:
    - "--no-sandbox"
    - "--disable-dev-shm-usage"
 redis:
  host: "localhost"
  port: 6379
  db: 0
 monitoring:
  enabled: true
  metrics_interval: 5  # seconds
 ```
 ### `supervisord.conf` - Process Management
 ```ini
 [supervisord]
 nodaemon=true
 [program:redis]
 command=redis-server --port 6379
 autorestart=true
 [program:fastapi]
 command=uvicorn server:app --host 0.0.0.0 --port 11235
 autorestart=true
 stdout_logfile=/var/log/crawl4ai/api.log
 [program:monitor]
 command=python monitor.py
 autorestart=true
 ```
 ---
 ## Testing & Quality
 ### Test Structure
 ```
 deploy/docker/tests/
 ├── cli/                    # CLI command tests
 │   └── test_commands.py    # start, stop, scale, status
 ├── monitor/                # Dashboard tests
 │   └── test_websocket.py   # WS connection, metrics
 └── codebase_test/          # Integration tests
    └── test_api.py         # End-to-end crawl tests
 ```
 ### Key Test Cases
 **CLI Tests:**
 - `test_start_single()` - Starts 1 replica
 - `test_start_cluster()` - Starts N replicas
 - `test_scale_up()` - Scales 1→5
 - `test_scale_down()` - Scales 5→2
 - `test_status()` - Reports correct state
 - `test_logs()` - Streams logs
 **API Tests:**
 - `test_crawl_success()` - Basic crawl works
 - `test_crawl_timeout()` - Handles slow sites
 - `test_concurrent_crawls()` - Parallel requests
 - `test_browser_pool()` - Reuses browsers
 - `test_memory_cleanup()` - No leaks after 100 crawls
 **Monitor Tests:**
 - `test_websocket_connect()` - WS handshake
 - `test_metrics_stream()` - Receives updates
 - `test_multiple_clients()` - Handles N connections
 ---
 ## Critical File Cross-Reference
 | Component | Primary File | Dependencies |
 |-----------|--------------|--------------|
 | **CLI Entry** | `cnode_cli.py:482` | `server_manager.py`, `click`, `rich` |
 | **Orchestrator** | `server_manager.py:45` | `docker`, `yaml`, `anyio` |
 | **API Server** | `server.py:120` | `api.py`, `monitor_routes.py` |
 | **Crawl Logic** | `api.py:78` | `crawler_pool.py`, `AsyncWebCrawler` |
 | **Browser Pool** | `crawler_pool.py:23` | `AsyncWebCrawler`, `asyncio` |
 | **Monitoring** | `monitor.py:156` | `redis`, `psutil` |
 | **Dashboard** | `monitor_routes.py:89` | `monitor.py`, `websockets` |
 | **Hooks** | `hook_manager.py:12` | `api.py`, custom user hooks |
 **Startup Chain:**
 ```
 cnode start
  └→ cnode_cli.py:start_cmd()
      └→ server_manager.py:start()
          └→ docker run/stack/compose
              └→ supervisord
                  ├→ redis-server
                  ├→ server.py
                  │   └→ api.py (routes)
                  │   └→ crawler_pool.py (init)
                  └→ monitor.py (collector)
 ```
 ---
 ## Symbolic Notation Summary
 ```
 ⊕ Addition/Creation      ⊖ Removal/Cleanup
 ⊗ Multiplication/Scale   ⊘ Division/Split
 → Flow/Dependency        ← Reverse flow
 ⇄ Bidirectional          ⇵ Up/Down scale
 ✓ Success/Complete       ✗ Failure/Error
 ⚠ Warning                ⚡ Performance critical
 🔒 Lock/Exclusive        🔓 Unlock/Shared
 📊 Metrics               📝 Logs
 🌐 Network               💾 Storage
 🧠 Memory                🐳 Docker
 ```
 **State Machine:**
 ```
 [STOPPED] →start→ [STARTING] →ready→ [RUNNING]
 [RUNNING] →scale→ [SCALING] →done→ [RUNNING]
 [RUNNING] →stop→ [STOPPING] →done→ [STOPPED]
 [ANY] →error→ [ERROR] →cleanup→ [STOPPED]
 ```
 ---
 ## Quick Reference Commands
 ```bash
 # Install
 curl -sSL https://raw.githubusercontent.com/unclecode/crawl4ai/main/deploy/installer/install-cnode.sh | bash
 # Deploy
 cnode start --replicas 5 --port 11235
 # Monitor
 open http://localhost:11235/monitor
 # Scale
 cnode scale 10
 # Test
 curl -X POST http://localhost:11235/crawl -H "Content-Type: application/json" -d '{"urls":["https://example.com"]}'
 # Logs
 cnode logs -f
 # Teardown
 cnode stop --remove-volumes
 ```
 ---
 ## Future AI Agent Instructions
 **When debugging:** Read this file first, then check specific source files mentioned.
 **When extending:** Follow existing patterns (context managers, async/await, type hints).
 **When optimizing:** Profile first (monitor.py metrics), then optimize hot paths (crawler_pool.py, api.py).
 **When troubleshooting memory:** Check browser pool cleanup, WebSocket connection tracking, Redis connection pooling.
 **When diagnosing scaling issues:** Verify the Docker mode (swarm vs compose), check the nginx config in compose mode, and review load balancer logs.
 ---
 **END OF ARCHITECTURE MAP**
 *Version: 1.0.0 | Last Updated: 2025-10-21 | Token-Optimized for AI Consumption*
--- a/deploy/docker/README.md
+++ b/deploy/docker/README.md
@@ -12,7 +12,6 @@
  - [Python SDK](#python-sdk)
  - [Understanding Request Schema](#understanding-request-schema)
  - [REST API Examples](#rest-api-examples)
  - [Asynchronous Jobs with Webhooks](#asynchronous-jobs-with-webhooks)
 - [Additional API Endpoints](#additional-api-endpoints)
  - [HTML Extraction Endpoint](#html-extraction-endpoint)
  - [Screenshot Endpoint](#screenshot-endpoint)
@@ -59,13 +58,15 @@ Pull and run images directly from Docker Hub without building locally.
 #### 1. Pull the Image
-Our latest stable release is `0.7.6`. Images are built with multi-arch manifests, so Docker automatically pulls the correct version for your system.
+Our latest release candidate is `0.7.0-r1`. Images are built with multi-arch manifests, so Docker automatically pulls the correct version for your system.
 > ⚠️ **Important Note**: The `latest` tag currently points to the stable `0.6.0` version. After testing and validation, `0.7.0` (without -r1) will be released and `latest` will be updated. For now, please use `0.7.0-r1` to test the new features.
 ```bash
-# Pull the latest stable version (0.7.6)
+# Pull the release candidate (for testing new features)
-docker pull unclecode/crawl4ai:0.7.6
+docker pull unclecode/crawl4ai:0.7.0-r1
-# Or use the latest tag (points to 0.7.6)
+# Or pull the current stable version (0.6.0)
 docker pull unclecode/crawl4ai:latest
 ```
@@ -100,7 +101,7 @@ EOL
      -p 11235:11235 \
      --name crawl4ai \
      --shm-size=1g \
-      unclecode/crawl4ai:0.7.6
+      unclecode/crawl4ai:0.7.0-r1
    ```
 *   **With LLM support:**
@@ -111,7 +112,7 @@ EOL
      --name crawl4ai \
      --env-file .llm.env \
      --shm-size=1g \
-      unclecode/crawl4ai:0.7.6
+      unclecode/crawl4ai:0.7.0-r1
    ```
 > The server will be available at `http://localhost:11235`. Visit `/playground` to access the interactive testing interface.
@@ -184,7 +185,7 @@ The `docker-compose.yml` file in the project root provides a simplified approach
    ```bash
    # Pulls and runs the release candidate from Docker Hub
    # Automatically selects the correct architecture
-    IMAGE=unclecode/crawl4ai:0.7.6 docker compose up -d
+    IMAGE=unclecode/crawl4ai:0.7.0-r1 docker compose up -d
    ```
 *   **Build and Run Locally:**
@@ -647,194 +648,6 @@ async def test_stream_crawl(token: str = None): # Made token optional
 # asyncio.run(test_stream_crawl())
 ```
 ### Asynchronous Jobs with Webhooks
 For long-running crawls or when you want to avoid keeping connections open, use the job queue endpoints. Instead of polling for results, configure a webhook to receive notifications when jobs complete.
 #### Why Use Jobs & Webhooks?
 - **No Polling Required** - Get notified when crawls complete instead of constantly checking status
 - **Better Resource Usage** - Free up client connections while jobs run in the background
 - **Scalable Architecture** - Ideal for high-volume crawling with TypeScript/Node.js clients or microservices
 - **Reliable Delivery** - Automatic retry with exponential backoff (5 attempts: 1s → 2s → 4s → 8s → 16s)
 #### How It Works
 1. **Submit Job** → POST to `/crawl/job` with optional `webhook_config`
 2. **Get Task ID** → Receive a `task_id` immediately
 3. **Job Runs** → Crawl executes in the background
 4. **Webhook Fired** → Server POSTs completion notification to your webhook URL
 5. **Fetch Results** → If data wasn't included in webhook, GET `/crawl/job/{task_id}`
 #### Quick Example
 ```bash
 # Submit a crawl job with webhook notification
 curl -X POST http://localhost:11235/crawl/job \
  -H "Content-Type: application/json" \
  -d '{
    "urls": ["https://example.com"],
    "webhook_config": {
      "webhook_url": "https://myapp.com/webhooks/crawl-complete",
      "webhook_data_in_payload": false
    }
  }'
 # Response: {"task_id": "crawl_a1b2c3d4"}
 ```
 **Your webhook receives:**
 ```json
 {
  "task_id": "crawl_a1b2c3d4",
  "task_type": "crawl",
  "status": "completed",
  "timestamp": "2025-10-21T10:30:00.000000+00:00",
  "urls": ["https://example.com"]
 }
 ```
 Then fetch the results:
 ```bash
 curl http://localhost:11235/crawl/job/crawl_a1b2c3d4
 ```
 #### Include Data in Webhook
 Set `webhook_data_in_payload: true` to receive the full crawl results directly in the webhook:
 ```bash
 curl -X POST http://localhost:11235/crawl/job \
  -H "Content-Type: application/json" \
  -d '{
    "urls": ["https://example.com"],
    "webhook_config": {
      "webhook_url": "https://myapp.com/webhooks/crawl-complete",
      "webhook_data_in_payload": true
    }
  }'
 ```
 **Your webhook receives the complete data:**
 ```json
 {
  "task_id": "crawl_a1b2c3d4",
  "task_type": "crawl",
  "status": "completed",
  "timestamp": "2025-10-21T10:30:00.000000+00:00",
  "urls": ["https://example.com"],
  "data": {
    "markdown": "...",
    "html": "...",
    "links": {...},
    "metadata": {...}
  }
 }
 ```
 #### Webhook Authentication
 Add custom headers for authentication:
 ```json
 {
  "urls": ["https://example.com"],
  "webhook_config": {
    "webhook_url": "https://myapp.com/webhooks/crawl",
    "webhook_data_in_payload": false,
    "webhook_headers": {
      "X-Webhook-Secret": "your-secret-token",
      "X-Service-ID": "crawl4ai-prod"
    }
  }
 }
 ```
 #### Global Default Webhook
 Configure a default webhook URL in `config.yml` for all jobs:
 ```yaml
 webhooks:
  enabled: true
  default_url: "https://myapp.com/webhooks/default"
  data_in_payload: false
  retry:
    max_attempts: 5
    initial_delay_ms: 1000
    max_delay_ms: 32000
    timeout_ms: 30000
 ```
 Now jobs without `webhook_config` automatically use the default webhook.
 #### Job Status Polling (Without Webhooks)
 If you prefer polling instead of webhooks, just omit `webhook_config`:
 ```bash
 # Submit job
 curl -X POST http://localhost:11235/crawl/job \
  -H "Content-Type: application/json" \
  -d '{"urls": ["https://example.com"]}'
 # Response: {"task_id": "crawl_xyz"}
 # Poll for status
 curl http://localhost:11235/crawl/job/crawl_xyz
 ```
 The response includes a `status` field, which is one of `"processing"`, `"completed"`, or `"failed"`.
 #### LLM Extraction Jobs with Webhooks
 The same webhook system works for LLM extraction jobs via `/llm/job`:
 ```bash
 # Submit LLM extraction job with webhook
 curl -X POST http://localhost:11235/llm/job \
  -H "Content-Type: application/json" \
  -d '{
    "url": "https://example.com/article",
    "q": "Extract the article title, author, and main points",
    "provider": "openai/gpt-4o-mini",
    "webhook_config": {
      "webhook_url": "https://myapp.com/webhooks/llm-complete",
      "webhook_data_in_payload": true,
      "webhook_headers": {
        "X-Webhook-Secret": "your-secret-token"
      }
    }
  }'
 # Response: {"task_id": "llm_1698765432_12345"}
 ```
 **Your webhook receives:**
 ```json
 {
  "task_id": "llm_1698765432_12345",
  "task_type": "llm_extraction",
  "status": "completed",
  "timestamp": "2025-10-22T12:30:00.000000+00:00",
  "urls": ["https://example.com/article"],
  "data": {
    "extracted_content": {
      "title": "Understanding Web Scraping",
      "author": "John Doe",
      "main_points": ["Point 1", "Point 2", "Point 3"]
    }
  }
 }
 ```
 **Key Differences for LLM Jobs:**
 - Task type is `"llm_extraction"` instead of `"crawl"`
 - Extracted data is in `data.extracted_content`
 - The request takes a single `url` string (not a `urls` array); the webhook payload still reports it as a one-element `urls` list
 - Supports schema-based extraction with `schema` parameter
 > 💡 **Pro tip**: See [WEBHOOK_EXAMPLES.md](./WEBHOOK_EXAMPLES.md) for detailed examples including TypeScript client code, Flask webhook handlers, and failure handling.
 ---
 ## Metrics & Monitoring
@@ -1017,7 +830,6 @@ In this guide, we've covered everything you need to get started with Crawl4AI's
 - Using the interactive playground for testing
 - Making API requests with proper typing
 - Using the Python SDK
 - Asynchronous job queues with webhook notifications
 - Leveraging specialized endpoints for screenshots, PDFs, and JavaScript execution
 - Connecting via the Model Context Protocol (MCP)
 - Monitoring your deployment
--- a/deploy/docker/WEBHOOK_EXAMPLES.md
+++ b/deploy/docker/WEBHOOK_EXAMPLES.md
@@ -1,378 +0,0 @@
 # Webhook Feature Examples
 This document provides examples of how to use the webhook feature for crawl jobs and LLM extraction jobs in Crawl4AI.
 ## Overview
 The webhook feature allows you to receive notifications when crawl jobs complete, eliminating the need for polling. Webhooks are sent with exponential backoff retry logic to ensure reliable delivery.
 ## Configuration
 ### Global Configuration (config.yml)
 You can configure default webhook settings in `config.yml`:
 ```yaml
 webhooks:
  enabled: true
  default_url: null  # Optional: default webhook URL for all jobs
  data_in_payload: false  # Optional: default behavior for including data
  retry:
    max_attempts: 5
    initial_delay_ms: 1000  # 1s, 2s, 4s, 8s, 16s exponential backoff
    max_delay_ms: 32000
    timeout_ms: 30000  # 30s timeout per webhook call
  headers:  # Optional: default headers to include
    User-Agent: "Crawl4AI-Webhook/1.0"
 ```
 ## API Usage Examples
 ### Example 1: Basic Webhook (Notification Only)
 Send a webhook notification without including the crawl data in the payload.
 **Request:**
 ```bash
 curl -X POST http://localhost:11235/crawl/job \
  -H "Content-Type: application/json" \
  -d '{
    "urls": ["https://example.com"],
    "webhook_config": {
      "webhook_url": "https://myapp.com/webhooks/crawl-complete",
      "webhook_data_in_payload": false
    }
  }'
 ```
 **Response:**
 ```json
 {
  "task_id": "crawl_a1b2c3d4"
 }
 ```
 **Webhook Payload Received:**
 ```json
 {
  "task_id": "crawl_a1b2c3d4",
  "task_type": "crawl",
  "status": "completed",
  "timestamp": "2025-10-21T10:30:00.000000+00:00",
  "urls": ["https://example.com"]
 }
 ```
 Your webhook handler should then fetch the results:
 ```bash
 curl http://localhost:11235/crawl/job/crawl_a1b2c3d4
 ```
 ### Example 2: Webhook with Data Included
 Include the full crawl results in the webhook payload.
 **Request:**
 ```bash
 curl -X POST http://localhost:11235/crawl/job \
  -H "Content-Type: application/json" \
  -d '{
    "urls": ["https://example.com"],
    "webhook_config": {
      "webhook_url": "https://myapp.com/webhooks/crawl-complete",
      "webhook_data_in_payload": true
    }
  }'
 ```
 **Webhook Payload Received:**
 ```json
 {
  "task_id": "crawl_a1b2c3d4",
  "task_type": "crawl",
  "status": "completed",
  "timestamp": "2025-10-21T10:30:00.000000+00:00",
  "urls": ["https://example.com"],
  "data": {
    "markdown": "...",
    "html": "...",
    "links": {...},
    "metadata": {...}
  }
 }
 ```
 ### Example 3: Webhook with Custom Headers
 Include custom headers for authentication or identification.
 **Request:**
 ```bash
 curl -X POST http://localhost:11235/crawl/job \
  -H "Content-Type: application/json" \
  -d '{
    "urls": ["https://example.com"],
    "webhook_config": {
      "webhook_url": "https://myapp.com/webhooks/crawl-complete",
      "webhook_data_in_payload": false,
      "webhook_headers": {
        "X-Webhook-Secret": "my-secret-token",
        "X-Service-ID": "crawl4ai-production"
      }
    }
  }'
 ```
 The webhook will be sent with these additional headers plus the default headers from config.
 ### Example 4: Failure Notification
 When a crawl job fails, a webhook is sent with error details.
 **Webhook Payload on Failure:**
 ```json
 {
  "task_id": "crawl_a1b2c3d4",
  "task_type": "crawl",
  "status": "failed",
  "timestamp": "2025-10-21T10:30:00.000000+00:00",
  "urls": ["https://example.com"],
  "error": "Connection timeout after 30s"
 }
 ```
 ### Example 5: Using Global Default Webhook
 If you set a `default_url` in `config.yml`, jobs submitted without a `webhook_config` will use it:
 **config.yml:**
 ```yaml
 webhooks:
  enabled: true
  default_url: "https://myapp.com/webhooks/default"
  data_in_payload: false
 ```
 **Request (no webhook_config needed):**
 ```bash
 curl -X POST http://localhost:11235/crawl/job \
  -H "Content-Type: application/json" \
  -d '{
    "urls": ["https://example.com"]
  }'
 ```
 The webhook will be sent to the default URL configured in config.yml.
 ### Example 6: LLM Extraction Job with Webhook
 Use webhooks with the LLM extraction endpoint for asynchronous processing.
 **Request:**
 ```bash
 curl -X POST http://localhost:11235/llm/job \
  -H "Content-Type: application/json" \
  -d '{
    "url": "https://example.com/article",
    "q": "Extract the article title, author, and publication date",
    "schema": "{\"type\": \"object\", \"properties\": {\"title\": {\"type\": \"string\"}, \"author\": {\"type\": \"string\"}, \"date\": {\"type\": \"string\"}}}",
    "cache": false,
    "provider": "openai/gpt-4o-mini",
    "webhook_config": {
      "webhook_url": "https://myapp.com/webhooks/llm-complete",
      "webhook_data_in_payload": true
    }
  }'
 ```
 **Response:**
 ```json
 {
  "task_id": "llm_1698765432_12345"
 }
 ```
 **Webhook Payload Received:**
 ```json
 {
  "task_id": "llm_1698765432_12345",
  "task_type": "llm_extraction",
  "status": "completed",
  "timestamp": "2025-10-21T10:30:00.000000+00:00",
  "urls": ["https://example.com/article"],
  "data": {
    "extracted_content": {
      "title": "Understanding Web Scraping",
      "author": "John Doe",
      "date": "2025-10-21"
    }
  }
 }
 ```
 ## Webhook Handler Example
 Here's a simple Python Flask webhook handler that supports both crawl and LLM extraction jobs:
 ```python
 from flask import Flask, request, jsonify
 import requests
 app = Flask(__name__)
@app.route('/webhooks/crawl-complete', methods=['POST'])
 def handle_crawl_webhook():
    payload = request.json
    task_id = payload['task_id']
    task_type = payload['task_type']
    status = payload['status']
    if status == 'completed':
        # If data not in payload, fetch it
        if 'data' not in payload:
            # Determine endpoint based on task type
            endpoint = 'crawl' if task_type == 'crawl' else 'llm'
            response = requests.get(f'http://localhost:11235/{endpoint}/job/{task_id}')
            data = response.json()
        else:
            data = payload['data']
        # Process based on task type
        if task_type == 'crawl':
            print(f"Processing crawl results for {task_id}")
            # Handle crawl results
            results = data.get('results', [])
            for result in results:
                print(f"  - {result.get('url')}: {len(result.get('markdown', ''))} chars")
        elif task_type == 'llm_extraction':
            print(f"Processing LLM extraction for {task_id}")
            # Handle LLM extraction
            # Note: Webhook sends 'extracted_content', API returns 'result'
            extracted = data.get('extracted_content', data.get('result', {}))
            print(f"  - Extracted: {extracted}")
        # Your business logic here...
    elif status == 'failed':
        error = payload.get('error', 'Unknown error')
        print(f"{task_type} job {task_id} failed: {error}")
        # Handle failure...
    return jsonify({"status": "received"}), 200
 if __name__ == '__main__':
    app.run(port=8080)
 ```
 ## Retry Logic
 The webhook delivery service uses exponential backoff retry logic:
 - **Attempts:** Up to 5 attempts by default
 - **Delays:** 1s → 2s → 4s → 8s → 16s
 - **Timeout:** 30 seconds per attempt
 - **Retry Conditions:**
  - Server errors (5xx status codes)
  - Network errors
  - Timeouts
 - **No Retry:**
  - Client errors (4xx status codes)
  - Successful delivery (2xx status codes)
 ## Benefits
 1. **No Polling Required** - Eliminates constant API calls to check job status
 2. **Real-time Notifications** - Immediate notification when jobs complete
 3. **Reliable Delivery** - Retries with exponential backoff make delivery resilient to transient server and network failures
 4. **Flexible** - Choose between notification-only or full data delivery
 5. **Secure** - Support for custom headers for authentication
 6. **Configurable** - Global defaults or per-job configuration
 7. **Universal Support** - Works with both `/crawl/job` and `/llm/job` endpoints
 ## TypeScript Client Example
 ```typescript
 interface WebhookConfig {
  webhook_url: string;
  webhook_data_in_payload?: boolean;
  webhook_headers?: Record<string, string>;
 }
 interface CrawlJobRequest {
  urls: string[];
  browser_config?: Record<string, any>;
  crawler_config?: Record<string, any>;
  webhook_config?: WebhookConfig;
 }
 interface LLMJobRequest {
  url: string;
  q: string;
  schema?: string;
  cache?: boolean;
  provider?: string;
  webhook_config?: WebhookConfig;
 }
 async function createCrawlJob(request: CrawlJobRequest) {
  const response = await fetch('http://localhost:11235/crawl/job', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(request)
  });
  const { task_id } = await response.json();
  return task_id;
 }
 async function createLLMJob(request: LLMJobRequest) {
  const response = await fetch('http://localhost:11235/llm/job', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(request)
  });
  const { task_id } = await response.json();
  return task_id;
 }
 // Usage - Crawl Job
 const crawlTaskId = await createCrawlJob({
  urls: ['https://example.com'],
  webhook_config: {
    webhook_url: 'https://myapp.com/webhooks/crawl-complete',
    webhook_data_in_payload: false,
    webhook_headers: {
      'X-Webhook-Secret': 'my-secret'
    }
  }
 });
 // Usage - LLM Extraction Job
 const llmTaskId = await createLLMJob({
  url: 'https://example.com/article',
  q: 'Extract the main points from this article',
  provider: 'openai/gpt-4o-mini',
  webhook_config: {
    webhook_url: 'https://myapp.com/webhooks/llm-complete',
    webhook_data_in_payload: true,
    webhook_headers: {
      'X-Webhook-Secret': 'my-secret'
    }
  }
 });
 ```
 ## Monitoring and Debugging
 Webhook delivery attempts are logged at INFO level:
 - Successful deliveries
 - Retry attempts with delays
 - Final failures after max attempts
 Check the application logs for webhook delivery status:
 ```bash
 docker logs crawl4ai-container | grep -i webhook
 ```
--- a/deploy/docker/init.py
+++ b/deploy/docker/init.py
@@ -1 +0,0 @@
 # Deploy docker module
--- a/deploy/docker/api.py
+++ b/deploy/docker/api.py
@@ -46,7 +46,6 @@ from utils import (
    get_llm_temperature,
    get_llm_base_url
 )
 from webhook import WebhookDeliveryService
 import psutil, time
@@ -67,7 +66,6 @@ async def handle_llm_qa(
    config: dict
 ) -> str:
    """Process QA using LLM with crawled content as context."""
    from crawler_pool import get_crawler
    try:
        if not url.startswith(('http://', 'https://')) and not url.startswith(("raw:", "raw://")):
            url = 'https://' + url
@@ -76,21 +74,15 @@ async def handle_llm_qa(
        if last_q_index != -1:
            url = url[:last_q_index]
-        # Get markdown content (use default config)
+        # Get markdown content
-        from utils import load_config
+        async with AsyncWebCrawler() as crawler:
-        cfg = load_config()
+            result = await crawler.arun(url)
-        browser_cfg = BrowserConfig(
+            if not result.success:
-            extra_args=cfg["crawler"]["browser"].get("extra_args", []),
+                raise HTTPException(
-            **cfg["crawler"]["browser"].get("kwargs", {}),
+                    status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-        )
+                    detail=result.error_message
-        crawler = await get_crawler(browser_cfg)
+                )
-        result = await crawler.arun(url)
+            content = result.markdown.fit_markdown or result.markdown.raw_markdown
        if not result.success:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=result.error_message
            )
        content = result.markdown.fit_markdown or result.markdown.raw_markdown
        # Create prompt and get LLM response
        prompt = f"""Use the following content as context to answer the question.
@@ -128,14 +120,10 @@ async def process_llm_extraction(
    schema: Optional[str] = None,
    cache: str = "0",
    provider: Optional[str] = None,
    webhook_config: Optional[Dict] = None,
    temperature: Optional[float] = None,
    base_url: Optional[str] = None
 ) -> None:
    """Process LLM extraction in background."""
    # Initialize webhook service
    webhook_service = WebhookDeliveryService(config)
    try:
        # Validate provider
        is_valid, error_msg = validate_llm_provider(config, provider)
@@ -144,16 +132,6 @@ async def process_llm_extraction(
                "status": TaskStatus.FAILED,
                "error": error_msg
            })
            # Send webhook notification on failure
            await webhook_service.notify_job_completion(
                task_id=task_id,
                task_type="llm_extraction",
                status="failed",
                urls=[url],
                webhook_config=webhook_config,
                error=error_msg
            )
            return
        api_key = get_llm_api_key(config, provider)  # Returns None to let litellm handle it
        llm_strategy = LLMExtractionStrategy(
@@ -184,40 +162,17 @@ async def process_llm_extraction(
                "status": TaskStatus.FAILED,
                "error": result.error_message
            })
            # Send webhook notification on failure
            await webhook_service.notify_job_completion(
                task_id=task_id,
                task_type="llm_extraction",
                status="failed",
                urls=[url],
                webhook_config=webhook_config,
                error=result.error_message
            )
            return
        try:
            content = json.loads(result.extracted_content)
        except json.JSONDecodeError:
            content = result.extracted_content
        result_data = {"extracted_content": content}
        await redis.hset(f"task:{task_id}", mapping={
            "status": TaskStatus.COMPLETED,
            "result": json.dumps(content)
        })
        # Send webhook notification on successful completion
        await webhook_service.notify_job_completion(
            task_id=task_id,
            task_type="llm_extraction",
            status="completed",
            urls=[url],
            webhook_config=webhook_config,
            result=result_data
        )
    except Exception as e:
        logger.error(f"LLM extraction error: {str(e)}", exc_info=True)
        await redis.hset(f"task:{task_id}", mapping={
@@ -225,16 +180,6 @@ async def process_llm_extraction(
            "error": str(e)
        })
        # Send webhook notification on failure
        await webhook_service.notify_job_completion(
            task_id=task_id,
            task_type="llm_extraction",
            status="failed",
            urls=[url],
            webhook_config=webhook_config,
            error=str(e)
        )
 async def handle_markdown_request(
    url: str,
    filter_type: FilterType,
@@ -279,32 +224,25 @@ async def handle_markdown_request(
        cache_mode = CacheMode.ENABLED if cache == "1" else CacheMode.WRITE_ONLY
-        from crawler_pool import get_crawler
+        async with AsyncWebCrawler() as crawler:
-        from utils import load_config as _load_config
+            result = await crawler.arun(
-        _cfg = _load_config()
+                url=decoded_url,
-        browser_cfg = BrowserConfig(
+                config=CrawlerRunConfig(
-            extra_args=_cfg["crawler"]["browser"].get("extra_args", []),
+                    markdown_generator=md_generator,
-            **_cfg["crawler"]["browser"].get("kwargs", {}),
+                    scraping_strategy=LXMLWebScrapingStrategy(),
-        )
+                    cache_mode=cache_mode
-        crawler = await get_crawler(browser_cfg)
+                )
        result = await crawler.arun(
            url=decoded_url,
            config=CrawlerRunConfig(
                markdown_generator=md_generator,
                scraping_strategy=LXMLWebScrapingStrategy(),
                cache_mode=cache_mode
            )
        )
        if not result.success:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=result.error_message
            )
-        return (result.markdown.raw_markdown
+            if not result.success:
-               if filter_type == FilterType.RAW
+                raise HTTPException(
-               else result.markdown.fit_markdown)
+                    status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                    detail=result.error_message
                )
            return (result.markdown.raw_markdown 
                   if filter_type == FilterType.RAW 
                   else result.markdown.fit_markdown)
    except Exception as e:
        logger.error(f"Markdown error: {str(e)}", exc_info=True)
@@ -323,7 +261,6 @@ async def handle_llm_request(
    cache: str = "0",
    config: Optional[dict] = None,
    provider: Optional[str] = None,
    webhook_config: Optional[Dict] = None,
    temperature: Optional[float] = None,
    api_base_url: Optional[str] = None
 ) -> JSONResponse:
@@ -357,7 +294,6 @@ async def handle_llm_request(
            base_url,
            config,
            provider,
            webhook_config,
            temperature,
            api_base_url
        )
@@ -405,7 +341,6 @@ async def create_new_task(
    base_url: str,
    config: dict,
    provider: Optional[str] = None,
    webhook_config: Optional[Dict] = None,
    temperature: Optional[float] = None,
    api_base_url: Optional[str] = None
 ) -> JSONResponse:
@@ -417,17 +352,11 @@ async def create_new_task(
    from datetime import datetime
    task_id = f"llm_{int(datetime.now().timestamp())}_{id(background_tasks)}"
-    task_data = {
+    await redis.hset(f"task:{task_id}", mapping={
        "status": TaskStatus.PROCESSING,
        "created_at": datetime.now().isoformat(),
        "url": decoded_url
-    }
+    })
    # Store webhook config if provided
    if webhook_config:
        task_data["webhook_config"] = json.dumps(webhook_config)
    await redis.hset(f"task:{task_id}", mapping=task_data)
    background_tasks.add_task(
        process_llm_extraction,
@@ -439,7 +368,6 @@ async def create_new_task(
        schema,
        cache,
        provider,
        webhook_config,
        temperature,
        api_base_url
    )
@@ -514,25 +442,13 @@ async def handle_crawl_request(
    urls: List[str],
    browser_config: dict,
    crawler_config: dict,
-    config: dict,
+    config: dict
    hooks_config: Optional[dict] = None
 ) -> dict:
-    """Handle non-streaming crawl requests with optional hooks."""
+    """Handle non-streaming crawl requests."""
    # Track request start
    request_id = f"req_{uuid4().hex[:8]}"
    try:
        from monitor import get_monitor
        await get_monitor().track_request_start(
            request_id, "/crawl", urls[0] if urls else "batch", browser_config
        )
    except:
        pass  # Monitor not critical
    start_mem_mb = _get_memory_mb() # <--- Get memory before
    start_time = time.time()
    mem_delta_mb = None
    peak_mem_mb = start_mem_mb
    hook_manager = None
    try:
        urls = [('https://' + url) if not url.startswith(('http://', 'https://')) and not url.startswith(("raw:", "raw://")) else url for url in urls]
@@ -552,27 +468,11 @@ async def handle_crawl_request(
        # crawler: AsyncWebCrawler = AsyncWebCrawler(config=browser_config)
        # await crawler.start()
        # Attach hooks if provided
        hooks_status = {}
        if hooks_config:
            from hook_manager import attach_user_hooks_to_crawler, UserHookManager
            hook_manager = UserHookManager(timeout=hooks_config.get('timeout', 30))
            hooks_status, hook_manager = await attach_user_hooks_to_crawler(
                crawler,
                hooks_config.get('code', {}),
                timeout=hooks_config.get('timeout', 30),
                hook_manager=hook_manager
            )
            logger.info(f"Hooks attachment status: {hooks_status['status']}")
        base_config = config["crawler"]["base_config"]
-        # Iterate on key-value pairs in global_config then use hasattr to set them
+        # Iterate on key-value pairs in global_config then use hasattr to set them
        for key, value in base_config.items():
            if hasattr(crawler_config, key):
-                current_value = getattr(crawler_config, key)
+                setattr(crawler_config, key, value)
                # Only set base config if user didn't provide a value
                if current_value is None or current_value == "":
                    setattr(crawler_config, key, value)
        results = []
        func = getattr(crawler, "arun" if len(urls) == 1 else "arun_many")
@@ -582,10 +482,6 @@ async def handle_crawl_request(
                                dispatcher=dispatcher)
        results = await partial_func()
        # Ensure results is always a list
        if not isinstance(results, list):
            results = [results]
        # await crawler.close()
        end_mem_mb = _get_memory_mb() # <--- Get memory after
@@ -599,39 +495,16 @@ async def handle_crawl_request(
        # Process results to handle PDF bytes
        processed_results = []
        for result in results:
-            try:
+            result_dict = result.model_dump()
-                # Check if result has model_dump method (is a proper CrawlResult)
+            # if fit_html is not a string, set it to None to avoid serialization errors
-                if hasattr(result, 'model_dump'):
+            if "fit_html" in result_dict and not (result_dict["fit_html"] is None or isinstance(result_dict["fit_html"], str)):
-                    result_dict = result.model_dump()
+                result_dict["fit_html"] = None
-                elif isinstance(result, dict):
+            # If PDF exists, encode it to base64
-                    result_dict = result
+            if result_dict.get('pdf') is not None:
-                else:
+                result_dict['pdf'] = b64encode(result_dict['pdf']).decode('utf-8')
-                    # Handle unexpected result type
+            processed_results.append(result_dict)
                    logger.warning(f"Unexpected result type: {type(result)}")
                    result_dict = {
                        "url": str(result) if hasattr(result, '__str__') else "unknown",
                        "success": False,
                        "error_message": f"Unexpected result type: {type(result).__name__}"
                    }
-                # if fit_html is not a string, set it to None to avoid serialization errors
+        return {
                if "fit_html" in result_dict and not (result_dict["fit_html"] is None or isinstance(result_dict["fit_html"], str)):
                    result_dict["fit_html"] = None
                # If PDF exists, encode it to base64
                if result_dict.get('pdf') is not None and isinstance(result_dict.get('pdf'), bytes):
                    result_dict['pdf'] = b64encode(result_dict['pdf']).decode('utf-8')
                processed_results.append(result_dict)
            except Exception as e:
                logger.error(f"Error processing result: {e}")
                processed_results.append({
                    "url": "unknown",
                    "success": False,
                    "error_message": str(e)
                })
        response = {
            "success": True,
            "results": processed_results,
            "server_processing_time_s": end_time - start_time,
@@ -639,53 +512,8 @@ async def handle_crawl_request(
            "server_peak_memory_mb": peak_mem_mb
        }
        # Track request completion
        try:
            from monitor import get_monitor
            await get_monitor().track_request_end(
                request_id, success=True, pool_hit=True, status_code=200
            )
        except:
            pass
        # Add hooks information if hooks were used
        if hooks_config and hook_manager:
            from hook_manager import UserHookManager
            if isinstance(hook_manager, UserHookManager):
                try:
                    # Ensure all hook data is JSON serializable
                    hook_data = {
                        "status": hooks_status,
                        "execution_log": hook_manager.execution_log,
                        "errors": hook_manager.errors,
                        "summary": hook_manager.get_summary()
                    }
                    # Test that it's serializable
                    json.dumps(hook_data)
                    response["hooks"] = hook_data
                except (TypeError, ValueError) as e:
                    logger.error(f"Hook data not JSON serializable: {e}")
                    response["hooks"] = {
                        "status": {"status": "error", "message": "Hook data serialization failed"},
                        "execution_log": [],
                        "errors": [{"error": str(e)}],
                        "summary": {}
                    }
        return response
    except Exception as e:
        logger.error(f"Crawl error: {str(e)}", exc_info=True)
        # Track request error
        try:
            from monitor import get_monitor
            await get_monitor().track_request_end(
                request_id, success=False, error=str(e), status_code=500
            )
        except:
            pass
        if 'crawler' in locals() and crawler.ready: # Check if crawler was initialized and started
            #  try:
            #      await crawler.close()
@@ -711,11 +539,9 @@ async def handle_stream_crawl_request(
    urls: List[str],
    browser_config: dict,
    crawler_config: dict,
-    config: dict,
+    config: dict
-    hooks_config: Optional[dict] = None
+) -> Tuple[AsyncWebCrawler, AsyncGenerator]:
-) -> Tuple[AsyncWebCrawler, AsyncGenerator, Optional[Dict]]:
+    """Handle streaming crawl requests."""
    """Handle streaming crawl requests with optional hooks."""
    hooks_info = None
    try:
        browser_config = BrowserConfig.load(browser_config)
        # browser_config.verbose = True # Set to False or remove for production stress testing
@@ -737,27 +563,13 @@ async def handle_stream_crawl_request(
        # crawler = AsyncWebCrawler(config=browser_config)
        # await crawler.start()
        # Attach hooks if provided
        if hooks_config:
            from hook_manager import attach_user_hooks_to_crawler, UserHookManager
            hook_manager = UserHookManager(timeout=hooks_config.get('timeout', 30))
            hooks_status, hook_manager = await attach_user_hooks_to_crawler(
                crawler,
                hooks_config.get('code', {}),
                timeout=hooks_config.get('timeout', 30),
                hook_manager=hook_manager
            )
            logger.info(f"Hooks attachment status for streaming: {hooks_status['status']}")
            # Include hook manager in hooks_info for proper tracking
            hooks_info = {'status': hooks_status, 'manager': hook_manager}
        results_gen = await crawler.arun_many(
            urls=urls,
            config=crawler_config,
            dispatcher=dispatcher
        )
-        return crawler, results_gen, hooks_info
+        return crawler, results_gen
    except Exception as e:
        # Make sure to close crawler if started during an error here
@@ -781,7 +593,6 @@ async def handle_crawl_job(
    browser_config: Dict,
    crawler_config: Dict,
    config: Dict,
    webhook_config: Optional[Dict] = None,
 ) -> Dict:
    """
    Fire-and-forget version of handle_crawl_request.
@@ -789,24 +600,13 @@ async def handle_crawl_job(
    lets /crawl/job/{task_id} polling fetch the result.
    """
    task_id = f"crawl_{uuid4().hex[:8]}"
-
+    await redis.hset(f"task:{task_id}", mapping={
    # Store task data in Redis
    task_data = {
        "status": TaskStatus.PROCESSING,         # <-- keep enum values consistent
        "created_at": datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
        "url": json.dumps(urls),                 # store list as JSON string
        "result": "",
        "error": "",
-    }
+    })
    # Store webhook config if provided
    if webhook_config:
        task_data["webhook_config"] = json.dumps(webhook_config)
    await redis.hset(f"task:{task_id}", mapping=task_data)
    # Initialize webhook service
    webhook_service = WebhookDeliveryService(config)
    async def _runner():
        try:
@@ -820,17 +620,6 @@ async def handle_crawl_job(
                "status": TaskStatus.COMPLETED,
                "result": json.dumps(result),
            })
            # Send webhook notification on successful completion
            await webhook_service.notify_job_completion(
                task_id=task_id,
                task_type="crawl",
                status="completed",
                urls=urls,
                webhook_config=webhook_config,
                result=result
            )
            await asyncio.sleep(5)  # Give Redis time to process the update
        except Exception as exc:
            await redis.hset(f"task:{task_id}", mapping={
@@ -838,15 +627,5 @@ async def handle_crawl_job(
                "error": str(exc),
            })
            # Send webhook notification on failure
            await webhook_service.notify_job_completion(
                task_id=task_id,
                task_type="crawl",
                status="failed",
                urls=urls,
                webhook_config=webhook_config,
                error=str(exc)
            )
    background_tasks.add_task(_runner)
    return {"task_id": task_id}
--- a/deploy/docker/docs/c4ai-code-context.md
+++ b/deploy/docker/docs/c4ai-code-context.md
--- a/deploy/docker/docs/c4ai-doc-context.md
+++ b/deploy/docker/docs/c4ai-doc-context.md
--- a/deploy/docker/cnode_cli.py
+++ b/deploy/docker/cnode_cli.py
@@ -1,492 +0,0 @@
 """
 Crawl4AI Server CLI Commands
 Provides `cnode` command group for Docker orchestration.
 """
 import click
 import anyio
 from rich.console import Console
 from rich.table import Table
 from rich.panel import Panel
 from rich.prompt import Confirm
 from deploy.docker.server_manager import ServerManager
 console = Console()
@click.group()
def cli():
    """Manage Crawl4AI Docker server instances
    \b
    One-command deployment with automatic scaling:
      • Single container for development (N=1)
      • Docker Swarm for production with built-in load balancing (N>1)
      • Docker Compose + Nginx as fallback (N>1)
    \b
    Examples:
      cnode start                    # Single container on port 11235
      cnode start --replicas 3       # Auto-detect Swarm or Compose
      cnode start -r 5 --port 8080   # 5 replicas on custom port
      cnode status                   # Check current deployment
      cnode scale 10                 # Scale to 10 replicas
      cnode stop                     # Stop and cleanup
    """
    # Root command group: the docstring above is what click shows for
    # `--help`, so it is user-facing text. The group itself does no work;
    # subcommands attach to it through @cli.command(...).
@cli.command("start")
@click.option(
    "--replicas", "-r",
    type=int,
    default=1,
    help="Number of container replicas (default: 1)"
)
@click.option(
    "--mode",
    type=click.Choice(["auto", "single", "swarm", "compose"]),
    default="auto",
    help="Deployment mode (default: auto-detect)"
)
@click.option(
    "--port", "-p",
    type=int,
    default=11235,
    help="External port to expose (default: 11235)"
)
@click.option(
    "--env-file",
    type=click.Path(exists=True),
    help="Path to environment file"
)
@click.option(
    "--image",
    default="unclecode/crawl4ai:latest",
    help="Docker image to use (default: unclecode/crawl4ai:latest)"
)
def start_cmd(replicas: int, mode: str, port: int, env_file: str, image: str):
    """Start Crawl4AI server with automatic orchestration.
    Deployment modes:
    - auto: Automatically choose best mode (default)
    - single: Single container (N=1 only)
    - swarm: Docker Swarm with built-in load balancing
    - compose: Docker Compose + Nginx reverse proxy
    The server will:
    1. Check if Docker is running
    2. Validate port availability
    3. Pull image if needed
    4. Start container(s) with health checks
    5. Save state for management
    Examples:
        # Development: single container
        cnode start
        # Production: 5 replicas with Swarm
        cnode start --replicas 5
        # Custom configuration
        cnode start -r 3 --port 8080 --env-file .env.prod
    """
    manager = ServerManager()

    # Echo the requested settings before any work is attempted.
    banner = (
        f"[cyan]Starting Crawl4AI Server[/cyan]\n\n"
        f"Replicas: [yellow]{replicas}[/yellow]\n"
        f"Mode: [yellow]{mode}[/yellow]\n"
        f"Port: [yellow]{port}[/yellow]\n"
        f"Image: [yellow]{image}[/yellow]"
    )
    console.print(Panel(banner, title="Server Start", border_style="cyan"))

    async def _launch():
        # Thin coroutine wrapper so the keyword-argument call can be
        # handed to anyio.run() as a plain callable.
        return await manager.start(
            replicas=replicas,
            mode=mode,
            port=port,
            env_file=env_file,
            image=image,
        )

    with console.status("[cyan]Starting server..."):
        result = anyio.run(_launch)

    if result["success"]:
        # Prefer the mode the manager reports; fall back to the requested one.
        active_mode = result.get('state_data', {}).get('mode', mode)
        summary = (
            f"[green]✓ Server started successfully![/green]\n\n"
            f"Mode: [cyan]{active_mode}[/cyan]\n"
            f"URL: [bold]http://localhost:{port}[/bold]\n"
            f"Health: [bold]http://localhost:{port}/health[/bold]\n"
            f"Monitor: [bold]http://localhost:{port}/monitor[/bold]"
        )
        console.print(Panel(summary, title="Server Running", border_style="green"))
        return

    # Failure path: report "error", then "message", then a generic fallback.
    error_msg = result.get("error", result.get("message", "Unknown error"))
    failure = f"[red]✗ Failed to start server[/red]\n\n{error_msg}"
    console.print(Panel(failure, title="Error", border_style="red"))
    if "already running" in error_msg.lower():
        console.print("\n[yellow]Hint: Use 'cnode status' to check current deployment[/yellow]")
        console.print("[yellow]      Use 'cnode stop' to stop existing server[/yellow]")
@cli.command("status")
def status_cmd():
    """Show current server status and deployment info.
    Displays:
    - Running state (up/down)
    - Deployment mode (single/swarm/compose)
    - Number of replicas
    - Port mapping
    - Uptime
    - Image version
    Example:
        cnode status
    """
    manager = ServerManager()

    async def _fetch():
        return await manager.status()

    info = anyio.run(_fetch)

    # Guard clause: nothing running, so show the hint panel and stop.
    if not info["running"]:
        idle_text = (
            "[yellow]No server is currently running[/yellow]\n\n"
            "Use 'cnode start' to launch a server"
        )
        console.print(Panel(idle_text, title="Server Status", border_style="yellow"))
        return

    # (label, value) pairs in display order; missing keys use the same
    # defaults as the rest of the CLI.
    rows = (
        ("Status", "🟢 Running"),
        ("Mode", info["mode"]),
        ("Replicas", str(info.get("replicas", 1))),
        ("Port", str(info.get("port", 11235))),
        ("Image", info.get("image", "unknown")),
        ("Uptime", info.get("uptime", "unknown")),
        ("Started", info.get("started_at", "unknown")),
    )

    table = Table(title="Crawl4AI Server Status", border_style="green")
    table.add_column("Property", style="cyan")
    table.add_column("Value", style="green")
    for label, value in rows:
        table.add_row(label, value)

    console.print(table)
    console.print("\n[green]✓ Server is healthy[/green]")
    console.print(f"[dim]Access: http://localhost:{info.get('port', 11235)}[/dim]")
@cli.command("stop")
@click.option(
    "--remove-volumes",
    is_flag=True,
    help="Remove associated volumes (WARNING: deletes data)"
)
def stop_cmd(remove_volumes: bool):
    """Stop running Crawl4AI server and cleanup resources.
    This will:
    1. Stop all running containers/services
    2. Remove containers
    3. Optionally remove volumes (--remove-volumes)
    4. Clean up state files
    WARNING: Use --remove-volumes with caution as it will delete
    persistent data including Redis databases and logs.
    Examples:
        # Stop server, keep volumes
        cnode stop
        # Stop and remove all data
        cnode stop --remove-volumes
    """
    manager = ServerManager()

    # Destructive option: ask for explicit confirmation first.
    if remove_volumes and not Confirm.ask(
        "[red]⚠️  This will delete all server data including Redis databases. Continue?[/red]"
    ):
        console.print("[yellow]Cancelled[/yellow]")
        return

    async def _halt():
        return await manager.stop(remove_volumes=remove_volumes)

    with console.status("[cyan]Stopping server..."):
        outcome = anyio.run(_halt)

    # Pick the panel contents per outcome, then print once.
    if outcome["success"]:
        text = (
            f"[green]✓ Server stopped successfully[/green]\n\n"
            f"{outcome.get('message', 'All resources cleaned up')}"
        )
        title, colour = "Server Stopped", "green"
    else:
        text = (
            f"[red]✗ Error stopping server[/red]\n\n"
            f"{outcome.get('error', outcome.get('message', 'Unknown error'))}"
        )
        title, colour = "Error", "red"
    console.print(Panel(text, title=title, border_style=colour))
@cli.command("scale")
@click.argument("replicas", type=int)
def scale_cmd(replicas: int):
    """Scale server to specified number of replicas.
    Only works with Swarm or Compose modes. Single container
    mode cannot be scaled (must stop and restart with --replicas).
    Scaling is live and does not require downtime. The load
    balancer will automatically distribute traffic to new replicas.
    Examples:
        # Scale up to 10 replicas
        cnode scale 10
        # Scale down to 2 replicas
        cnode scale 2
        # Scale to 1 (minimum)
        cnode scale 1
    """
    # Validate before creating the manager or touching the deployment.
    if replicas < 1:
        console.print("[red]Error: Replicas must be at least 1[/red]")
        return

    manager = ServerManager()

    async def _resize():
        return await manager.scale(replicas=replicas)

    with console.status(f"[cyan]Scaling to {replicas} replicas..."):
        outcome = anyio.run(_resize)

    if outcome["success"]:
        done = (
            f"[green]✓ Scaled successfully[/green]\n\n"
            f"New replica count: [bold]{replicas}[/bold]\n"
            f"Mode: [cyan]{outcome.get('mode')}[/cyan]"
        )
        console.print(Panel(done, title="Scaling Complete", border_style="green"))
        return

    # Failure path: report "error", then "message", then a generic fallback.
    error_msg = outcome.get("error", outcome.get("message", "Unknown error"))
    console.print(Panel(
        f"[red]✗ Scaling failed[/red]\n\n{error_msg}",
        title="Error",
        border_style="red",
    ))
    if "single container" in error_msg.lower():
        # Single-container deployments need a stop/start cycle instead.
        console.print("\n[yellow]Hint: For single container mode:[/yellow]")
        console.print("[yellow]  1. cnode stop[/yellow]")
        console.print(f"[yellow]  2. cnode start --replicas {replicas}[/yellow]")
@cli.command("logs")
@click.option(
    "--follow", "-f",
    is_flag=True,
    help="Follow log output (like tail -f)"
)
@click.option(
    "--tail",
    type=int,
    default=100,
    help="Number of lines to show (default: 100)"
)
def logs_cmd(follow: bool, tail: int):
    """View server logs.
    Shows logs from running containers/services. Use --follow
    to stream logs in real-time.
    Examples:
        # Show last 100 lines
        cnode logs
        # Show last 500 lines
        cnode logs --tail 500
        # Follow logs in real-time
        cnode logs --follow
        # Combine options
        cnode logs -f --tail 50
    """
    manager = ServerManager()

    async def _collect():
        return await manager.logs(follow=follow, tail=tail)

    # Whatever the manager returns is printed as-is through the rich console.
    log_output = anyio.run(_collect)
    console.print(log_output)
@cli.command("cleanup")
@click.option(
    "--force",
    is_flag=True,
    help="Force cleanup even if state file doesn't exist"
)
def cleanup_cmd(force: bool):
    """Force cleanup of all Crawl4AI Docker resources.
    Stops and removes all containers, networks, and optionally volumes.
    Useful when server is stuck or state is corrupted.
    Examples:
        # Clean up everything
        cnode cleanup
        # Force cleanup (ignore state file)
        cnode cleanup --force
    """
    manager = ServerManager()

    # Always show what is about to be removed, even with --force.
    warning = (
        "[yellow]⚠️  Cleaning up Crawl4AI Docker resources[/yellow]\n\n"
        "This will stop and remove:\n"
        "- All Crawl4AI containers\n"
        "- Nginx load balancer\n"
        "- Redis instance\n"
        "- Docker networks\n"
        "- State files"
    )
    console.print(Panel(warning, title="Cleanup", border_style="yellow"))

    # --force also skips the interactive confirmation.
    if not force:
        if not Confirm.ask("[yellow]Continue with cleanup?[/yellow]"):
            console.print("[yellow]Cancelled[/yellow]")
            return

    async def _purge():
        return await manager.cleanup(force=force)

    with console.status("[cyan]Cleaning up resources..."):
        outcome = anyio.run(_purge)

    if outcome["success"]:
        text = (
            f"[green]✓ Cleanup completed successfully[/green]\n\n"
            f"Removed: {outcome.get('removed', 0)} containers\n"
            f"{outcome.get('message', 'All resources cleaned up')}"
        )
        title, colour = "Cleanup Complete", "green"
    else:
        text = (
            f"[yellow]⚠️  Partial cleanup[/yellow]\n\n"
            f"{outcome.get('message', 'Some resources may still exist')}"
        )
        title, colour = "Cleanup Status", "yellow"
    console.print(Panel(text, title=title, border_style=colour))
@cli.command("restart")
@click.option(
    "--replicas", "-r",
    type=int,
    help="New replica count (optional)"
)
def restart_cmd(replicas: int):
    """Restart server (stop then start with same config).
    Preserves existing configuration unless overridden with options.
    Useful for applying image updates or recovering from errors.
    Examples:
        # Restart with same configuration
        cnode restart
        # Restart and change replica count
        cnode restart --replicas 5
    """
    # Reject an invalid replica count BEFORE touching the running server,
    # matching `cnode scale`. Without this check the server was stopped
    # first and the bad value only reached the start call afterwards,
    # which could leave the deployment down.
    if replicas is not None and replicas < 1:
        console.print("[red]Error: Replicas must be at least 1[/red]")
        return

    manager = ServerManager()

    # Get current state
    async def _get_status():
        return await manager.status()

    current = anyio.run(_get_status)
    if not current["running"]:
        console.print("[yellow]No server is running. Use 'cnode start' instead.[/yellow]")
        return

    # Extract current config, with the same defaults as `cnode start`.
    current_replicas = current.get("replicas", 1)
    current_port = current.get("port", 11235)
    current_image = current.get("image", "unclecode/crawl4ai:latest")
    current_mode = current.get("mode", "auto")

    # Override with CLI args (None means "keep the current replica count").
    new_replicas = replicas if replicas is not None else current_replicas

    console.print(Panel(
        f"[cyan]Restarting Crawl4AI Server[/cyan]\n\n"
        f"Replicas: [yellow]{current_replicas}[/yellow] → [green]{new_replicas}[/green]\n"
        f"Port: [yellow]{current_port}[/yellow]\n"
        f"Mode: [yellow]{current_mode}[/yellow]",
        title="Server Restart",
        border_style="cyan"
    ))

    # Stop current (volumes are kept so data survives the restart).
    async def _stop_server():
        return await manager.stop(remove_volumes=False)

    with console.status("[cyan]Stopping current server..."):
        stop_result = anyio.run(_stop_server)

    if not stop_result["success"]:
        # Same error -> message -> generic fallback as the other commands;
        # reading only 'error' could print "None" when the manager reports
        # the failure under 'message'.
        stop_error = stop_result.get("error", stop_result.get("message", "Unknown error"))
        console.print(f"[red]Failed to stop server: {stop_error}[/red]")
        return

    # Start new.
    # NOTE(review): mode is re-detected ("auto") rather than reusing
    # current_mode, presumably so a changed replica count can pick a
    # suitable mode; no env file is passed on restart. Confirm both are
    # intended.
    async def _start_server():
        return await manager.start(
            replicas=new_replicas,
            mode="auto",
            port=current_port,
            image=current_image
        )

    with console.status("[cyan]Starting server..."):
        start_result = anyio.run(_start_server)

    if start_result["success"]:
        console.print(Panel(
            f"[green]✓ Server restarted successfully![/green]\n\n"
            f"URL: [bold]http://localhost:{current_port}[/bold]",
            title="Restart Complete",
            border_style="green"
        ))
    else:
        start_error = start_result.get("error", start_result.get("message", "Unknown error"))
        console.print(Panel(
            f"[red]✗ Failed to restart server[/red]\n\n"
            f"{start_error}",
            title="Error",
            border_style="red"
        ))
 def main():
    """Entry point for the cnode CLI: invokes the `cli` click group."""
    cli()
 # Run the CLI only when this module is executed as a script.
 if __name__ == "__main__":
    main()
 # Test comment
--- a/deploy/docker/config.yml
+++ b/deploy/docker/config.yml
@@ -3,7 +3,7 @@ app:
  title: "Crawl4AI API"
  version: "1.0.0"
  host: "0.0.0.0"
-  port: 11235
+  port: 11234
  reload: False
  workers: 1
  timeout_keep_alive: 300
@@ -61,7 +61,7 @@ crawler:
    batch_process: 300.0  # Timeout for batch processing
  pool:
    max_pages: 40                          # ← GLOBAL_SEM permits
-    idle_ttl_sec: 300                     # ← 5 min janitor cutoff
+    idle_ttl_sec: 1800                     # ← 30 min janitor cutoff
  browser:
    kwargs:
      headless: true
@@ -88,16 +88,3 @@ observability:
    endpoint: "/metrics"
  health_check:
    endpoint: "/health"
 # Webhook Configuration
 webhooks:
  enabled: true
  default_url: null  # Optional: default webhook URL for all jobs
  data_in_payload: false  # Optional: default behavior for including data
  retry:
    max_attempts: 5
    initial_delay_ms: 1000  # 1s, 2s, 4s, 8s, 16s exponential backoff
    max_delay_ms: 32000
    timeout_ms: 30000  # 30s timeout per webhook call
  headers:  # Optional: default headers to include
    User-Agent: "Crawl4AI-Webhook/1.0"
--- a/deploy/docker/crawler_pool.py
+++ b/deploy/docker/crawler_pool.py
@@ -1,170 +1,60 @@
-# crawler_pool.py - Smart browser pool with tiered management
+# crawler_pool.py  (new file)
-import asyncio, json, hashlib, time
+import asyncio, json, hashlib, time, psutil
 from contextlib import suppress
-from typing import Dict, Optional
+from typing import Dict
 from crawl4ai import AsyncWebCrawler, BrowserConfig
-from utils import load_config, get_container_memory_percent
+from typing import Dict
-import logging
+from utils import load_config 
 logger = logging.getLogger(__name__)
 CONFIG = load_config()
-# Pool tiers
+POOL: Dict[str, AsyncWebCrawler] = {}
 PERMANENT: Optional[AsyncWebCrawler] = None  # Always-ready default browser
 HOT_POOL: Dict[str, AsyncWebCrawler] = {}    # Frequent configs
 COLD_POOL: Dict[str, AsyncWebCrawler] = {}   # Rare configs
 LAST_USED: Dict[str, float] = {}
 USAGE_COUNT: Dict[str, int] = {}
 LOCK = asyncio.Lock()
-# Config
+MEM_LIMIT  = CONFIG.get("crawler", {}).get("memory_threshold_percent", 95.0)   # % RAM – refuse new browsers above this
-MEM_LIMIT = CONFIG.get("crawler", {}).get("memory_threshold_percent", 95.0)
+IDLE_TTL  = CONFIG.get("crawler", {}).get("pool", {}).get("idle_ttl_sec", 1800)   # close if unused for 30 min
 BASE_IDLE_TTL = CONFIG.get("crawler", {}).get("pool", {}).get("idle_ttl_sec", 300)
 DEFAULT_CONFIG_SIG = None  # Cached sig for default config
 def _sig(cfg: BrowserConfig) -> str:
     """Return the SHA-1 hex digest of the config's canonical JSON dump.

     The dict from ``cfg.to_dict()`` is serialized with sorted keys and
     compact separators, so key order does not change the digest.
     """
     canonical = json.dumps(cfg.to_dict(), separators=(",", ":"), sort_keys=True)
     digest = hashlib.sha1(canonical.encode())
     return digest.hexdigest()
 def _is_default_config(sig: str) -> bool:
     """Tell whether *sig* equals the cached default-config signature."""
     # DEFAULT_CONFIG_SIG starts as None at module level, so this is False
     # for any string signature until it has been assigned.
     matches_default = sig == DEFAULT_CONFIG_SIG
     return matches_default
 async def get_crawler(cfg: BrowserConfig) -> AsyncWebCrawler:
-    """Get crawler from pool with tiered strategy."""
+    try:
-    sig = _sig(cfg)
+        sig = _sig(cfg)
-    async with LOCK:
+        async with LOCK:
-        # Check permanent browser for default config
+            if sig in POOL:
-        if PERMANENT and _is_default_config(sig):
+                LAST_USED[sig] = time.time();  
                return POOL[sig]
            if psutil.virtual_memory().percent >= MEM_LIMIT:
                raise MemoryError("RAM pressure – new browser denied")
            crawler = AsyncWebCrawler(config=cfg, thread_safe=False)
            await crawler.start()
            POOL[sig] = crawler; LAST_USED[sig] = time.time()
            return crawler
    except MemoryError as e:
        raise MemoryError(f"RAM pressure – new browser denied: {e}")
    except Exception as e:
        raise RuntimeError(f"Failed to start browser: {e}")
    finally:
        if sig in POOL:
            LAST_USED[sig] = time.time()
-            USAGE_COUNT[sig] = USAGE_COUNT.get(sig, 0) + 1
+        else:
-            logger.info("🔥 Using permanent browser")
+            # If we failed to start the browser, we should remove it from the pool
-            return PERMANENT
+            POOL.pop(sig, None)
-
+            LAST_USED.pop(sig, None)
-        # Check hot pool
+        # If we failed to start the browser, we should remove it from the pool
        if sig in HOT_POOL:
            LAST_USED[sig] = time.time()
            USAGE_COUNT[sig] = USAGE_COUNT.get(sig, 0) + 1
            logger.info(f"♨️  Using hot pool browser (sig={sig[:8]})")
            return HOT_POOL[sig]
        # Check cold pool (promote to hot if used 3+ times)
        if sig in COLD_POOL:
            LAST_USED[sig] = time.time()
            USAGE_COUNT[sig] = USAGE_COUNT.get(sig, 0) + 1
            if USAGE_COUNT[sig] >= 3:
                logger.info(f"⬆️  Promoting to hot pool (sig={sig[:8]}, count={USAGE_COUNT[sig]})")
                HOT_POOL[sig] = COLD_POOL.pop(sig)
                # Track promotion in monitor
                try:
                    from monitor import get_monitor
                    await get_monitor().track_janitor_event("promote", sig, {"count": USAGE_COUNT[sig]})
                except:
                    pass
                return HOT_POOL[sig]
            logger.info(f"❄️  Using cold pool browser (sig={sig[:8]})")
            return COLD_POOL[sig]
        # Memory check before creating new
        mem_pct = get_container_memory_percent()
        if mem_pct >= MEM_LIMIT:
            logger.error(f"💥 Memory pressure: {mem_pct:.1f}% >= {MEM_LIMIT}%")
            raise MemoryError(f"Memory at {mem_pct:.1f}%, refusing new browser")
        # Create new in cold pool
        logger.info(f"🆕 Creating new browser in cold pool (sig={sig[:8]}, mem={mem_pct:.1f}%)")
        crawler = AsyncWebCrawler(config=cfg, thread_safe=False)
        await crawler.start()
        COLD_POOL[sig] = crawler
        LAST_USED[sig] = time.time()
        USAGE_COUNT[sig] = 1
        return crawler
 async def init_permanent(cfg: BrowserConfig):
     """Initialize the permanent default browser (no-op if already set).

     Args:
         cfg: Browser configuration for the permanent crawler; its
             signature is cached in ``DEFAULT_CONFIG_SIG``.

     Raises:
         Whatever ``AsyncWebCrawler.start()`` raises. In that case no module
         state is modified, so a later call can retry.
     """
     global PERMANENT, DEFAULT_CONFIG_SIG
     async with LOCK:
         if PERMANENT:
             return
         sig = _sig(cfg)
         logger.info("🔥 Creating permanent default browser")
         browser = AsyncWebCrawler(config=cfg, thread_safe=False)
         await browser.start()
         # Publish the globals only after start() succeeded. Assigning
         # PERMANENT first meant a failed start left an unstarted crawler
         # installed, and the guard above then blocked any retry.
         PERMANENT = browser
         DEFAULT_CONFIG_SIG = sig
         LAST_USED[sig] = time.time()
         USAGE_COUNT[sig] = 0
 async def close_all():
    """Close all browsers."""
    async with LOCK:
-        tasks = []
+        await asyncio.gather(*(c.close() for c in POOL.values()), return_exceptions=True)
-        if PERMANENT:
+        POOL.clear(); LAST_USED.clear()
            tasks.append(PERMANENT.close())
        tasks.extend([c.close() for c in HOT_POOL.values()])
        tasks.extend([c.close() for c in COLD_POOL.values()])
        await asyncio.gather(*tasks, return_exceptions=True)
        HOT_POOL.clear()
        COLD_POOL.clear()
        LAST_USED.clear()
        USAGE_COUNT.clear()
 async def janitor():
    """Adaptive cleanup based on memory pressure."""
    while True:
-        mem_pct = get_container_memory_percent()
+        await asyncio.sleep(60)
        # Adaptive intervals and TTLs
        if mem_pct > 80:
            interval, cold_ttl, hot_ttl = 10, 30, 120
        elif mem_pct > 60:
            interval, cold_ttl, hot_ttl = 30, 60, 300
        else:
            interval, cold_ttl, hot_ttl = 60, BASE_IDLE_TTL, BASE_IDLE_TTL * 2
        await asyncio.sleep(interval)
        now = time.time()
        async with LOCK:
-            # Clean cold pool
+            for sig, crawler in list(POOL.items()):
-            for sig in list(COLD_POOL.keys()):
+                if now - LAST_USED[sig] > IDLE_TTL:
-                if now - LAST_USED.get(sig, now) > cold_ttl:
+                    with suppress(Exception): await crawler.close()
-                    idle_time = now - LAST_USED[sig]
+                    POOL.pop(sig, None); LAST_USED.pop(sig, None)
                    logger.info(f"🧹 Closing cold browser (sig={sig[:8]}, idle={idle_time:.0f}s)")
                    with suppress(Exception):
                        await COLD_POOL[sig].close()
                    COLD_POOL.pop(sig, None)
                    LAST_USED.pop(sig, None)
                    USAGE_COUNT.pop(sig, None)
                    # Track in monitor
                    try:
                        from monitor import get_monitor
                        await get_monitor().track_janitor_event("close_cold", sig, {"idle_seconds": int(idle_time), "ttl": cold_ttl})
                    except:
                        pass
            # Clean hot pool (more conservative)
            for sig in list(HOT_POOL.keys()):
                if now - LAST_USED.get(sig, now) > hot_ttl:
                    idle_time = now - LAST_USED[sig]
                    logger.info(f"🧹 Closing hot browser (sig={sig[:8]}, idle={idle_time:.0f}s)")
                    with suppress(Exception):
                        await HOT_POOL[sig].close()
                    HOT_POOL.pop(sig, None)
                    LAST_USED.pop(sig, None)
                    USAGE_COUNT.pop(sig, None)
                    # Track in monitor
                    try:
                        from monitor import get_monitor
                        await get_monitor().track_janitor_event("close_hot", sig, {"idle_seconds": int(idle_time), "ttl": hot_ttl})
                    except:
                        pass
            # Log pool stats
            if mem_pct > 60:
                logger.info(f"📊 Pool: hot={len(HOT_POOL)}, cold={len(COLD_POOL)}, mem={mem_pct:.1f}%")
--- a/deploy/docker/docs/ARCHITECTURE.md
+++ b/deploy/docker/docs/ARCHITECTURE.md
--- a/deploy/docker/docs/DOCKER_ORCHESTRATION.md
+++ b/deploy/docker/docs/DOCKER_ORCHESTRATION.md
--- a/deploy/docker/docs/MULTI_CONTAINER_ARCHITECTURE.md
+++ b/deploy/docker/docs/MULTI_CONTAINER_ARCHITECTURE.md
--- a/deploy/docker/docs/STRESS_TEST_PIPELINE.md
+++ b/deploy/docker/docs/STRESS_TEST_PIPELINE.md
@@ -1,241 +0,0 @@
 # Crawl4AI Docker Memory & Pool Optimization - Implementation Log
 ## Critical Issues Identified
 ### Memory Management
 - **Host vs Container**: `psutil.virtual_memory()` reported host memory, not container limits
 - **Browser Pooling**: No pool reuse - every endpoint created new browsers
 - **Warmup Waste**: Permanent browser sat idle with mismatched config signature
 - **Idle Cleanup**: 30min TTL too long, janitor ran every 60s
 - **Endpoint Inconsistency**: 75% of endpoints bypassed pool (`/md`, `/html`, `/screenshot`, `/pdf`, `/execute_js`, `/llm`)
 ### Pool Design Flaws
 - **Config Mismatch**: Permanent browser used `config.yml` args, endpoints used empty `BrowserConfig()`
 - **Logging Level**: Pool hit markers at DEBUG, invisible with INFO logging
 ## Implementation Changes
 ### 1. Container-Aware Memory Detection (`utils.py`)
 ```python
 def get_container_memory_percent() -> float:
    # Try cgroup v2 → v1 → fallback to psutil
    # Reads /sys/fs/cgroup/memory.{current,max} OR memory/memory.{usage,limit}_in_bytes
 ```
 ### 2. Smart Browser Pool (`crawler_pool.py`)
 **3-Tier System:**
 - **PERMANENT**: Always-ready default browser (never cleaned)
 - **HOT_POOL**: Configs used 3+ times (longer TTL)
 - **COLD_POOL**: New/rare configs (short TTL)
 **Key Functions:**
 - `get_crawler(cfg)`: Check permanent → hot → cold → create new
 - `init_permanent(cfg)`: Initialize permanent at startup
 - `janitor()`: Adaptive cleanup (10s/30s/60s intervals based on memory)
 - `_sig(cfg)`: SHA1 hash of config dict for pool keys
 **Logging Fix**: Changed `logger.debug()` → `logger.info()` for pool hits
 ### 3. Endpoint Unification
 **Helper Function** (`server.py`):
 ```python
 def get_default_browser_config() -> BrowserConfig:
    return BrowserConfig(
        extra_args=config["crawler"]["browser"].get("extra_args", []),
        **config["crawler"]["browser"].get("kwargs", {}),
    )
 ```
 **Migrated Endpoints:**
 - `/html`, `/screenshot`, `/pdf`, `/execute_js` → use `get_default_browser_config()`
 - `handle_llm_qa()`, `handle_markdown_request()` → same
 **Result**: All endpoints now hit permanent browser pool
 ### 4. Config Updates (`config.yml`)
 - `idle_ttl_sec: 1800` → `300` (30min → 5min base TTL)
 - `port: 11234` → `11235` (fixed mismatch with Gunicorn)
 ### 5. Lifespan Fix (`server.py`)
 ```python
 await init_permanent(BrowserConfig(
    extra_args=config["crawler"]["browser"].get("extra_args", []),
    **config["crawler"]["browser"].get("kwargs", {}),
 ))
 ```
 Permanent browser now matches endpoint config signatures
 ## Test Results
 ### Test 1: Basic Health
 - 10 requests to `/health`
 - **Result**: 100% success, avg 3ms latency
 - **Baseline**: Container starts in ~5s, 270 MB idle
 ### Test 2: Memory Monitoring
 - 20 requests with Docker stats tracking
 - **Result**: 100% success, no memory leak (-0.2 MB delta)
 - **Baseline**: 269.7 MB container overhead
 ### Test 3: Pool Validation
 - 30 requests to `/html` endpoint
 - **Result**: **100% permanent browser hits**, 0 new browsers created
 - **Memory**: 287 MB baseline → 396 MB active (+109 MB)
 - **Latency**: Avg 4s (includes network to httpbin.org)
 ### Test 4: Concurrent Load
 - Light (10) → Medium (50) → Heavy (100) concurrent
 - **Total**: 320 requests
 - **Result**: 100% success, **320/320 permanent hits**, 0 new browsers
 - **Memory**: 269 MB → peak 1533 MB → final 993 MB
 - **Latency**: P99 at 100 concurrent = 34s (expected with single browser)
 ### Test 5: Pool Stress (Mixed Configs)
 - 20 requests with 4 different viewport configs
 - **Result**: 4 new browsers, 4 cold hits, **4 promotions to hot**, 8 hot hits
 - **Reuse Rate**: 60% (12 pool hits / 20 requests)
 - **Memory**: 270 MB → 928 MB peak (+658 MB = ~165 MB per browser)
 - **Proves**: Cold → hot promotion at 3 uses working perfectly
 ### Test 6: Multi-Endpoint
 - 10 requests each: `/html`, `/screenshot`, `/pdf`, `/crawl`
 - **Result**: 100% success across all 4 endpoints
 - **Latency**: 5-8s avg (PDF slowest at 7.2s)
 ### Test 7: Cleanup Verification
 - 20 requests (load spike) → 90s idle
 - **Memory**: 269 MB → peak 1107 MB → final 780 MB
 - **Recovery**: 327 MB freed (39% of the 838 MB growth over the 269 MB baseline) - partial cleanup
 - **Note**: Hot pool browsers persist (by design), janitor working correctly
 ## Performance Metrics
 | Metric | Before | After | Improvement |
 |--------|--------|-------|-------------|
 | Pool Reuse | 0% | 100% (default config) | ∞ |
 | Memory Leak | Unknown | 0 MB/cycle | Stable |
 | Browser Reuse | No | Yes | ~3-5s saved per request |
 | Idle Memory | 500-700 MB × N | 270-400 MB | 10x reduction |
 | Concurrent Capacity | ~20 | 100+ | 5x |
 ## Key Learnings
 1. **Config Signature Matching**: Permanent browser MUST match endpoint default config exactly (SHA1 hash)
 2. **Logging Levels**: Pool diagnostics need INFO level, not DEBUG
 3. **Memory in Docker**: Must read cgroup files, not host metrics
 4. **Janitor Timing**: 60s interval adequate, but TTLs should be short (5min) for cold pool
 5. **Hot Promotion**: 3-use threshold works well for production patterns
 6. **Memory Per Browser**: ~150-200 MB per Chromium instance with headless + text_mode
 ## Test Infrastructure
 **Location**: `deploy/docker/tests/`
 **Dependencies**: `httpx`, `docker` (Python SDK)
 **Pattern**: Sequential build - each test adds one capability
 **Files**:
 - `test_1_basic.py`: Health check + container lifecycle
 - `test_2_memory.py`: + Docker stats monitoring
 - `test_3_pool.py`: + Log analysis for pool markers
 - `test_4_concurrent.py`: + asyncio.Semaphore for concurrency control
 - `test_5_pool_stress.py`: + Config variants (viewports)
 - `test_6_multi_endpoint.py`: + Multiple endpoint testing
 - `test_7_cleanup.py`: + Time-series memory tracking for janitor
 **Run Pattern**:
 ```bash
 cd deploy/docker/tests
 pip install -r requirements.txt
 # Rebuild after code changes:
 cd /path/to/repo && docker buildx build -t crawl4ai-local:latest --load .
 # Run test:
 python test_N_name.py
 ```
 ## Architecture Decisions
 **Why Permanent Browser?**
 - 90% of requests use default config → single browser serves most traffic
 - Eliminates 3-5s startup overhead per request
 **Why 3-Tier Pool?**
 - Permanent: Zero cost for common case
 - Hot: Amortized cost for frequent variants
 - Cold: Lazy allocation for rare configs
 **Why Adaptive Janitor?**
 - Memory pressure triggers aggressive cleanup
 - Low memory usage allows longer TTLs for better reuse
 **Why Not Close After Each Request?**
 - Browser startup: 3-5s overhead
 - Pool reuse: <100ms overhead
 - Net: 30-50x faster
 ## Future Optimizations
 1. **Request Queuing**: When at capacity, queue instead of reject
 2. **Pre-warming**: Predict common configs, pre-create browsers
 3. **Metrics Export**: Prometheus metrics for pool efficiency
 4. **Config Normalization**: Group similar viewports (e.g., 1920±50 → 1920)
 ## Critical Code Paths
 **Browser Acquisition** (`crawler_pool.py:34-78`):
 ```
 get_crawler(cfg) →
  _sig(cfg) →
  if sig == DEFAULT_CONFIG_SIG → PERMANENT
  elif sig in HOT_POOL → HOT_POOL[sig]
  elif sig in COLD_POOL → promote if count >= 3
  else → create new in COLD_POOL
 ```
 **Janitor Loop** (`crawler_pool.py:107-146`):
 ```
 while True:
  mem% = get_container_memory_percent()
  if mem% > 80: interval=10s, cold_ttl=30s
  elif mem% > 60: interval=30s, cold_ttl=60s
  else: interval=60s, cold_ttl=300s
  sleep(interval)
  close idle browsers (COLD then HOT)
 ```
 **Endpoint Pattern** (`server.py` example):
 ```python
@app.post("/html")
 async def generate_html(...):
    from crawler_pool import get_crawler
    crawler = await get_crawler(get_default_browser_config())
    results = await crawler.arun(url=body.url, config=cfg)
    # No crawler.close() - returned to pool
 ```
 ## Debugging Tips
 **Check Pool Activity**:
 ```bash
 docker logs crawl4ai-test | grep -E "(🔥|♨️|❄️|🆕|⬆️)"
 ```
 **Verify Config Signature**:
 ```python
 from crawl4ai import BrowserConfig
 import json, hashlib
 cfg = BrowserConfig(...)
 sig = hashlib.sha1(json.dumps(cfg.to_dict(), sort_keys=True).encode()).hexdigest()
 print(sig[:8])  # Compare with logs
 ```
 **Monitor Memory**:
 ```bash
 docker stats crawl4ai-test
 ```
 ## Known Limitations
 - **Mac Docker Stats**: CPU metrics unreliable, memory works
 - **PDF Generation**: Slowest endpoint (~7s), no optimization yet
 - **Hot Pool Persistence**: May hold memory longer than needed (trade-off for performance)
 - **Janitor Lag**: Up to 60s before cleanup triggers when memory usage is low (the janitor sleeps 60s between passes at ≤60% usage)
--- a/deploy/docker/hook_manager.py
+++ b/deploy/docker/hook_manager.py
@@ -1,512 +0,0 @@
 """
 Hook Manager for User-Provided Hook Functions
 Handles validation, compilation, and safe execution of user-provided hook code
 """
 import ast
 import asyncio
 import traceback
 from typing import Dict, Callable, Optional, Tuple, List, Any
 import logging
 logger = logging.getLogger(__name__)
 class UserHookManager:
    """Validate, compile and run user-provided hook functions with error isolation.
    Every failure (compilation, timeout, exception raised by the hook) is
    recorded in ``self.errors``; every execution attempt is recorded in
    ``self.execution_log`` so callers can report on what happened.
    """
    # Expected signatures for each hook point
    HOOK_SIGNATURES = {
        "on_browser_created": ["browser"],
        "on_page_context_created": ["page", "context"],
        "before_goto": ["page", "context", "url"],
        "after_goto": ["page", "context", "url", "response"],
        "on_user_agent_updated": ["page", "context", "user_agent"],
        "on_execution_started": ["page", "context"],
        "before_retrieve_html": ["page", "context"],
        "before_return_html": ["page", "context", "html"]
    }
    # Default timeout for hook execution (in seconds)
    DEFAULT_TIMEOUT = 30
    # Builtins exposed to user hook code.
    # NOTE(security): this is NOT a sandbox. '__import__', 'getattr', 'setattr'
    # and 'type' let hook code reach arbitrary modules and objects, and the
    # code is run with exec(). Only accept hooks from trusted callers.
    _ALLOWED_BUILTINS = (
        'print', 'len', 'str', 'int', 'float', 'bool',
        'list', 'dict', 'set', 'tuple', 'range', 'enumerate',
        'zip', 'map', 'filter', 'any', 'all', 'sum', 'min', 'max',
        'sorted', 'reversed', 'abs', 'round', 'isinstance', 'type',
        'getattr', 'hasattr', 'setattr', 'callable', 'iter', 'next',
        '__import__', '__build_class__',  # Required for exec
    )
    def __init__(self, timeout: int = DEFAULT_TIMEOUT):
        """
        Args:
            timeout: Maximum number of seconds a single hook execution may take
        """
        self.timeout = timeout
        # One dict per compilation/execution failure
        self.errors: List[Dict[str, Any]] = []
        self.compiled_hooks: Dict[str, Callable] = {}
        # One dict per execution attempt (success, failed or timeout)
        self.execution_log: List[Dict[str, Any]] = []
    @staticmethod
    def _first_function_def(tree: ast.Module):
        """Return the first top-level (async) function definition in *tree*, or None."""
        for node in tree.body:
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                return node
        return None
    def validate_hook_structure(self, hook_code: str, hook_point: str) -> Tuple[bool, str]:
        """
        Validate the structure of user-provided hook code
        The first top-level function in the code is treated as the hook. It
        must be ``async`` and must accept every parameter listed for the hook
        point in HOOK_SIGNATURES (or declare ``**kwargs``).
        Args:
            hook_code: The Python code string containing the hook function
            hook_point: The hook point name (e.g., 'on_page_context_created')
        Returns:
            Tuple of (is_valid, error_message); the message is "Valid" on success
        """
        try:
            tree = ast.parse(hook_code)
            if not tree.body:
                return False, "Hook code is empty"
            func_def = self._first_function_def(tree)
            if func_def is None:
                return False, "Hook must contain a function definition (def or async def)"
            # All hooks are awaited, so a plain 'def' is rejected
            if not isinstance(func_def, ast.AsyncFunctionDef):
                return False, "Hook function must be async (use 'async def' instead of 'def')"
            # Function name is only used for better error messages
            func_name = func_def.name
            expected_params = self.HOOK_SIGNATURES.get(hook_point, [])
            if not expected_params:
                return False, f"Unknown hook point: {hook_point}"
            func_params = [arg.arg for arg in func_def.args.args]
            # **kwargs makes the hook accept any expected parameter
            has_kwargs = func_def.args.kwarg is not None
            missing_params = [name for name in expected_params if name not in func_params]
            if missing_params and not has_kwargs:
                return False, f"Hook function '{func_name}' must accept parameters: {', '.join(expected_params)} (missing: {', '.join(missing_params)})"
            # A missing return is a warning, not an error - the wrapper copes with None
            has_return = any(isinstance(node, ast.Return) for node in ast.walk(func_def))
            if not has_return:
                logger.warning(f"Hook function '{func_name}' should return the {expected_params[0]} object")
            return True, "Valid"
        except SyntaxError as e:
            return False, f"Syntax error at line {e.lineno}: {str(e)}"
        except Exception as e:
            return False, f"Failed to parse hook code: {str(e)}"
    def compile_hook(self, hook_code: str, hook_point: str) -> Optional[Callable]:
        """
        Compile user-provided hook code into a callable function
        The hook is resolved by name: it is the first top-level function
        defined in the code (the same one validate_hook_structure checks).
        Names that were pre-seeded into the namespace or merely imported by
        the user code are never mistaken for the hook.
        Args:
            hook_code: The Python code string
            hook_point: The hook point name
        Returns:
            Compiled function or None if compilation failed (the failure is
            appended to ``self.errors``)
        """
        try:
            import builtins
            safe_builtins = {
                name: getattr(builtins, name)
                for name in self._ALLOWED_BUILTINS
                if hasattr(builtins, name)
            }
            namespace = {
                '__name__': f'user_hook_{hook_point}',
                '__builtins__': safe_builtins
            }
            # Add commonly needed imports
            exec("import asyncio", namespace)
            exec("import json", namespace)
            exec("import re", namespace)
            exec("from typing import Dict, List, Optional", namespace)
            # Remember what we seeded so it is not confused with user definitions
            preloaded = set(namespace)
            entry = self._first_function_def(ast.parse(hook_code))
            # Execute the code to define the function
            exec(hook_code, namespace)
            # The declared hook comes first, then any other public callable
            # the user code introduced (in definition order).
            candidates = []
            if entry is not None and callable(namespace.get(entry.name)):
                candidates.append((entry.name, namespace[entry.name]))
            candidates.extend(
                (name, obj) for name, obj in namespace.items()
                if name not in preloaded
                and (entry is None or name != entry.name)
                and callable(obj)
                and not name.startswith('_')
            )
            # Prefer an async function
            for _, obj in candidates:
                if asyncio.iscoroutinefunction(obj):
                    return obj
            # Otherwise wrap the first sync callable so it can be awaited
            if candidates:
                name, sync_hook = candidates[0]
                logger.warning(f"Found non-async function '{name}' - wrapping it")
                async def async_wrapper(*args, **kwargs):
                    return sync_hook(*args, **kwargs)
                return async_wrapper
            raise ValueError("No callable function found in hook code")
        except Exception as e:
            error = {
                'hook_point': hook_point,
                'error': f"Failed to compile hook: {str(e)}",
                'type': 'compilation_error',
                'traceback': traceback.format_exc()
            }
            self.errors.append(error)
            logger.error(f"Hook compilation failed for {hook_point}: {str(e)}")
            return None
    async def execute_hook_safely(
        self, 
        hook_func: Callable, 
        hook_point: str,
        *args, 
        **kwargs
    ) -> Tuple[Any, Optional[Dict]]:
        """
        Execute a user hook with error isolation and timeout
        Args:
            hook_func: The compiled hook function
            hook_point: The hook point name
            *args, **kwargs: Arguments to pass to the hook
        Returns:
            Tuple of (result, error_dict). On success error_dict is None. On
            timeout or exception the result is the first positional argument
            (or None when there is none) so the caller can continue.
        """
        # We are inside a coroutine, so a running loop is guaranteed
        loop = asyncio.get_running_loop()
        start_time = loop.time()
        try:
            # Add timeout to prevent infinite loops
            result = await asyncio.wait_for(
                hook_func(*args, **kwargs),
                timeout=self.timeout
            )
            self.execution_log.append({
                'hook_point': hook_point,
                'status': 'success',
                'execution_time': loop.time() - start_time,
                'timestamp': start_time
            })
            return result, None
        except asyncio.TimeoutError:
            error = {
                'hook_point': hook_point,
                'error': f'Hook execution timed out ({self.timeout}s limit)',
                'type': 'timeout',
                'execution_time': self.timeout
            }
            self.errors.append(error)
            self.execution_log.append({
                'hook_point': hook_point,
                'status': 'timeout',
                'error': error['error'],
                'execution_time': self.timeout,
                'timestamp': start_time
            })
            # Return the first argument (usually page/browser) to continue
            return args[0] if args else None, error
        except Exception as e:
            execution_time = loop.time() - start_time
            error = {
                'hook_point': hook_point,
                'error': str(e),
                'type': type(e).__name__,
                'traceback': traceback.format_exc(),
                'execution_time': execution_time
            }
            self.errors.append(error)
            self.execution_log.append({
                'hook_point': hook_point,
                'status': 'failed',
                'error': str(e),
                'error_type': type(e).__name__,
                'execution_time': execution_time,
                'timestamp': start_time
            })
            # Return the first argument (usually page/browser) to continue
            return args[0] if args else None, error
    def get_summary(self) -> Dict[str, Any]:
        """Get a summary of hook execution (counts per status and success rate in percent)"""
        total_hooks = len(self.execution_log)
        successful = sum(1 for log in self.execution_log if log['status'] == 'success')
        failed = sum(1 for log in self.execution_log if log['status'] == 'failed')
        timed_out = sum(1 for log in self.execution_log if log['status'] == 'timeout')
        return {
            'total_executions': total_hooks,
            'successful': successful,
            'failed': failed,
            'timed_out': timed_out,
            'success_rate': (successful / total_hooks * 100) if total_hooks > 0 else 0,
            'total_errors': len(self.errors)
        }
 class IsolatedHookWrapper:
    """Produces wrappers that keep user hook failures away from the crawl itself"""
    def __init__(self, hook_manager: UserHookManager):
        self.hook_manager = hook_manager
    def create_hook_wrapper(self, user_hook: Callable, hook_point: str) -> Callable:
        """
        Build an async wrapper around a compiled user hook
        The wrapper runs the hook through the manager's execute_hook_safely and
        never raises: whenever the hook fails, times out or returns None, the
        object originally passed in is returned instead.
        Args:
            user_hook: The compiled user hook function
            hook_point: The hook point name
        Returns:
            Wrapped async function that handles errors gracefully
        """
        async def wrapped_hook(*args, **kwargs):
            """Wrapped hook with error isolation"""
            # Fallback return value: first positional argument, else the
            # 'page' keyword, else the 'browser' keyword, else None.
            if args:
                fallback = args[0]
            else:
                fallback = kwargs.get('page', kwargs.get('browser'))
            try:
                outcome, failure = await self.hook_manager.execute_hook_safely(
                    user_hook,
                    hook_point,
                    *args,
                    **kwargs
                )
                if failure:
                    # The hook failed; carry on with the untouched object
                    logger.warning(f"User hook failed at {hook_point}: {failure['error']}")
                    return fallback
                if outcome is not None:
                    return outcome
                logger.debug(f"Hook at {hook_point} returned None, using original object")
                return fallback
            except Exception as exc:
                # execute_hook_safely already isolates errors, so this is a last resort
                logger.error(f"Unexpected error in hook wrapper for {hook_point}: {exc}")
                return fallback
        # Give the wrapper a recognisable name for debugging
        wrapped_hook.__name__ = f"wrapped_{hook_point}"
        return wrapped_hook
 async def process_user_hooks(
    hooks_input: Dict[str, str],
    timeout: int = 30
 ) -> Tuple[Dict[str, Callable], List[Dict], UserHookManager]:
    """
    Process and compile user-provided hook functions
    Creates a fresh UserHookManager and hands the actual validation,
    compilation and wrapping to process_user_hooks_with_manager, so both entry
    points always apply the same rules.
    Args:
        hooks_input: Dictionary mapping hook points to code strings
        timeout: Timeout for each hook execution
    Returns:
        Tuple of (compiled_hooks, validation_errors, hook_manager)
    """
    hook_manager = UserHookManager(timeout=timeout)
    # Single implementation of the validate/compile/wrap pipeline lives in
    # process_user_hooks_with_manager; duplicating it here invited drift.
    compiled_hooks, validation_errors = await process_user_hooks_with_manager(
        hooks_input, hook_manager
    )
    return compiled_hooks, validation_errors, hook_manager
 async def process_user_hooks_with_manager(
    hooks_input: Dict[str, str],
    hook_manager: UserHookManager
 ) -> Tuple[Dict[str, Callable], List[Dict]]:
    """
    Validate, compile and wrap user hooks using an existing manager
    Blank hook code is skipped silently. Each remaining hook must name a known
    hook point, pass structural validation and compile; any hook that does not
    is reported in the returned error list together with a preview of its
    code (first 100 characters).
    Args:
        hooks_input: Dictionary mapping hook points to code strings
        hook_manager: Existing UserHookManager instance
    Returns:
        Tuple of (compiled_hooks, validation_errors)
    """
    isolator = IsolatedHookWrapper(hook_manager)
    ready: Dict[str, Callable] = {}
    problems: List[Dict] = []
    def _report(point, source, reason):
        # Record one rejected hook along with a truncated preview of its code
        preview = source[:100] + '...' if len(source) > 100 else source
        problems.append({
            'hook_point': point,
            'error': reason,
            'code_preview': preview
        })
    for point, source in hooks_input.items():
        # Nothing to do for empty hooks
        if not source or not source.strip():
            continue
        # The hook point must be one we know how to attach
        if point not in UserHookManager.HOOK_SIGNATURES:
            _report(point, source, f'Unknown hook point. Valid points: {", ".join(UserHookManager.HOOK_SIGNATURES.keys())}')
            continue
        # Structural validation (async function, expected parameters)
        valid, reason = hook_manager.validate_hook_structure(source, point)
        if not valid:
            _report(point, source, reason)
            continue
        # Compilation
        compiled = hook_manager.compile_hook(source, point)
        if not compiled:
            _report(point, source, 'Failed to compile hook function - check syntax and structure')
            continue
        # Wrap with error isolation before handing it out
        ready[point] = isolator.create_hook_wrapper(compiled, point)
        logger.info(f"Successfully compiled hook for {point}")
    return ready, problems
 async def attach_user_hooks_to_crawler(
    crawler,  # AsyncWebCrawler instance
    user_hooks: Dict[str, str],
    timeout: int = 30,
    hook_manager: Optional[UserHookManager] = None
 ) -> Tuple[Dict[str, Any], UserHookManager]:
    """
    Compile user hooks and register them on the crawler's strategy
    Hooks that fail validation, compilation or attachment are reported in the
    status dict instead of raising. The overall status is 'success' when no
    error was recorded, 'partial' when errors were recorded but at least one
    hook was attached, and 'failed' otherwise.
    Args:
        crawler: AsyncWebCrawler instance
        user_hooks: Dictionary mapping hook points to code strings
        timeout: Timeout for each hook execution (used only when a new manager is created)
        hook_manager: Optional existing UserHookManager instance
    Returns:
        Tuple of (status_dict, hook_manager)
    """
    # Fall back to a fresh manager when the caller did not supply one
    manager = hook_manager if hook_manager is not None else UserHookManager(timeout=timeout)
    compiled, problems = await process_user_hooks_with_manager(user_hooks, manager)
    if problems:
        logger.warning(f"Hook validation errors: {problems}")
    # Register every hook that compiled; attachment failures are recorded too
    attached = []
    for point, hook in compiled.items():
        try:
            crawler.crawler_strategy.set_hook(point, hook)
            attached.append(point)
            logger.info(f"Attached hook to {point}")
        except Exception as exc:
            logger.error(f"Failed to attach hook to {point}: {exc}")
            problems.append({
                'hook_point': point,
                'error': f'Failed to attach hook: {str(exc)}'
            })
    if not problems:
        status = 'success'
    elif attached:
        status = 'partial'
    else:
        status = 'failed'
    report = {
        'status': status,
        'attached_hooks': attached,
        'validation_errors': problems,
        'total_hooks_provided': len(user_hooks),
        'successfully_attached': len(attached),
        'failed_validation': len(problems)
    }
    return report, manager
--- a/deploy/docker/job.py
+++ b/deploy/docker/job.py
@@ -12,7 +12,6 @@ from api import (
    handle_crawl_job,
    handle_task_status,
 )
 from schemas import WebhookConfig
 # ------------- dependency placeholders -------------
 _redis = None        # will be injected from server.py
@@ -38,7 +37,6 @@ class LlmJobPayload(BaseModel):
    schema: Optional[str] = None
    cache:  bool = False
    provider: Optional[str] = None
    webhook_config: Optional[WebhookConfig] = None
    temperature: Optional[float] = None
    base_url: Optional[str] = None
@@ -47,7 +45,6 @@ class CrawlJobPayload(BaseModel):
    urls:           list[HttpUrl]
    browser_config: Dict = {}
    crawler_config: Dict = {}
    webhook_config: Optional[WebhookConfig] = None
 # ---------- LLM job ---------------------------------------------------------
@@ -58,10 +55,6 @@ async def llm_job_enqueue(
        request: Request,
        _td: Dict = Depends(lambda: _token_dep()),   # late-bound dep
 ):
    webhook_config = None
    if payload.webhook_config:
        webhook_config = payload.webhook_config.model_dump(mode='json')
    return await handle_llm_request(
        _redis,
        background_tasks,
@@ -72,7 +65,6 @@ async def llm_job_enqueue(
        cache=payload.cache,
        config=_config,
        provider=payload.provider,
        webhook_config=webhook_config,
        temperature=payload.temperature,
        api_base_url=payload.base_url,
    )
@@ -94,10 +86,6 @@ async def crawl_job_enqueue(
        background_tasks: BackgroundTasks,
        _td: Dict = Depends(lambda: _token_dep()),
 ):
    webhook_config = None
    if payload.webhook_config:
        webhook_config = payload.webhook_config.model_dump(mode='json')
    return await handle_crawl_job(
        _redis,
        background_tasks,
@@ -105,7 +93,6 @@ async def crawl_job_enqueue(
        payload.browser_config,
        payload.crawler_config,
        config=_config,
        webhook_config=webhook_config,
    )
--- a/deploy/docker/monitor.py
+++ b/deploy/docker/monitor.py
@@ -1,663 +0,0 @@
 # monitor.py - Real-time monitoring stats with Redis persistence
 import time
 import json
 import asyncio
 from typing import Dict, List, Optional
 from datetime import datetime, timezone
 from collections import deque
 from dataclasses import dataclass
 from redis import asyncio as aioredis
 from utils import get_container_memory_percent
 import psutil
 import logging
 logger = logging.getLogger(__name__)
 # ========== Configuration ==========
@dataclass
 class RedisTTLConfig:
    """Redis TTL configuration (in seconds).

    Configures how long different types of monitoring data are retained in Redis.
    Adjust based on your monitoring needs and Redis memory constraints.

    Every field has a matching ``REDIS_TTL_<FIELD_NAME>`` environment variable
    that :meth:`from_env` reads; the fallback values used there are the same
    numbers as the field defaults below.
    """
    active_requests: int = 300  # 5 minutes - short-lived active request data
    completed_requests: int = 3600  # 1 hour - recent completed requests
    janitor_events: int = 3600  # 1 hour - browser cleanup events
    errors: int = 3600  # 1 hour - error logs
    endpoint_stats: int = 86400  # 24 hours - aggregated endpoint statistics
    heartbeat: int = 60  # 1 minute - container heartbeat (2x the 30s interval)
    @classmethod
    def from_env(cls) -> 'RedisTTLConfig':
        """Load TTL configuration from environment variables.

        Returns:
            A new instance whose fields come from the ``REDIS_TTL_*``
            variables, each converted with ``int``. Unset variables fall back
            to the literal defaults passed to ``os.getenv`` below.

        Raises:
            ValueError: If a variable is set to a value ``int`` cannot parse.
        """
        # Imported locally; the module does not import ``os`` at the top.
        import os
        return cls(
            active_requests=int(os.getenv('REDIS_TTL_ACTIVE_REQUESTS', 300)),
            completed_requests=int(os.getenv('REDIS_TTL_COMPLETED_REQUESTS', 3600)),
            janitor_events=int(os.getenv('REDIS_TTL_JANITOR_EVENTS', 3600)),
            errors=int(os.getenv('REDIS_TTL_ERRORS', 3600)),
            endpoint_stats=int(os.getenv('REDIS_TTL_ENDPOINT_STATS', 86400)),
            heartbeat=int(os.getenv('REDIS_TTL_HEARTBEAT', 60)),
        )
 class MonitorStats:
    """Tracks real-time server stats with Redis persistence."""
    def __init__(self, redis: aioredis.Redis, ttl_config: Optional[RedisTTLConfig] = None):
        """Set up in-memory monitoring state; no Redis call and no task is started here.

        Args:
            redis: Async Redis client used by the ``_persist_*`` methods and
                the heartbeat worker.
            ttl_config: Expiry settings for the persisted keys. When omitted
                (or falsy) the values are read via ``RedisTTLConfig.from_env()``.
        """
        self.redis = redis
        self.ttl = ttl_config or RedisTTLConfig.from_env()
        self.start_time = time.time()
        # Get container ID for Redis keys
        # (local import; how the ID is derived is defined in utils, not shown here)
        from utils import get_container_id
        self.container_id = get_container_id()
        # In-memory queues (fast reads, Redis backup)
        self.active_requests: Dict[str, Dict] = {}  # id -> request info
        self.completed_requests: deque = deque(maxlen=100)  # Last 100
        self.janitor_events: deque = deque(maxlen=100)
        self.errors: deque = deque(maxlen=100)
        # Endpoint stats (persisted in Redis)
        self.endpoint_stats: Dict[str, Dict] = {}  # endpoint -> {count, total_time, errors, ...}
        # Background persistence queue (max 10 pending persist requests)
        self._persist_queue: asyncio.Queue = asyncio.Queue(maxsize=10)
        self._persist_worker_task: Optional[asyncio.Task] = None
        # Heartbeat task for container discovery
        self._heartbeat_task: Optional[asyncio.Task] = None
        # Timeline data (5min window, 5s resolution = 60 points)
        self.memory_timeline: deque = deque(maxlen=60)
        self.requests_timeline: deque = deque(maxlen=60)
        self.browser_timeline: deque = deque(maxlen=60)
    async def track_request_start(self, request_id: str, endpoint: str, url: str, config: Optional[Dict] = None) -> None:
        """Track new request start.

        Records the request in ``active_requests``, writes the active set to
        Redis, bumps the per-endpoint counter and asks the background worker
        to persist endpoint stats.

        Args:
            request_id: Key under which the request is stored; an existing
                entry with the same id is overwritten.
            endpoint: Endpoint name used as the key in ``endpoint_stats``.
            url: Request URL; only the first 100 characters are kept.
            config: Optional mapping; only its ``"sig"`` entry is read
                (``"default"`` when missing or when ``config`` is falsy).
        """
        req_info = {
            "id": request_id,
            "endpoint": endpoint,
            "url": url[:100],  # Truncate long URLs
            "start_time": time.time(),
            "config_sig": config.get("sig", "default") if config else "default",
            # Resident set size of this process in MiB at request start
            "mem_start": psutil.Process().memory_info().rss / (1024 * 1024),
            "container_id": self.container_id
        }
        self.active_requests[request_id] = req_info
        # Persist to Redis
        await self._persist_active_requests()
        # Increment endpoint counter
        if endpoint not in self.endpoint_stats:
            self.endpoint_stats[endpoint] = {
                "count": 0, "total_time": 0, "errors": 0,
                "pool_hits": 0, "success": 0
            }
        self.endpoint_stats[endpoint]["count"] += 1
        # Queue persistence (handled by background worker)
        # put_nowait never blocks the request; a full queue only drops this
        # persistence request, not the in-memory counters.
        try:
            self._persist_queue.put_nowait(True)
        except asyncio.QueueFull:
            logger.warning("Persistence queue full, skipping")
    async def track_request_end(self, request_id: str, success: bool, error: Optional[str] = None,
                               pool_hit: bool = True, status_code: int = 200) -> None:
        """Track request completion.

        Moves the request from ``active_requests`` to ``completed_requests``,
        updates the endpoint counters, records an error entry when applicable
        and persists the affected collections to Redis.

        Args:
            request_id: Id previously passed to ``track_request_start``.
                Unknown ids are ignored silently.
            success: Whether the request succeeded.
            error: Error text; an error entry is stored only when ``success``
                is false and this value is truthy.
            pool_hit: Counted in the endpoint's ``pool_hits`` when true.
            status_code: Stored verbatim on the completed record.
        """
        if request_id not in self.active_requests:
            return
        req_info = self.active_requests.pop(request_id)
        end_time = time.time()
        elapsed = end_time - req_info["start_time"]
        # Process RSS in MiB now, compared with the value captured at start
        mem_end = psutil.Process().memory_info().rss / (1024 * 1024)
        mem_delta = mem_end - req_info["mem_start"]
        # Update stats
        endpoint = req_info["endpoint"]
        # Counters are only touched when the endpoint already has an entry
        if endpoint in self.endpoint_stats:
            self.endpoint_stats[endpoint]["total_time"] += elapsed
            if success:
                self.endpoint_stats[endpoint]["success"] += 1
            else:
                self.endpoint_stats[endpoint]["errors"] += 1
            if pool_hit:
                self.endpoint_stats[endpoint]["pool_hits"] += 1
        # Add to completed queue
        completed = {
            **req_info,
            "end_time": end_time,
            "elapsed": round(elapsed, 2),
            "mem_delta": round(mem_delta, 1),
            "success": success,
            "error": error,
            "status_code": status_code,
            "pool_hit": pool_hit,
            "container_id": self.container_id
        }
        self.completed_requests.append(completed)
        # Persist to Redis
        await self._persist_completed_requests()
        await self._persist_active_requests()  # Update active (removed this request)
        # Track errors
        if not success and error:
            error_entry = {
                "timestamp": end_time,
                "endpoint": endpoint,
                "url": req_info["url"],
                "error": error,
                "request_id": request_id,
                "message": error,
                "level": "ERROR",
                "container_id": self.container_id
            }
            self.errors.append(error_entry)
            await self._persist_errors()
        # Endpoint stats are written directly here rather than through the persistence queue
        await self._persist_endpoint_stats()
    async def track_janitor_event(self, event_type: str, sig: str, details: Dict):
        """Track janitor cleanup events."""
        self.janitor_events.append({
            "timestamp": time.time(),
            "type": event_type,  # "close_cold", "close_hot", "promote"
            "sig": sig[:8],
            "details": details,
            "container_id": self.container_id
        })
        await self._persist_janitor_events()
    def _cleanup_old_entries(self, max_age_seconds: int = 300):
        """Remove entries older than max_age_seconds (default 5min)."""
        now = time.time()
        cutoff = now - max_age_seconds
        # Clean completed requests
        while self.completed_requests and self.completed_requests[0].get("end_time", 0) < cutoff:
            self.completed_requests.popleft()
        # Clean janitor events
        while self.janitor_events and self.janitor_events[0].get("timestamp", 0) < cutoff:
            self.janitor_events.popleft()
        # Clean errors
        while self.errors and self.errors[0].get("timestamp", 0) < cutoff:
            self.errors.popleft()
    async def update_timeline(self):
        """Update timeline data points (called every 5s).

        Appends one sample to each of the memory, request-rate and browser
        timelines. Old log entries are trimmed first. If the crawler-pool lock
        cannot be acquired within 2 seconds, the most recently recorded
        browser counts are reused; only when no sample exists yet do the
        static defaults apply.
        """
        now = time.time()
        mem_pct = get_container_memory_percent()
        # Clean old entries (keep last 5 minutes)
        self._cleanup_old_entries(max_age_seconds=300)
        # Count requests in last 5s
        recent_reqs = sum(1 for req in self.completed_requests
                         if now - req.get("end_time", 0) < 5)
        # Browser counts (acquire lock with timeout to prevent deadlock)
        from crawler_pool import PERMANENT, HOT_POOL, COLD_POOL, LOCK
        try:
            async with asyncio.timeout(2.0):
                async with LOCK:
                    browser_count = {
                        "permanent": 1 if PERMANENT else 0,
                        "hot": len(HOT_POOL),
                        "cold": len(COLD_POOL)
                    }
        except asyncio.TimeoutError:
            logger.warning("Lock acquisition timeout in update_timeline, using cached browser counts")
            # Use last known values or defaults
            if self.browser_timeline:
                # Copy so later samples never share a dict with an older one
                browser_count = dict(self.browser_timeline[-1]["browsers"])
            else:
                browser_count = {
                    "permanent": 1,
                    "hot": 0,
                    "cold": 0
                }
        self.memory_timeline.append({"time": now, "value": mem_pct})
        self.requests_timeline.append({"time": now, "value": recent_reqs})
        self.browser_timeline.append({"time": now, "browsers": browser_count})
    async def _persist_endpoint_stats(self):
        """Persist endpoint stats to Redis with retry logic.

        Writes ``self.endpoint_stats`` as JSON to the ``monitor:endpoint_stats``
        key (shared, not container-specific) with the configured TTL. Up to
        three attempts are made on connection errors. Failures are logged and
        never raised to the caller.
        """
        max_retries = 3
        for attempt in range(max_retries):
            try:
                await self.redis.set(
                    "monitor:endpoint_stats",
                    json.dumps(self.endpoint_stats),
                    ex=self.ttl.endpoint_stats
                )
                return  # Success
            except aioredis.ConnectionError as e:
                if attempt < max_retries - 1:
                    backoff = 0.5 * (2 ** attempt)  # 0.5s, then 1s (the last attempt logs instead of sleeping)
                    logger.warning(f"Redis connection error persisting endpoint stats (attempt {attempt + 1}/{max_retries}), retrying in {backoff}s: {e}")
                    await asyncio.sleep(backoff)
                else:
                    logger.error(f"Failed to persist endpoint stats after {max_retries} attempts: {e}")
            except Exception as e:
                # Anything other than a connection error is not retried
                logger.error(f"Non-retryable error persisting endpoint stats: {e}")
                break
    async def _persist_active_requests(self):
        """Persist active requests to Redis with retry logic.

        Stores the current active requests as a JSON list under this
        container's ``active_requests`` key, or deletes the key when there are
        none. Up to three attempts are made on connection errors. Failures are
        logged and never raised to the caller.
        """
        max_retries = 3
        for attempt in range(max_retries):
            try:
                if self.active_requests:
                    await self.redis.set(
                        f"monitor:{self.container_id}:active_requests",
                        json.dumps(list(self.active_requests.values())),
                        ex=self.ttl.active_requests
                    )
                else:
                    # Nothing in flight: remove the key instead of storing an empty list
                    await self.redis.delete(f"monitor:{self.container_id}:active_requests")
                return  # Success
            except aioredis.ConnectionError as e:
                if attempt < max_retries - 1:
                    backoff = 0.5 * (2 ** attempt)  # 0.5s, then 1s (the last attempt logs instead of sleeping)
                    logger.warning(f"Redis connection error persisting active requests (attempt {attempt + 1}/{max_retries}), retrying in {backoff}s: {e}")
                    await asyncio.sleep(backoff)
                else:
                    logger.error(f"Failed to persist active requests after {max_retries} attempts: {e}")
            except Exception as e:
                # Anything other than a connection error is not retried
                logger.error(f"Non-retryable error persisting active requests: {e}")
                break
    async def _persist_completed_requests(self):
        """Persist completed requests to Redis with retry logic.

        Writes the in-memory completed-request log as a JSON list under this
        container's ``completed`` key with the configured TTL. Up to three
        attempts are made on connection errors. Failures are logged and never
        raised to the caller.
        """
        max_retries = 3
        for attempt in range(max_retries):
            try:
                await self.redis.set(
                    f"monitor:{self.container_id}:completed",
                    json.dumps(list(self.completed_requests)),
                    ex=self.ttl.completed_requests
                )
                return  # Success
            except aioredis.ConnectionError as e:
                if attempt < max_retries - 1:
                    backoff = 0.5 * (2 ** attempt)  # 0.5s, then 1s (the last attempt logs instead of sleeping)
                    logger.warning(f"Redis connection error persisting completed requests (attempt {attempt + 1}/{max_retries}), retrying in {backoff}s: {e}")
                    await asyncio.sleep(backoff)
                else:
                    logger.error(f"Failed to persist completed requests after {max_retries} attempts: {e}")
            except Exception as e:
                # Anything other than a connection error is not retried
                logger.error(f"Non-retryable error persisting completed requests: {e}")
                break
    async def _persist_janitor_events(self):
        """Persist janitor events to Redis with retry logic.

        Writes the in-memory janitor event log as a JSON list under this
        container's ``janitor`` key with the configured TTL. Up to three
        attempts are made on connection errors. Failures are logged and never
        raised to the caller.
        """
        max_retries = 3
        for attempt in range(max_retries):
            try:
                await self.redis.set(
                    f"monitor:{self.container_id}:janitor",
                    json.dumps(list(self.janitor_events)),
                    ex=self.ttl.janitor_events
                )
                return  # Success
            except aioredis.ConnectionError as e:
                if attempt < max_retries - 1:
                    backoff = 0.5 * (2 ** attempt)  # 0.5s, then 1s (the last attempt logs instead of sleeping)
                    logger.warning(f"Redis connection error persisting janitor events (attempt {attempt + 1}/{max_retries}), retrying in {backoff}s: {e}")
                    await asyncio.sleep(backoff)
                else:
                    logger.error(f"Failed to persist janitor events after {max_retries} attempts: {e}")
            except Exception as e:
                # Anything other than a connection error is not retried
                logger.error(f"Non-retryable error persisting janitor events: {e}")
                break
    async def _persist_errors(self):
        """Persist errors to Redis with retry logic.

        Writes the in-memory error log as a JSON list under this container's
        ``errors`` key with the configured TTL. Up to three attempts are made
        on connection errors. Failures are logged and never raised to the
        caller.
        """
        max_retries = 3
        for attempt in range(max_retries):
            try:
                await self.redis.set(
                    f"monitor:{self.container_id}:errors",
                    json.dumps(list(self.errors)),
                    ex=self.ttl.errors
                )
                return  # Success
            except aioredis.ConnectionError as e:
                if attempt < max_retries - 1:
                    backoff = 0.5 * (2 ** attempt)  # 0.5s, then 1s (the last attempt logs instead of sleeping)
                    logger.warning(f"Redis connection error persisting errors (attempt {attempt + 1}/{max_retries}), retrying in {backoff}s: {e}")
                    await asyncio.sleep(backoff)
                else:
                    logger.error(f"Failed to persist errors after {max_retries} attempts: {e}")
            except Exception as e:
                # Anything other than a connection error is not retried
                logger.error(f"Non-retryable error persisting errors: {e}")
                break
    async def _persistence_worker(self):
        """Background worker to persist stats to Redis.

        Loops forever: waits for an item on ``_persist_queue``, persists the
        endpoint stats, then marks the item done. The queued value itself is
        ignored; it only signals that a write is wanted. The loop ends when
        the task is cancelled.
        """
        while True:
            try:
                await self._persist_queue.get()
                await self._persist_endpoint_stats()
                self._persist_queue.task_done()
            except asyncio.CancelledError:
                # Cancellation (see stop_persistence_worker) ends the loop
                break
            except Exception as e:
                # Keep the worker alive on any other failure
                logger.error(f"Persistence worker error: {e}")
    def start_persistence_worker(self):
        """Launch the background persistence task unless one is already set."""
        if self._persist_worker_task:
            return
        worker = self._persistence_worker()
        self._persist_worker_task = asyncio.create_task(worker)
        logger.info("Started persistence worker")
    async def stop_persistence_worker(self):
        """Stop the background persistence worker.

        Cancels the task, waits for it to finish and clears the handle so the
        worker can be started again. Does nothing when no worker is running.
        """
        if self._persist_worker_task:
            self._persist_worker_task.cancel()
            try:
                # Wait for the task to actually unwind before dropping the reference
                await self._persist_worker_task
            except asyncio.CancelledError:
                pass
            self._persist_worker_task = None
            logger.info("Stopped persistence worker")
    async def _heartbeat_worker(self):
        """Send heartbeat to Redis every 30s with circuit breaker for failures.

        Each successful cycle writes a JSON record to
        ``monitor:heartbeat:<container_id>`` with the configured TTL and adds
        the container id to the ``monitor:active_containers`` set. On Redis
        connection errors the wait doubles per consecutive failure (60s, 120s,
        240s, capped at 300s); at the fifth consecutive failure the worker
        pauses for 5 minutes and resets its failure counter. The loop ends
        when the task is cancelled.
        """
        from utils import detect_deployment_mode
        import os
        heartbeat_failures = 0
        max_failures = 5  # Circuit breaker threshold
        while True:
            try:
                # Get hostname/container name for friendly display
                # Try HOSTNAME env var first (set by Docker Compose), then socket.gethostname()
                import socket
                hostname = os.getenv("HOSTNAME", socket.gethostname())
                # Register this container
                # Only ``mode`` is used; the second return value is ignored here
                mode, containers = detect_deployment_mode()
                container_info = {
                    "id": self.container_id,
                    "hostname": hostname,
                    "last_seen": time.time(),
                    "mode": mode,
                    "failure_count": heartbeat_failures
                }
                # Set heartbeat with configured TTL
                await self.redis.setex(
                    f"monitor:heartbeat:{self.container_id}",
                    self.ttl.heartbeat,
                    json.dumps(container_info)
                )
                # Add to active containers set
                await self.redis.sadd("monitor:active_containers", self.container_id)
                # Reset failure counter on success
                heartbeat_failures = 0
                # Wait 30s before next heartbeat
                await asyncio.sleep(30)
            except asyncio.CancelledError:
                break
            except aioredis.ConnectionError as e:
                heartbeat_failures += 1
                logger.error(
                    f"Heartbeat Redis connection error (attempt {heartbeat_failures}/{max_failures}): {e}"
                )
                if heartbeat_failures >= max_failures:
                    # Circuit breaker - back off for longer
                    logger.critical(
                        f"Heartbeat circuit breaker triggered after {heartbeat_failures} failures. "
                        f"Container will appear offline for 5 minutes."
                    )
                    await asyncio.sleep(300)  # 5 min backoff
                    heartbeat_failures = 0
                else:
                    # Exponential backoff
                    # The counter is already >= 1 here, so the first wait is 60s
                    backoff = min(30 * (2 ** heartbeat_failures), 300)
                    await asyncio.sleep(backoff)
            except Exception as e:
                # Unknown failures do not count toward the circuit breaker
                logger.error(f"Unexpected heartbeat error: {e}", exc_info=True)
                await asyncio.sleep(30)
    def start_heartbeat(self):
        """Launch the heartbeat task unless one is already set."""
        if self._heartbeat_task:
            return
        worker = self._heartbeat_worker()
        self._heartbeat_task = asyncio.create_task(worker)
        logger.info("Started heartbeat worker")
    async def stop_heartbeat(self):
        """Stop the heartbeat worker and immediately deregister container.

        Cancels the heartbeat task, then removes this container from the
        ``monitor:active_containers`` set and deletes its heartbeat key.
        Deregistration is best-effort: a failure is logged as a warning and
        not raised. Does nothing when no heartbeat task exists.
        """
        if self._heartbeat_task:
            self._heartbeat_task.cancel()
            try:
                await self._heartbeat_task
            except asyncio.CancelledError:
                pass
            # Immediate deregistration (no 60s wait)
            # i.e. do not rely on the heartbeat key's TTL to expire
            try:
                await self.redis.srem("monitor:active_containers", self.container_id)
                await self.redis.delete(f"monitor:heartbeat:{self.container_id}")
                logger.info(f"Container {self.container_id} immediately deregistered from monitoring")
            except Exception as e:
                logger.warning(f"Failed to deregister container on shutdown: {e}")
            self._heartbeat_task = None
            logger.info("Stopped heartbeat worker")
    async def cleanup(self):
        """Cleanup on shutdown - persist final stats and stop workers.

        Endpoint stats are written before the workers are stopped. Any
        exception is logged and swallowed, so this method does not raise.
        """
        logger.info("Monitor cleanup starting...")
        try:
            # Persist final stats before shutdown
            await self._persist_endpoint_stats()
            # Stop background workers
            await self.stop_persistence_worker()
            await self.stop_heartbeat()
            logger.info("Monitor cleanup completed")
        except Exception as e:
            logger.error(f"Monitor cleanup error: {e}")
    async def load_from_redis(self):
        """Load persisted stats from Redis and start workers.

        Restores ``endpoint_stats`` from the ``monitor:endpoint_stats`` key
        when it exists. A failure to read or decode the stored value is logged
        as a warning and does not prevent the heartbeat worker from starting:
        the heartbeat loop has its own retry/backoff handling, so the
        container can still register once Redis becomes reachable.

        This method never raises ``Exception`` subclasses to the caller.
        """
        try:
            data = await self.redis.get("monitor:endpoint_stats")
            if data:
                self.endpoint_stats = json.loads(data)
                logger.info("Loaded endpoint stats from Redis")
        except Exception as e:
            logger.warning(f"Failed to load from Redis: {e}")
        # Start background workers (kept outside the load attempt so a
        # transient Redis error cannot leave the container unregistered)
        try:
            self.start_heartbeat()
        except Exception as e:
            logger.warning(f"Failed to start heartbeat worker: {e}")
    async def get_health_summary(self) -> Dict:
        """Get current system health snapshot.

        Returns:
            A dict with three sections: ``container`` (memory/CPU percent,
            network totals in MiB, uptime), ``pool`` (browser counts and
            estimated memory per tier) and ``janitor`` (memory pressure level).
            Pool figures fall back to zeros/False when the pool lock cannot be
            acquired within 2 seconds.
        """
        mem_pct = get_container_memory_percent()
        # Samples CPU usage over a 0.1s interval
        cpu_pct = psutil.cpu_percent(interval=0.1)
        # Network I/O counters as reported by psutil; no delta is computed here,
        # so the values below are totals rather than a per-call difference
        net = psutil.net_io_counters()
        # Pool status (acquire lock with timeout to prevent race conditions)
        from crawler_pool import PERMANENT, HOT_POOL, COLD_POOL, LOCK
        try:
            async with asyncio.timeout(2.0):
                async with LOCK:
                    # TODO: Track actual browser process memory instead of estimates
                    # These are conservative estimates based on typical Chromium usage
                    permanent_mem = 270 if PERMANENT else 0  # Estimate: ~270MB for permanent browser
                    hot_mem = len(HOT_POOL) * 180  # Estimate: ~180MB per hot pool browser
                    cold_mem = len(COLD_POOL) * 180  # Estimate: ~180MB per cold pool browser
                    permanent_active = PERMANENT is not None
                    hot_count = len(HOT_POOL)
                    cold_count = len(COLD_POOL)
        except asyncio.TimeoutError:
            logger.warning("Lock acquisition timeout in get_health_summary, using defaults")
            # Use safe defaults when lock times out
            permanent_mem = 0
            hot_mem = 0
            cold_mem = 0
            permanent_active = False
            hot_count = 0
            cold_count = 0
        return {
            "container": {
                "memory_percent": round(mem_pct, 1),
                "cpu_percent": round(cpu_pct, 1),
                "network_sent_mb": round(net.bytes_sent / (1024**2), 2),
                "network_recv_mb": round(net.bytes_recv / (1024**2), 2),
                "uptime_seconds": int(time.time() - self.start_time)
            },
            "pool": {
                "permanent": {"active": permanent_active, "memory_mb": permanent_mem},
                "hot": {"count": hot_count, "memory_mb": hot_mem},
                "cold": {"count": cold_count, "memory_mb": cold_mem},
                "total_memory_mb": permanent_mem + hot_mem + cold_mem
            },
            "janitor": {
                "next_cleanup_estimate": "adaptive",  # Would need janitor state
                # LOW below 60%, MEDIUM below 80%, HIGH otherwise
                "memory_pressure": "LOW" if mem_pct < 60 else "MEDIUM" if mem_pct < 80 else "HIGH"
            }
        }
    def get_active_requests(self) -> List[Dict]:
        """Get list of currently active requests."""
        now = time.time()
        return [
            {
                **req,
                "elapsed": round(now - req["start_time"], 1),
                "status": "running"
            }
            for req in self.active_requests.values()
        ]
    def get_completed_requests(self, limit: int = 50, filter_status: str = "all") -> List[Dict]:
        """Get recent completed requests."""
        requests = list(self.completed_requests)[-limit:]
        if filter_status == "success":
            requests = [r for r in requests if r.get("success")]
        elif filter_status == "error":
            requests = [r for r in requests if not r.get("success")]
        return requests
    async def get_browser_list(self) -> List[Dict]:
        """Get detailed browser pool information with timeout protection.

        Returns:
            One dict per browser (permanent first, then hot, then cold) with
            type, truncated signature, age, idle time, estimated memory, hit
            count and whether it may be killed. An empty list is returned when
            the pool lock cannot be acquired within 2 seconds.
        """
        from crawler_pool import PERMANENT, HOT_POOL, COLD_POOL, LAST_USED, USAGE_COUNT, DEFAULT_CONFIG_SIG, LOCK
        browsers = []
        now = time.time()
        # Acquire lock with timeout to prevent deadlock
        try:
            async with asyncio.timeout(2.0):
                async with LOCK:
                    if PERMANENT:
                        browsers.append({
                            "type": "permanent",
                            "sig": DEFAULT_CONFIG_SIG[:8] if DEFAULT_CONFIG_SIG else "unknown",
                            "age_seconds": int(now - self.start_time),
                            # A signature with no LAST_USED entry reports 0 seconds idle
                            "last_used_seconds": int(now - LAST_USED.get(DEFAULT_CONFIG_SIG, now)),
                            "memory_mb": 270,
                            "hits": USAGE_COUNT.get(DEFAULT_CONFIG_SIG, 0),
                            "killable": False
                        })
                    # Only the signatures are used below; the crawler objects are not inspected
                    for sig, crawler in HOT_POOL.items():
                        browsers.append({
                            "type": "hot",
                            "sig": sig[:8],
                            "age_seconds": int(now - self.start_time),  # Approximation
                            "last_used_seconds": int(now - LAST_USED.get(sig, now)),
                            "memory_mb": 180,  # Estimate
                            "hits": USAGE_COUNT.get(sig, 0),
                            "killable": True
                        })
                    for sig, crawler in COLD_POOL.items():
                        browsers.append({
                            "type": "cold",
                            "sig": sig[:8],
                            # Same approximation as above: monitor uptime, not the browser's own age
                            "age_seconds": int(now - self.start_time),
                            "last_used_seconds": int(now - LAST_USED.get(sig, now)),
                            "memory_mb": 180,
                            "hits": USAGE_COUNT.get(sig, 0),
                            "killable": True
                        })
        except asyncio.TimeoutError:
            logger.error("Browser list lock timeout - pool may be locked by janitor")
            # Return empty list when lock times out to prevent blocking
            return []
        return browsers
    def get_endpoint_stats_summary(self) -> Dict[str, Dict]:
        """Get aggregated endpoint statistics."""
        summary = {}
        for endpoint, stats in self.endpoint_stats.items():
            count = stats["count"]
            avg_time = (stats["total_time"] / count) if count > 0 else 0
            success_rate = (stats["success"] / count * 100) if count > 0 else 0
            pool_hit_rate = (stats["pool_hits"] / count * 100) if count > 0 else 0
            summary[endpoint] = {
                "count": count,
                "avg_latency_ms": round(avg_time * 1000, 1),
                "success_rate_percent": round(success_rate, 1),
                "pool_hit_rate_percent": round(pool_hit_rate, 1),
                "errors": stats["errors"]
            }
        return summary
    def get_timeline_data(self, metric: str, window: str = "5m") -> Dict:
        """Get timeline data for charts."""
        # For now, only 5m window supported
        if metric == "memory":
            data = list(self.memory_timeline)
        elif metric == "requests":
            data = list(self.requests_timeline)
        elif metric == "browsers":
            data = list(self.browser_timeline)
        else:
            return {"timestamps": [], "values": []}
        return {
            "timestamps": [int(d["time"]) for d in data],
            "values": [d.get("value", d.get("browsers")) for d in data]
        }
    def get_janitor_log(self, limit: int = 100) -> List[Dict]:
        """Get recent janitor events."""
        return list(self.janitor_events)[-limit:]
    def get_errors_log(self, limit: int = 100) -> List[Dict]:
        """Get recent errors."""
        return list(self.errors)[-limit:]
 # Global instance (initialized in server.py)
 monitor_stats: Optional[MonitorStats] = None
 def get_monitor() -> MonitorStats:
    """Return the module-level monitor, failing loudly if it was never set.

    Raises:
        RuntimeError: If ``monitor_stats`` is still ``None``.
    """
    if monitor_stats is not None:
        return monitor_stats
    raise RuntimeError("Monitor not initialized")
--- a/deploy/docker/monitor_routes.py
+++ b/deploy/docker/monitor_routes.py
@@ -1,608 +0,0 @@
 # monitor_routes.py - Monitor API endpoints
 from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect
 from pydantic import BaseModel
 from typing import Optional
 from monitor import get_monitor
 from utils import detect_deployment_mode, get_container_id
 import logging
 import asyncio
 import json
 import re
 logger = logging.getLogger(__name__)
 router = APIRouter(prefix="/monitor", tags=["monitor"])
 # ========== Security & Validation ==========
 def validate_container_id(cid: str) -> bool:
    """Validate container ID format to prevent Redis key injection.

    Docker container IDs are 12-64 character hexadecimal strings.
    Hostnames are alphanumeric with dashes and underscores.

    Args:
        cid: Container ID to validate

    Returns:
        True if valid, False otherwise
    """
    if not cid or not isinstance(cid, str):
        return False
    # Allow alphanumeric, dashes, and underscores only (1-64 chars)
    # This prevents path traversal (../../), wildcards (**), and other injection attempts.
    # fullmatch is used instead of match with a trailing "$" because "$" also
    # matches just before a final newline, which would let "abc\n" through.
    return re.fullmatch(r'[a-zA-Z0-9_-]{1,64}', cid) is not None
 # ========== Redis Aggregation Helpers ==========
 async def _get_active_containers():
    """Return the validated container IDs registered in Redis.

    Members of the ``monitor:active_containers`` set are decoded from bytes
    when needed; IDs that fail ``validate_container_id`` are logged and
    dropped. Any failure is logged and results in an empty list.
    """
    try:
        monitor = get_monitor()
        raw_ids = await monitor.redis.smembers("monitor:active_containers")
        accepted = []
        for raw in raw_ids:
            cid_str = raw.decode() if isinstance(raw, bytes) else raw
            if not validate_container_id(cid_str):
                logger.warning(f"Invalid container ID format rejected: {cid_str}")
                continue
            accepted.append(cid_str)
        return accepted
    except Exception as e:
        logger.error(f"Failed to get active containers: {e}")
        return []
 async def _aggregate_active_requests():
    """Collect the active requests stored by every registered container.

    A container whose data cannot be read or decoded is skipped with a warning.
    """
    container_ids = await _get_active_containers()
    merged = []
    monitor = get_monitor()
    for container_id in container_ids:
        try:
            payload = await monitor.redis.get(f"monitor:{container_id}:active_requests")
            if not payload:
                continue
            merged.extend(json.loads(payload))
        except Exception as e:
            logger.warning(f"Failed to get active requests from {container_id}: {e}")
    return merged
 async def _aggregate_completed_requests(limit=100):
    """Collect completed requests from every registered container.

    A container whose data cannot be read or decoded is skipped with a
    warning. The merged list is ordered by ``end_time`` (most recent first)
    and cut to ``limit`` entries.
    """
    container_ids = await _get_active_containers()
    merged = []
    monitor = get_monitor()
    for container_id in container_ids:
        try:
            payload = await monitor.redis.get(f"monitor:{container_id}:completed")
            if not payload:
                continue
            merged.extend(json.loads(payload))
        except Exception as e:
            logger.warning(f"Failed to get completed requests from {container_id}: {e}")
    newest_first = sorted(merged, key=lambda item: item.get("end_time", 0), reverse=True)
    return newest_first[:limit]
 async def _aggregate_janitor_events(limit=100):
    """Collect janitor events from every registered container.

    A container whose data cannot be read or decoded is skipped with a
    warning. The merged list is ordered by ``timestamp`` (most recent first)
    and cut to ``limit`` entries.
    """
    container_ids = await _get_active_containers()
    merged = []
    monitor = get_monitor()
    for container_id in container_ids:
        try:
            payload = await monitor.redis.get(f"monitor:{container_id}:janitor")
            if not payload:
                continue
            merged.extend(json.loads(payload))
        except Exception as e:
            logger.warning(f"Failed to get janitor events from {container_id}: {e}")
    newest_first = sorted(merged, key=lambda item: item.get("timestamp", 0), reverse=True)
    return newest_first[:limit]
 async def _aggregate_errors(limit=100):
    """Collect error entries from every registered container.

    A container whose data cannot be read or decoded is skipped with a
    warning. The merged list is ordered by ``timestamp`` (most recent first)
    and cut to ``limit`` entries.
    """
    container_ids = await _get_active_containers()
    merged = []
    monitor = get_monitor()
    for container_id in container_ids:
        try:
            payload = await monitor.redis.get(f"monitor:{container_id}:errors")
            if not payload:
                continue
            merged.extend(json.loads(payload))
        except Exception as e:
            logger.warning(f"Failed to get errors from {container_id}: {e}")
    newest_first = sorted(merged, key=lambda item: item.get("timestamp", 0), reverse=True)
    return newest_first[:limit]
@router.get("/health")
async def get_health():
    """Return the health summary produced by the monitor.

    Raises:
        HTTPException: 500 carrying the error text when obtaining the monitor
            or its health summary fails.
    """
    try:
        summary = await get_monitor().get_health_summary()
    except Exception as exc:
        logger.error(f"Error getting health: {exc}")
        raise HTTPException(500, str(exc))
    return summary
@router.get("/requests")
async def get_requests(status: str = "all", limit: int = 50):
    """Get active and completed requests aggregated from all containers.

    Args:
        status: Filter by 'active', 'completed', 'success', 'error', or 'all'
        limit: Max number of completed requests to return (default 50)

    Returns:
        A dict with two lists, ``"active"`` and ``"completed"``:
        - 'active': only active requests (``completed`` is empty)
        - 'completed': only completed requests (``active`` is empty)
        - 'success' / 'error': active requests plus the most recent completed
          requests whose ``success`` flag matches, at most ``limit`` of them
        - 'all': active requests plus the ``limit`` most recent completed ones

    Raises:
        HTTPException: 400 for an unknown ``status`` or a ``limit`` outside
            1..1000; 500 if aggregation fails.
    """
    # Input validation
    if status not in ("all", "active", "completed", "success", "error"):
        raise HTTPException(400, f"Invalid status: {status}. Must be one of: all, active, completed, success, error")
    if limit < 1 or limit > 1000:
        raise HTTPException(400, f"Invalid limit: {limit}. Must be between 1 and 1000")
    try:
        # Only aggregate what the response actually contains.
        active_requests = [] if status == "completed" else await _aggregate_active_requests()
        if status == "active":
            completed_requests = []
        elif status in ("success", "error"):
            # Filter first, truncate second: applying the limit before the
            # filter would drop matches that are older than the newest
            # `limit` completed requests. Passing None relies on the helper
            # truncating with a slice (`[:limit]`), where None means "all".
            is_success = (status == "success")
            everything = await _aggregate_completed_requests(None)
            completed_requests = [
                r for r in everything if r.get("success") == is_success
            ][:limit]
        else:  # "all" or "completed"
            completed_requests = await _aggregate_completed_requests(limit)
        return {"active": active_requests, "completed": completed_requests}
    except Exception as e:
        logger.error(f"Error getting requests: {e}")
        raise HTTPException(500, str(e))
@router.get("/browsers")
async def get_browsers():
    """Describe the browser pool known to the monitor.

    Every browser entry is tagged with the current container id. The summary
    reports the number of browsers, the sum of their ``memory_mb`` values and
    the share (in percent, one decimal) of the last 100 completed requests
    that were flagged ``pool_hit``.

    Raises:
        HTTPException: 500 carrying the error text on any failure.
    """
    try:
        monitor = get_monitor()
        container_id = get_container_id()
        browser_list = await monitor.get_browser_list()

        # Tag each entry with the container it belongs to.
        for entry in browser_list:
            entry["container_id"] = container_id

        browser_count = len(browser_list)
        memory_total = sum(entry["memory_mb"] for entry in browser_list)

        # Reuse rate is derived from the most recent completed requests.
        recent_requests = monitor.get_completed_requests(100)
        hit_count = len([req for req in recent_requests if req.get("pool_hit", False)])
        if recent_requests:
            reuse_pct = hit_count / len(recent_requests) * 100
        else:
            reuse_pct = 0

        summary = {
            "total_count": browser_count,
            "total_memory_mb": memory_total,
            "reuse_rate_percent": round(reuse_pct, 1)
        }
        return {
            "browsers": browser_list,
            "summary": summary,
            "container_id": container_id
        }
    except Exception as exc:
        logger.error(f"Error getting browsers: {exc}")
        raise HTTPException(500, str(exc))
@router.get("/endpoints/stats")
async def get_endpoint_stats():
    """Return the monitor's endpoint statistics summary.

    Raises:
        HTTPException: 500 carrying the error text on any failure.
    """
    try:
        stats = get_monitor().get_endpoint_stats_summary()
    except Exception as exc:
        logger.error(f"Error getting endpoint stats: {exc}")
        raise HTTPException(500, str(exc))
    return stats
@router.get("/timeline")
async def get_timeline(metric: str = "memory", window: str = "5m"):
    """Return timeline data for charts.

    Args:
        metric: One of 'memory', 'requests' or 'browsers'.
        window: Time window; '5m' is the only accepted value.

    Raises:
        HTTPException: 400 for an unsupported metric or window; 500 if the
            monitor fails to produce the data.
    """
    # Reject unsupported arguments before touching the monitor.
    supported_metrics = ("memory", "requests", "browsers")
    if metric not in supported_metrics:
        raise HTTPException(400, f"Invalid metric: {metric}. Must be one of: memory, requests, browsers")
    if window != "5m":
        raise HTTPException(400, f"Invalid window: {window}. Only '5m' is currently supported")

    try:
        timeline = get_monitor().get_timeline_data(metric, window)
    except Exception as exc:
        logger.error(f"Error getting timeline: {exc}")
        raise HTTPException(500, str(exc))
    return timeline
@router.get("/logs/janitor")
async def get_janitor_log(limit: int = 100):
    """Return the most recent janitor events across all containers.

    Args:
        limit: Number of events to return; must be within 1..1000.

    Raises:
        HTTPException: 400 for an out-of-range limit; 500 if aggregation fails.
    """
    if not 1 <= limit <= 1000:
        raise HTTPException(400, f"Invalid limit: {limit}. Must be between 1 and 1000")
    try:
        # Events are merged from every container's Redis entry.
        return {"events": await _aggregate_janitor_events(limit)}
    except Exception as exc:
        logger.error(f"Error getting janitor log: {exc}")
        raise HTTPException(500, str(exc))
@router.get("/logs/errors")
async def get_errors_log(limit: int = 100):
    """Return the most recent errors across all containers.

    Args:
        limit: Number of error records to return; must be within 1..1000.

    Raises:
        HTTPException: 400 for an out-of-range limit; 500 if aggregation fails.
    """
    limit_ok = 1 <= limit <= 1000
    if not limit_ok:
        raise HTTPException(400, f"Invalid limit: {limit}. Must be between 1 and 1000")
    try:
        # Records are merged from every container's Redis entry.
        recent_errors = await _aggregate_errors(limit)
        return {"errors": recent_errors}
    except Exception as exc:
        logger.error(f"Error getting errors log: {exc}")
        raise HTTPException(500, str(exc))
 # ========== Control Actions ==========
 # Request body accepted by the kill_browser and restart_browser actions.
 class KillBrowserRequest(BaseModel):
    # Browser signature or a prefix of it; the handlers match it against pool
    # keys with startswith(). restart_browser also accepts "permanent".
    sig: str
@router.post("/actions/cleanup")
async def force_cleanup():
    """Force an immediate cleanup of the cold browser pool.

    Closes every browser currently held in ``COLD_POOL`` (errors raised by
    ``close()`` are suppressed), removes its entries from ``COLD_POOL``,
    ``LAST_USED`` and ``USAGE_COUNT``, and records a ``force_cleanup`` janitor
    event with the number of browsers removed.

    Returns:
        ``{"success": True, "killed_browsers": <count>}``

    Raises:
        HTTPException: 500 carrying the error text on any failure.
    """
    try:
        from crawler_pool import COLD_POOL, LAST_USED, USAGE_COUNT, LOCK
        from contextlib import suppress
        killed_count = 0
        async with LOCK:
            # Iterate over a snapshot of the keys: the pool is mutated below.
            for sig in list(COLD_POOL.keys()):
                # Kill all cold pool browsers immediately
                logger.info(f"🧹 Force cleanup: closing cold browser (sig={sig[:8]})")
                with suppress(Exception):
                    await COLD_POOL[sig].close()
                COLD_POOL.pop(sig, None)
                LAST_USED.pop(sig, None)
                USAGE_COUNT.pop(sig, None)
                killed_count += 1
        monitor = get_monitor()
        await monitor.track_janitor_event("force_cleanup", "manual", {"killed": killed_count})
        return {"success": True, "killed_browsers": killed_count}
    except Exception as e:
        logger.error(f"Error during force cleanup: {e}")
        raise HTTPException(500, str(e))
@router.post("/actions/kill_browser")
async def kill_browser(req: KillBrowserRequest):
    """Kill a specific browser by signature (hot or cold only).

    The first pool key that starts with ``req.sig`` is selected, searching the
    hot pool before the cold pool. The browser is removed from its pool and
    closed (errors from ``close()`` are suppressed), its ``LAST_USED`` and
    ``USAGE_COUNT`` entries are dropped, and a ``kill_browser`` janitor event
    is recorded.

    Args:
        req: Request body; ``req.sig`` is a non-empty prefix of the browser
            config signature.

    Returns:
        ``{"success": True, "killed_sig": <first 8 chars>, "pool_type": "hot"|"cold"}``

    Raises:
        HTTPException: 400 if ``req.sig`` is empty; 403 if the prefix matches
            the permanent browser's signature; 404 if no pooled browser
            matches; 500 on any other failure.
    """
    try:
        from crawler_pool import HOT_POOL, COLD_POOL, LAST_USED, USAGE_COUNT, LOCK, DEFAULT_CONFIG_SIG
        from contextlib import suppress
        # An empty prefix matches every signature via startswith(), which
        # would select an arbitrary browser; reject it explicitly.
        if not req.sig:
            raise HTTPException(400, "Browser signature must not be empty")
        target_sig = None
        pool_type = None
        async with LOCK:
            # Check if trying to kill permanent
            if DEFAULT_CONFIG_SIG and DEFAULT_CONFIG_SIG.startswith(req.sig):
                raise HTTPException(403, "Cannot kill permanent browser. Use restart instead.")
            # Find full signature matching prefix (hot pool takes precedence)
            for candidate_type, pool in (("hot", HOT_POOL), ("cold", COLD_POOL)):
                target_sig = next((sig for sig in pool if sig.startswith(req.sig)), None)
                if target_sig is not None:
                    pool_type = candidate_type
                    break
            if not target_sig:
                raise HTTPException(404, f"Browser with sig={req.sig} not found")
            # Warn if there are active requests (browser might be in use)
            monitor = get_monitor()
            active_count = len(monitor.get_active_requests())
            if active_count > 0:
                logger.warning(f"Killing browser {target_sig[:8]} while {active_count} requests are active - may cause failures")
            # Kill the browser
            if pool_type == "hot":
                browser = HOT_POOL.pop(target_sig)
            else:
                browser = COLD_POOL.pop(target_sig)
            with suppress(Exception):
                await browser.close()
            LAST_USED.pop(target_sig, None)
            USAGE_COUNT.pop(target_sig, None)
        logger.info(f"🔪 Killed {pool_type} browser (sig={target_sig[:8]})")
        await monitor.track_janitor_event("kill_browser", target_sig, {"pool": pool_type, "manual": True})
        return {"success": True, "killed_sig": target_sig[:8], "pool_type": pool_type}
    except HTTPException:
        # Deliberate 4xx responses must not be rewrapped as 500.
        raise
    except Exception as e:
        logger.error(f"Error killing browser: {e}")
        raise HTTPException(500, str(e))
@router.post("/actions/restart_browser")
async def restart_browser(req: KillBrowserRequest):
    """Restart a browser (kill + recreate). Works for permanent too.

    For the permanent browser (``req.sig == "permanent"`` or a prefix of
    ``DEFAULT_CONFIG_SIG``) the current instance is closed and
    ``init_permanent`` is called with a ``BrowserConfig`` built from the
    loaded config. For hot/cold browsers the instance is only closed and
    removed from its pool; its config is not stored, so it is not recreated
    here (the response notes it will be recreated on the next request).

    Args:
        req: Request body; ``req.sig`` is a non-empty prefix of the browser
            config signature, or the literal "permanent".

    Returns:
        ``{"success": True, "restarted": "permanent"}`` for the permanent
        browser, otherwise ``{"success": True, "restarted_sig": ..., "note": ...}``.

    Raises:
        HTTPException: 400 if ``req.sig`` is empty; 404 if no pooled browser
            matches; 500 on any other failure.
    """
    try:
        from crawler_pool import (PERMANENT, HOT_POOL, COLD_POOL, LAST_USED,
                                  USAGE_COUNT, LOCK, DEFAULT_CONFIG_SIG, init_permanent)
        from crawl4ai import BrowserConfig
        from contextlib import suppress
        # An empty prefix matches every signature via startswith(), which
        # would restart the permanent browser or an arbitrary pooled one.
        if not req.sig:
            raise HTTPException(400, "Browser signature must not be empty")
        # Handle permanent browser restart
        if req.sig == "permanent" or (DEFAULT_CONFIG_SIG and DEFAULT_CONFIG_SIG.startswith(req.sig)):
            async with LOCK:
                if PERMANENT:
                    with suppress(Exception):
                        await PERMANENT.close()
                # Reinitialize permanent
                from utils import load_config
                config = load_config()
                await init_permanent(BrowserConfig(
                    extra_args=config["crawler"]["browser"].get("extra_args", []),
                    **config["crawler"]["browser"].get("kwargs", {}),
                ))
            logger.info("🔄 Restarted permanent browser")
            return {"success": True, "restarted": "permanent"}
        # Handle hot/cold browser restart
        target_sig = None
        pool_type = None
        async with LOCK:
            # Find browser (hot pool takes precedence over cold)
            for candidate_type, pool in (("hot", HOT_POOL), ("cold", COLD_POOL)):
                target_sig = next((sig for sig in pool if sig.startswith(req.sig)), None)
                if target_sig is not None:
                    pool_type = candidate_type
                    break
            if not target_sig:
                raise HTTPException(404, f"Browser with sig={req.sig} not found")
            # Kill existing
            if pool_type == "hot":
                browser = HOT_POOL.pop(target_sig)
            else:
                browser = COLD_POOL.pop(target_sig)
            with suppress(Exception):
                await browser.close()
            # Note: We can't easily recreate with same config without storing it
            # For now, just kill and let new requests create fresh ones
            LAST_USED.pop(target_sig, None)
            USAGE_COUNT.pop(target_sig, None)
        logger.info(f"🔄 Restarted {pool_type} browser (sig={target_sig[:8]})")
        monitor = get_monitor()
        await monitor.track_janitor_event("restart_browser", target_sig, {"pool": pool_type})
        return {"success": True, "restarted_sig": target_sig[:8], "note": "Browser will be recreated on next request"}
    except HTTPException:
        # Deliberate 4xx responses must not be rewrapped as 500.
        raise
    except Exception as e:
        logger.error(f"Error restarting browser: {e}")
        raise HTTPException(500, str(e))
@router.post("/stats/reset")
async def reset_stats():
    """Clear the monitor's endpoint counters and persist the empty state.

    Returns:
        ``{"success": True, "message": "Endpoint stats reset"}``

    Raises:
        HTTPException: 500 carrying the error text on any failure.
    """
    try:
        stats_monitor = get_monitor()
        stats_monitor.endpoint_stats.clear()
        await stats_monitor._persist_endpoint_stats()
    except Exception as exc:
        logger.error(f"Error resetting stats: {exc}")
        raise HTTPException(500, str(exc))
    return {"success": True, "message": "Endpoint stats reset"}
@router.get("/containers")
async def get_containers():
    """Report the containers that currently have a heartbeat in Redis.

    For every active container id the key ``monitor:heartbeat:<id>`` is read;
    containers with a readable heartbeat are listed as healthy, the others
    are skipped (read or decode failures are logged as warnings).

    The reported ``mode`` is "single" for exactly one container, "swarm" when
    there are several and at least one hostname has more than two
    dot-separated parts, and "compose" otherwise.

    Raises:
        HTTPException: 500 carrying the error text on any failure.
    """
    try:
        monitor = get_monitor()
        active_ids = await _get_active_containers()
        found = []
        for cid in active_ids:
            try:
                raw = await monitor.redis.get(f"monitor:heartbeat:{cid}")
                if not raw:
                    continue  # no heartbeat stored for this id
                heartbeat = json.loads(raw)
                found.append({
                    "id": heartbeat.get("id", cid),
                    "hostname": heartbeat.get("hostname", cid),
                    "healthy": True  # a present heartbeat is treated as healthy
                })
            except Exception as exc:
                logger.warning(f"Failed to get heartbeat for {cid}: {exc}")

        # Swarm hostnames follow the pattern service.slot.task_id.
        if len(found) == 1:
            mode = "single"
        elif len(found) > 1 and any(
            "." in c["hostname"] and len(c["hostname"].split(".")) > 2 for c in found
        ):
            mode = "swarm"
        else:
            mode = "compose"

        return {
            "mode": mode,
            "container_id": get_container_id(),
            "containers": found,
            "count": len(found)
        }
    except Exception as exc:
        logger.error(f"Error getting containers: {exc}")
        raise HTTPException(500, str(exc))
@router.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
    """Push monitoring snapshots to a WebSocket client every 2 seconds.

    Each snapshot combines:
    - container list and active/completed requests, janitor events and errors
      aggregated from all containers (10 most recent for the last three)
    - health summary, browser list and 5-minute timelines taken from the
      local monitor only
    - a ``timestamp`` taken from the event loop clock

    A ``WebSocketDisconnect`` ends the loop. Any other error raised while
    building or sending a snapshot is logged and the loop retries after
    2 seconds.
    """
    await websocket.accept()
    logger.info("WebSocket client connected")
    try:
        while True:
            try:
                monitor = get_monitor()
                local_id = get_container_id()
                containers_info = await get_containers()

                # Cross-container data, read via Redis.
                active = await _aggregate_active_requests()
                completed = await _aggregate_completed_requests(limit=10)
                janitor = await _aggregate_janitor_events(limit=10)
                errors = await _aggregate_errors(limit=10)

                # Data only available for the local container.
                health = await monitor.get_health_summary()
                local_browsers = await monitor.get_browser_list()
                for entry in local_browsers:
                    entry["container_id"] = local_id

                snapshot = {
                    "timestamp": asyncio.get_event_loop().time(),
                    "container_id": local_id,  # container serving this socket
                    "is_aggregated": True,
                    "local_health": health,
                    "containers": containers_info.get("containers", []),
                    "requests": {"active": active, "completed": completed},
                    "browsers": local_browsers,
                    # TODO: Aggregate timeline from Redis (currently local only)
                    "timeline": {
                        metric: monitor.get_timeline_data(metric, "5m")
                        for metric in ("memory", "requests", "browsers")
                    },
                    "janitor": janitor,
                    "errors": errors
                }
                await websocket.send_json(snapshot)
                # Pause before producing the next snapshot.
                await asyncio.sleep(2)
            except WebSocketDisconnect:
                logger.info("WebSocket client disconnected")
                break
            except Exception as exc:
                logger.error(f"WebSocket error: {exc}", exc_info=True)
                await asyncio.sleep(2)  # back off, then try again
    except Exception as exc:
        logger.error(f"WebSocket connection error: {exc}", exc_info=True)
    finally:
        logger.info("WebSocket connection closed")
--- a/deploy/docker/requirements.txt
+++ b/deploy/docker/requirements.txt
@@ -12,6 +12,6 @@ pydantic>=2.11
 rank-bm25==0.2.2
 anyio==4.9.0
 PyJWT==2.10.1
-mcp>=1.18.0
+mcp>=1.6.0
 websockets>=15.0.1
 httpx[http2]>=0.27.2
--- a/deploy/docker/schemas.py
+++ b/deploy/docker/schemas.py
@@ -1,6 +1,6 @@
 from typing import List, Optional, Dict
 from enum import Enum
-from pydantic import BaseModel, Field, HttpUrl
+from pydantic import BaseModel, Field
 from utils import FilterType
@@ -9,50 +9,6 @@ class CrawlRequest(BaseModel):
    browser_config: Optional[Dict] = Field(default_factory=dict)
    crawler_config: Optional[Dict] = Field(default_factory=dict)
 class HookConfig(BaseModel):
    """Configuration for user-provided hooks"""
    code: Dict[str, str] = Field(
        default_factory=dict,
        description="Map of hook points to Python code strings"
    )
    timeout: int = Field(
        default=30,
        ge=1,
        le=120,
        description="Timeout in seconds for each hook execution"
    )
    class Config:
        schema_extra = {
            "example": {
                "code": {
                    "on_page_context_created": """
 async def hook(page, context, **kwargs):
    # Block images to speed up crawling
    await context.route("**/*.{png,jpg,jpeg,gif}", lambda route: route.abort())
    return page
 """,
                    "before_retrieve_html": """
 async def hook(page, context, **kwargs):
    # Scroll to load lazy content
    await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
    await page.wait_for_timeout(2000)
    return page
 """
                },
                "timeout": 30
            }
        }
 class CrawlRequestWithHooks(CrawlRequest):
    """Extended crawl request with hooks support"""
    hooks: Optional[HookConfig] = Field(
        default=None,
        description="Optional user-provided hook functions"
    )
 class MarkdownRequest(BaseModel):
    """Request body for the /md endpoint."""
    url: str                    = Field(...,  description="Absolute http/https URL to fetch")
@@ -86,21 +42,3 @@ class JSEndpointRequest(BaseModel):
        ...,
        description="List of separated JavaScript snippets to execute"
    )
 class WebhookConfig(BaseModel):
    """Configuration for webhook notifications."""
    webhook_url: HttpUrl
    webhook_data_in_payload: bool = False
    webhook_headers: Optional[Dict[str, str]] = None
 class WebhookPayload(BaseModel):
    """Payload sent to webhook endpoints."""
    task_id: str
    task_type: str  # "crawl", "llm_extraction", etc.
    status: str  # "completed" or "failed"
    timestamp: str  # ISO 8601 format
    urls: List[str]
    error: Optional[str] = None
    data: Optional[Dict] = None  # Included only if webhook_data_in_payload=True
--- a/deploy/docker/server.py
+++ b/deploy/docker/server.py
@@ -16,7 +16,6 @@ from fastapi import Request, Depends
 from fastapi.responses import FileResponse
 import base64
 import re
 import logging
 from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
 from api import (
    handle_markdown_request, handle_llm_qa,
@@ -24,7 +23,7 @@ from api import (
    stream_results
 )
 from schemas import (
-    CrawlRequestWithHooks,
+    CrawlRequest,
    MarkdownRequest,
    RawCode,
    HTMLRequest,
@@ -79,14 +78,6 @@ __version__ = "0.5.1-d1"
 MAX_PAGES = config["crawler"]["pool"].get("max_pages", 30)
 GLOBAL_SEM = asyncio.Semaphore(MAX_PAGES)
 # ── default browser config helper ─────────────────────────────
 def get_default_browser_config() -> BrowserConfig:
    """Get default BrowserConfig from config.yml."""
    return BrowserConfig(
        extra_args=config["crawler"]["browser"].get("extra_args", []),
        **config["crawler"]["browser"].get("kwargs", {}),
    )
 # import logging
 # page_log = logging.getLogger("page_cap")
 # orig_arun = AsyncWebCrawler.arun
@@ -112,52 +103,15 @@ AsyncWebCrawler.arun = capped_arun
@asynccontextmanager
 async def lifespan(_: FastAPI):
-    from crawler_pool import init_permanent
+    await get_crawler(BrowserConfig(
    from monitor import MonitorStats
    import monitor as monitor_module
    # Initialize monitor
    monitor_module.monitor_stats = MonitorStats(redis)
    await monitor_module.monitor_stats.load_from_redis()
    monitor_module.monitor_stats.start_persistence_worker()
    # Initialize browser pool
    await init_permanent(BrowserConfig(
        extra_args=config["crawler"]["browser"].get("extra_args", []),
        **config["crawler"]["browser"].get("kwargs", {}),
-    ))
+    ))           # warm‑up
-
+    app.state.janitor = asyncio.create_task(janitor())        # idle GC
    # Start background tasks
    app.state.janitor = asyncio.create_task(janitor())
    app.state.timeline_updater = asyncio.create_task(_timeline_updater())
    yield
    # Cleanup
    app.state.janitor.cancel()
    app.state.timeline_updater.cancel()
    # Monitor cleanup (persist stats and stop workers)
    from monitor import get_monitor
    try:
        await get_monitor().cleanup()
    except Exception as e:
        logger.error(f"Monitor cleanup failed: {e}")
    await close_all()
 async def _timeline_updater():
    """Update timeline data every 5 seconds."""
    from monitor import get_monitor
    while True:
        await asyncio.sleep(5)
        try:
            await asyncio.wait_for(get_monitor().update_timeline(), timeout=4.0)
        except asyncio.TimeoutError:
            logger.warning("Timeline update timeout after 4s")
        except Exception as e:
            logger.warning(f"Timeline update error: {e}")
 # ───────────────────── FastAPI instance ──────────────────────
 app = FastAPI(
    title=config["app"]["title"],
@@ -175,36 +129,13 @@ app.mount(
    name="play",
 )
 # ── static monitor dashboard ────────────────────────────────
 MONITOR_DIR = pathlib.Path(__file__).parent / "static" / "monitor"
 if not MONITOR_DIR.exists():
    raise RuntimeError(f"Monitor assets not found at {MONITOR_DIR}")
 app.mount(
    "/dashboard",
    StaticFiles(directory=MONITOR_DIR, html=True),
    name="monitor_ui",
 )
 # ── static assets (logo, etc) ────────────────────────────────
 ASSETS_DIR = pathlib.Path(__file__).parent / "static" / "assets"
 if ASSETS_DIR.exists():
    app.mount(
        "/static/assets",
        StaticFiles(directory=ASSETS_DIR),
        name="assets",
    )
@app.get("/")
 async def root():
    return RedirectResponse("/playground")
 # ─────────────────── infra / middleware  ─────────────────────
-# Build Redis URL from environment or config
+redis = aioredis.from_url(config["redis"].get("uri", "redis://localhost"))
 redis_host = os.getenv("REDIS_HOST", config["redis"].get("host", "localhost"))
 redis_port = os.getenv("REDIS_PORT", config["redis"].get("port", 6379))
 redis_url = config["redis"].get("uri") or f"redis://{redis_host}:{redis_port}"
 redis = aioredis.from_url(redis_url)
 limiter = Limiter(
    key_func=get_remote_address,
@@ -281,12 +212,6 @@ def _safe_eval_config(expr: str) -> dict:
 # ── job router ──────────────────────────────────────────────
 app.include_router(init_job_router(redis, config, token_dep))
 # ── monitor router ──────────────────────────────────────────
 from monitor_routes import router as monitor_router
 app.include_router(monitor_router)
 logger = logging.getLogger(__name__)
 # ──────────────────────── Endpoints ──────────────────────────
@app.post("/token")
 async def get_token(req: TokenRequest):
@@ -341,20 +266,27 @@ async def generate_html(
    Crawls the URL, preprocesses the raw HTML for schema extraction, and returns the processed HTML.
    Use when you need sanitized HTML structures for building schemas or further processing.
    """
    from crawler_pool import get_crawler
    cfg = CrawlerRunConfig()
    try:
-        crawler = await get_crawler(get_default_browser_config())
+        async with AsyncWebCrawler(config=BrowserConfig()) as crawler:
-        results = await crawler.arun(url=body.url, config=cfg)
+            results = await crawler.arun(url=body.url, config=cfg)
        # Check if the crawl was successful
        if not results[0].success:
-            raise HTTPException(500, detail=results[0].error_message or "Crawl failed")
+            raise HTTPException(
                status_code=500,
                detail=results[0].error_message or "Crawl failed"
            )
        raw_html = results[0].html
        from crawl4ai.utils import preprocess_html_for_schema
        processed_html = preprocess_html_for_schema(raw_html)
        return JSONResponse({"html": processed_html, "url": body.url, "success": True})
    except Exception as e:
-        raise HTTPException(500, detail=str(e))
+        # Log and raise as HTTP 500 for other exceptions
        raise HTTPException(
            status_code=500,
            detail=str(e)
        )
 # Screenshot endpoint
@@ -372,13 +304,16 @@ async def generate_screenshot(
    Use when you need an image snapshot of the rendered page. Its recommened to provide an output path to save the screenshot.
    Then in result instead of the screenshot you will get a path to the saved file.
    """
    from crawler_pool import get_crawler
    try:
-        cfg = CrawlerRunConfig(screenshot=True, screenshot_wait_for=body.screenshot_wait_for)
+        cfg = CrawlerRunConfig(
-        crawler = await get_crawler(get_default_browser_config())
+            screenshot=True, screenshot_wait_for=body.screenshot_wait_for)
-        results = await crawler.arun(url=body.url, config=cfg)
+        async with AsyncWebCrawler(config=BrowserConfig()) as crawler:
            results = await crawler.arun(url=body.url, config=cfg)
        if not results[0].success:
-            raise HTTPException(500, detail=results[0].error_message or "Crawl failed")
+            raise HTTPException(
                status_code=500,
                detail=results[0].error_message or "Crawl failed"
            )
        screenshot_data = results[0].screenshot
        if body.output_path:
            abs_path = os.path.abspath(body.output_path)
@@ -388,7 +323,10 @@ async def generate_screenshot(
            return {"success": True, "path": abs_path}
        return {"success": True, "screenshot": screenshot_data}
    except Exception as e:
-        raise HTTPException(500, detail=str(e))
+        raise HTTPException(
            status_code=500,
            detail=str(e)
        )
 # PDF endpoint
@@ -406,13 +344,15 @@ async def generate_pdf(
    Use when you need a printable or archivable snapshot of the page. It is recommended to provide an output path to save the PDF.
    Then in result instead of the PDF you will get a path to the saved file.
    """
    from crawler_pool import get_crawler
    try:
        cfg = CrawlerRunConfig(pdf=True)
-        crawler = await get_crawler(get_default_browser_config())
+        async with AsyncWebCrawler(config=BrowserConfig()) as crawler:
-        results = await crawler.arun(url=body.url, config=cfg)
+            results = await crawler.arun(url=body.url, config=cfg)
        if not results[0].success:
-            raise HTTPException(500, detail=results[0].error_message or "Crawl failed")
+            raise HTTPException(
                status_code=500,
                detail=results[0].error_message or "Crawl failed"
            )
        pdf_data = results[0].pdf
        if body.output_path:
            abs_path = os.path.abspath(body.output_path)
@@ -422,7 +362,10 @@ async def generate_pdf(
            return {"success": True, "path": abs_path}
        return {"success": True, "pdf": base64.b64encode(pdf_data).decode()}
    except Exception as e:
-        raise HTTPException(500, detail=str(e))
+        raise HTTPException(
            status_code=500,
            detail=str(e)
        )
@app.post("/execute_js")
@@ -478,17 +421,23 @@ async def execute_js(
        ```
    """
    from crawler_pool import get_crawler
    try:
        cfg = CrawlerRunConfig(js_code=body.scripts)
-        crawler = await get_crawler(get_default_browser_config())
+        async with AsyncWebCrawler(config=BrowserConfig()) as crawler:
-        results = await crawler.arun(url=body.url, config=cfg)
+            results = await crawler.arun(url=body.url, config=cfg)
        if not results[0].success:
-            raise HTTPException(500, detail=results[0].error_message or "Crawl failed")
+            raise HTTPException(
                status_code=500,
                detail=results[0].error_message or "Crawl failed"
            )
        # Return JSON-serializable dict of the first CrawlResult
        data = results[0].model_dump()
        return JSONResponse(data)
    except Exception as e:
-        raise HTTPException(500, detail=str(e))
+        raise HTTPException(
            status_code=500,
            detail=str(e)
        )
@app.get("/llm/{url:path}")
@@ -513,72 +462,6 @@ async def get_schema():
            "crawler": CrawlerRunConfig().dump()}
@app.get("/hooks/info")
 async def get_hooks_info():
    """Get information about available hook points and their signatures"""
    from hook_manager import UserHookManager
    hook_info = {}
    for hook_point, params in UserHookManager.HOOK_SIGNATURES.items():
        hook_info[hook_point] = {
            "parameters": params,
            "description": get_hook_description(hook_point),
            "example": get_hook_example(hook_point)
        }
    return JSONResponse({
        "available_hooks": hook_info,
        "timeout_limits": {
            "min": 1,
            "max": 120,
            "default": 30
        }
    })
 def get_hook_description(hook_point: str) -> str:
    """Get description for each hook point"""
    descriptions = {
        "on_browser_created": "Called after browser instance is created",
        "on_page_context_created": "Called after page and context are created - ideal for authentication",
        "before_goto": "Called before navigating to the target URL",
        "after_goto": "Called after navigation is complete",
        "on_user_agent_updated": "Called when user agent is updated",
        "on_execution_started": "Called when custom JavaScript execution begins",
        "before_retrieve_html": "Called before retrieving the final HTML - ideal for scrolling",
        "before_return_html": "Called just before returning the HTML content"
    }
    return descriptions.get(hook_point, "")
 def get_hook_example(hook_point: str) -> str:
    """Get example code for each hook point"""
    examples = {
        "on_page_context_created": """async def hook(page, context, **kwargs):
    # Add authentication cookie
    await context.add_cookies([{
        'name': 'session',
        'value': 'my-session-id',
        'domain': '.example.com'
    }])
    return page""",
        "before_retrieve_html": """async def hook(page, context, **kwargs):
    # Scroll to load lazy content
    await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
    await page.wait_for_timeout(2000)
    return page""",
        "before_goto": """async def hook(page, context, url, **kwargs):
    # Set custom headers
    await page.set_extra_http_headers({
        'X-Custom-Header': 'value'
    })
    return page"""
    }
    return examples.get(hook_point, "# Implement your hook logic here\nreturn page")
# Health-check endpoint; its route path is read from
# config["observability"]["health_check"]["endpoint"].
@app.get(config["observability"]["health_check"]["endpoint"])
 async def health():
    # Always reports "ok", together with the current time.time() value and __version__.
    return {"status": "ok", "timestamp": time.time(), "version": __version__}
@@ -594,13 +477,12 @@ async def metrics():
@mcp_tool("crawl")
 async def crawl(
    request: Request,
-    crawl_request: CrawlRequestWithHooks,
+    crawl_request: CrawlRequest,
    _td: Dict = Depends(token_dep),
 ):
    """
    Crawl a list of URLs and return the results as JSON.
    For streaming responses, use /crawl/stream endpoint.
    Supports optional user-provided hook functions for customization.
    """
    if not crawl_request.urls:
        raise HTTPException(400, "At least one URL required")
@@ -608,21 +490,11 @@ async def crawl(
    crawler_config = CrawlerRunConfig.load(crawl_request.crawler_config)
    if crawler_config.stream:
        return await stream_process(crawl_request=crawl_request)
    # Prepare hooks config if provided
    hooks_config = None
    if crawl_request.hooks:
        hooks_config = {
            'code': crawl_request.hooks.code,
            'timeout': crawl_request.hooks.timeout
        }
    results = await handle_crawl_request(
        urls=crawl_request.urls,
        browser_config=crawl_request.browser_config,
        crawler_config=crawl_request.crawler_config,
        config=config,
        hooks_config=hooks_config
    )
    # check if all of the results are not successful
    if all(not result["success"] for result in results["results"]):
@@ -634,7 +506,7 @@ async def crawl(
@limiter.limit(config["rate_limiting"]["default_limit"])
 async def crawl_stream(
    request: Request,
-    crawl_request: CrawlRequestWithHooks,
+    crawl_request: CrawlRequest,
    _td: Dict = Depends(token_dep),
 ):
    if not crawl_request.urls:
@@ -642,38 +514,21 @@ async def crawl_stream(
    return await stream_process(crawl_request=crawl_request)
-async def stream_process(crawl_request: CrawlRequestWithHooks):
+async def stream_process(crawl_request: CrawlRequest):
-    
+    crawler, gen = await handle_stream_crawl_request(
    # Prepare hooks config if provided# Prepare hooks config if provided
    hooks_config = None
    if crawl_request.hooks:
        hooks_config = {
            'code': crawl_request.hooks.code,
            'timeout': crawl_request.hooks.timeout
        }
    crawler, gen, hooks_info = await handle_stream_crawl_request(
        urls=crawl_request.urls,
        browser_config=crawl_request.browser_config,
        crawler_config=crawl_request.crawler_config,
        config=config,
-        hooks_config=hooks_config
+)
    )
    # Add hooks info to response headers if available
    headers = {
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "X-Stream-Status": "active",
    }
    if hooks_info:
        import json
        headers["X-Hooks-Status"] = json.dumps(hooks_info['status']['status'])
    return StreamingResponse(
        stream_results(crawler, gen),
        media_type="application/x-ndjson",
-        headers=headers,
+        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Stream-Status": "active",
        },
    )
--- a/deploy/docker/server_manager.py
+++ b/deploy/docker/server_manager.py
--- a/deploy/docker/static/assets/crawl4ai-logo.jpg
+++ b/deploy/docker/static/assets/crawl4ai-logo.jpg
--- a/deploy/docker/static/assets/crawl4ai-logo.png
+++ b/deploy/docker/static/assets/crawl4ai-logo.png
--- a/deploy/docker/static/assets/logo.png
+++ b/deploy/docker/static/assets/logo.png
--- a/deploy/docker/static/monitor/index.html
+++ b/deploy/docker/static/monitor/index.html
--- a/deploy/docker/static/playground/index.html
+++ b/deploy/docker/static/playground/index.html
@@ -167,14 +167,11 @@
            </a>
        </h1>
-        <div class="ml-auto flex items-center space-x-4">
+        <div class="ml-auto flex space-x-2">
-            <a href="/dashboard" class="text-xs text-secondary hover:text-primary underline">Monitor</a>
+            <button id="play-tab"
-            <div class="flex space-x-2">
+                class="px-3 py-1 rounded-t bg-surface border border-b-0 border-border text-primary">Playground</button>
-                <button id="play-tab"
+            <button id="stress-tab" class="px-3 py-1 rounded-t border border-border hover:bg-surface">Stress
-                    class="px-3 py-1 rounded-t bg-surface border border-b-0 border-border text-primary">Playground</button>
+                Test</button>
                <button id="stress-tab" class="px-3 py-1 rounded-t border border-border hover:bg-surface">Stress
                    Test</button>
            </div>
        </div>
    </header>
--- a/deploy/docker/test-websocket.py
+++ b/deploy/docker/test-websocket.py
@@ -1,34 +0,0 @@
 #!/usr/bin/env python3
 """
 Quick WebSocket test - Connect to monitor WebSocket and print updates
 """
 import asyncio
 import websockets
 import json
 async def test_websocket(uri="ws://localhost:11235/monitor/ws", updates=5, recv_timeout=None):
    """Connect to the monitor WebSocket and print a summary of each update.

    Args:
        uri: WebSocket endpoint to connect to. Defaults to the local monitor
            endpoint that this script previously hard-coded.
        updates: Number of messages to receive before declaring success
            (defaults to 5, the previously hard-coded count).
        recv_timeout: Optional per-message timeout in seconds. ``None``
            (the default) waits indefinitely, as before; pass a number so
            that a server which never pushes an update cannot hang the check.

    Returns:
        0 if every requested update was received and printed, 1 if
        anything went wrong (connection failure, timeout, unexpected
        payload shape, ...).
    """
    print(f"Connecting to {uri}...")
    try:
        async with websockets.connect(uri) as websocket:
            print("✅ Connected!")
            # Receive and print the requested number of updates
            for i in range(updates):
                # wait_for(timeout=None) simply awaits recv() with no deadline.
                message = await asyncio.wait_for(websocket.recv(), timeout=recv_timeout)
                data = json.loads(message)
                print(f"\n📊 Update #{i+1}:")
                print(f"  - Health: CPU {data['health']['container']['cpu_percent']}%, Memory {data['health']['container']['memory_percent']}%")
                print(f"  - Active Requests: {len(data['requests']['active'])}")
                print(f"  - Browsers: {len(data['browsers'])}")
    except asyncio.TimeoutError:
        # str(TimeoutError()) is empty, so report the timeout explicitly.
        print(f"❌ Error: no update received within {recv_timeout}s")
        return 1
    except Exception as e:
        # Deliberately broad: this is a smoke test, so any failure is
        # reported to the user and turned into a non-zero status.
        print(f"❌ Error: {e}")
        return 1
    print("\n✅ WebSocket test passed!")
    return 0
 if __name__ == "__main__":
    # Raise SystemExit directly instead of calling exit(): that helper is
    # injected by the `site` module for interactive use and is not
    # guaranteed to exist (e.g. under `python -S`).
    raise SystemExit(asyncio.run(test_websocket()))
--- a/deploy/docker/tests/cli/README.md
+++ b/deploy/docker/tests/cli/README.md
@@ -1,298 +0,0 @@
 # Crawl4AI CLI E2E Test Suite
 Comprehensive end-to-end tests for the `crwl server` command-line interface.
 ## Overview
 This test suite validates all aspects of the Docker server CLI including:
 - Basic operations (start, stop, status, logs)
 - Advanced features (scaling, modes, custom configurations)
 - Resource management and stress testing
 - Dashboard UI functionality
 - Edge cases and error handling
 **Total Tests:** 32
 - Basic: 8 tests
 - Advanced: 8 tests
 - Resource: 5 tests
 - Dashboard: 1 test
 - Edge Cases: 10 tests
 ## Prerequisites
 ```bash
 # Activate virtual environment
 source venv/bin/activate
 # For dashboard tests, install Playwright
 pip install playwright
 playwright install chromium
 # Ensure Docker is running
 docker ps
 ```
 ## Quick Start
 ```bash
 # Run all tests (except dashboard)
 ./run_tests.sh
 # Run specific category
 ./run_tests.sh basic
 ./run_tests.sh advanced
 ./run_tests.sh resource
 ./run_tests.sh edge
 # Run dashboard tests (slower, includes UI screenshots)
 ./run_tests.sh dashboard
 # Run specific test
 ./run_tests.sh basic 01
 ./run_tests.sh edge 05
 ```
 ## Test Categories
 ### 1. Basic Tests (`basic/`)
 Core CLI functionality tests.
 | Test | Description | Expected Result |
 |------|-------------|----------------|
 | `test_01_start_default.sh` | Start server with defaults | 1 replica on port 11235 |
 | `test_02_status.sh` | Check server status | Shows running state and details |
 | `test_03_stop.sh` | Stop server | Clean shutdown, port freed |
 | `test_04_start_custom_port.sh` | Start on port 8080 | Server on custom port |
 | `test_05_start_replicas.sh` | Start with 3 replicas | Multi-container deployment |
 | `test_06_logs.sh` | View server logs | Logs displayed correctly |
 | `test_07_restart.sh` | Restart server | Preserves configuration |
 | `test_08_cleanup.sh` | Force cleanup | All resources removed |
 ### 2. Advanced Tests (`advanced/`)
 Advanced features and configurations.
 | Test | Description | Expected Result |
 |------|-------------|----------------|
 | `test_01_scale_up.sh` | Scale 3 → 5 replicas | Live scaling without downtime |
 | `test_02_scale_down.sh` | Scale 5 → 2 replicas | Graceful container removal |
 | `test_03_mode_single.sh` | Explicit single mode | Single container deployment |
 | `test_04_mode_compose.sh` | Compose mode with Nginx | Multi-container with load balancer |
 | `test_05_custom_image.sh` | Custom image specification | Uses specified image tag |
 | `test_06_env_file.sh` | Environment file loading | Variables loaded correctly |
 | `test_07_stop_remove_volumes.sh` | Stop with volume removal | Volumes cleaned up |
 | `test_08_restart_with_scale.sh` | Restart with new replica count | Configuration updated |
 ### 3. Resource Tests (`resource/`)
 Resource monitoring and stress testing.
 | Test | Description | Expected Result |
 |------|-------------|----------------|
 | `test_01_memory_monitoring.sh` | Monitor memory usage | Stats accessible and reasonable |
 | `test_02_cpu_stress.sh` | Concurrent request load | Handles load without errors |
 | `test_03_max_replicas.sh` | 10 replicas stress test | Maximum scale works correctly |
 | `test_04_cleanup_verification.sh` | Verify resource cleanup | All Docker resources removed |
 | `test_05_long_running.sh` | 5-minute stability test | Server remains stable |
 ### 4. Dashboard Tests (`dashboard/`)
 Dashboard UI functionality with Playwright.
 | Test | Description | Expected Result |
 |------|-------------|----------------|
 | `test_01_dashboard_ui.py` | Full dashboard UI test | All UI elements functional |
 **Dashboard Test Details:**
 - Starts server with 3 replicas
 - Runs demo script to generate activity
 - Uses Playwright to:
  - Take screenshots of dashboard
  - Verify container filter buttons
  - Check WebSocket connection
  - Validate timeline charts
  - Test all dashboard sections
 **Screenshots saved to:** `dashboard/screenshots/`
 ### 5. Edge Case Tests (`edge/`)
 Error handling and validation.
 | Test | Description | Expected Result |
 |------|-------------|----------------|
 | `test_01_already_running.sh` | Start when already running | Proper error message |
 | `test_02_not_running.sh` | Operations when stopped | Appropriate errors |
 | `test_03_scale_single_mode.sh` | Scale single container | Error with guidance |
 | `test_04_invalid_port.sh` | Invalid port numbers | Validation errors |
 | `test_05_invalid_replicas.sh` | Invalid replica counts | Validation errors |
 | `test_06_missing_env_file.sh` | Non-existent env file | File not found error |
 | `test_07_port_in_use.sh` | Port already occupied | Port conflict error |
 | `test_08_state_corruption.sh` | Corrupted state file | Cleanup recovers |
 | `test_09_network_conflict.sh` | Docker network collision | Handles gracefully |
 | `test_10_rapid_operations.sh` | Rapid start/stop cycles | No corruption |
 ## Test Execution Workflow
 Each test follows this pattern:
 1. **Setup:** Clean state, activate venv
 2. **Execute:** Run test commands
 3. **Verify:** Check results and assertions
 4. **Cleanup:** Stop server, remove resources
 ## Running Individual Tests
 ```bash
 # Make test executable (if needed)
 chmod +x deploy/docker/tests/cli/basic/test_01_start_default.sh
 # Run directly
 ./deploy/docker/tests/cli/basic/test_01_start_default.sh
 # Or use the test runner
 ./run_tests.sh basic 01
 ```
 ## Interpreting Results
 ### Success Output
 ```
 ✅ Test passed: [description]
 ```
 ### Failure Output
 ```
 ❌ Test failed: [error message]
 ```
 ### Warning Output
 ```
 ⚠️  Warning: [issue description]
 ```
 ## Common Issues
 ### Docker Not Running
 ```
 Error: Docker daemon not running
 Solution: Start Docker Desktop or Docker daemon
 ```
 ### Port Already In Use
 ```
 Error: Port 11235 is already in use
 Solution: Stop existing server or use different port
 ```
 ### Virtual Environment Not Found
 ```
 Warning: venv not found
 Solution: Create venv and activate it
 ```
 ### Playwright Not Installed
 ```
 Error: playwright module not found
 Solution: pip install playwright && playwright install chromium
 ```
 ## Test Development
 ### Adding New Tests
 1. **Choose category:** basic, advanced, resource, dashboard, or edge
 2. **Create test file:** Follow naming pattern `test_XX_description.sh`
 3. **Use template:**
 ```bash
 #!/bin/bash
 # Test: [Description]
 # Expected: [What should happen]
 set -e
 echo "=== Test: [Name] ==="
 echo ""
 source venv/bin/activate
 # Cleanup
 crwl server stop 2>/dev/null || true
 sleep 2
 # Test logic here
 # Cleanup
 crwl server stop >/dev/null 2>&1
 echo ""
 echo "✅ Test passed: [success message]"
 ```
 4. **Make executable:** `chmod +x test_XX_description.sh`
 5. **Test it:** `./test_XX_description.sh`
 6. **Add to runner:** Tests are auto-discovered by `run_tests.sh`
 ## CI/CD Integration
 These tests can be integrated into CI/CD pipelines:
 ```yaml
 # Example GitHub Actions
 - name: Run CLI Tests
  run: |
    source venv/bin/activate
    cd deploy/docker/tests/cli
    ./run_tests.sh all
 ```
 ## Performance Considerations
 - **Basic tests:** ~2-5 minutes total
 - **Advanced tests:** ~5-10 minutes total
 - **Resource tests:** ~10-15 minutes total (including 5-min stability test)
 - **Dashboard test:** ~3-5 minutes
 - **Edge case tests:** ~5-8 minutes total
 **Full suite:** ~30-45 minutes
 ## Best Practices
 1. **Always cleanup:** Each test should cleanup after itself
 2. **Wait for readiness:** Add sleep after starting servers
 3. **Check health:** Verify health endpoint before assertions
 4. **Graceful failures:** Use `|| true` to continue on expected failures
 5. **Clear messages:** Output should clearly indicate what's being tested
 ## Troubleshooting
 ### Tests Hanging
 - Check if Docker containers are stuck
 - Look for port conflicts
 - Verify network connectivity
 ### Intermittent Failures
 - Increase sleep durations for slower systems
 - Check system resources (memory, CPU)
 - Verify Docker has enough resources allocated
 ### All Tests Failing
 - Verify Docker is running: `docker ps`
 - Check CLI is installed: `which crwl`
 - Activate venv: `source venv/bin/activate`
 - Check server manager: `crwl server status`
 ## Contributing
 When adding new tests:
 1. Follow existing naming conventions
 2. Add comprehensive documentation
 3. Test on clean system
 4. Update this README
 5. Ensure cleanup is robust
 ## License
 Same as Crawl4AI project license.
--- a/deploy/docker/tests/cli/TEST_RESULTS.md
+++ b/deploy/docker/tests/cli/TEST_RESULTS.md
@@ -1,163 +0,0 @@
 # CLI Test Suite - Execution Results
 **Date:** 2025-10-20
 **Status:** ✅ PASSED
 ## Summary
 | Category | Total | Passed | Failed | Skipped |
 |----------|-------|--------|--------|---------|
 | Basic Tests | 8 | 8 | 0 | 0 |
 | Advanced Tests | 8 | 8 | 0 | 0 |
 | Edge Case Tests | 10 | 10 | 0 | 0 |
 | Resource Tests | 3 | 3 | 0 | 2 (skipped) |
 | Dashboard UI Tests | 0 | 0 | 0 | 1 (not run) |
 | **TOTAL** | **29** | **29** | **0** | **3** |
 **Success Rate:** 100% (29/29 tests passed)
 ## Test Results by Category
 ### ✅ Basic Tests (8/8 Passed)
 | Test | Status | Notes |
 |------|--------|-------|
 | test_01_start_default | ✅ PASS | Server starts with defaults (1 replica, port 11235) |
 | test_02_status | ✅ PASS | Status command shows correct information |
 | test_03_stop | ✅ PASS | Server stops cleanly, port freed |
 | test_04_start_custom_port | ✅ PASS | Server starts on port 8080 |
 | test_05_start_replicas | ✅ PASS | Compose mode with 3 replicas |
 | test_06_logs | ✅ PASS | Logs retrieved successfully |
 | test_07_restart | ✅ PASS | Server restarts preserving config (2 replicas) |
 | test_08_cleanup | ✅ PASS | Force cleanup removes all resources |
 ### ✅ Advanced Tests (8/8 Passed)
 | Test | Status | Notes |
 |------|--------|-------|
 | test_01_scale_up | ✅ PASS | Scaled 3 → 5 replicas successfully |
 | test_02_scale_down | ✅ PASS | Scaled 5 → 2 replicas successfully |
 | test_03_mode_single | ✅ PASS | Explicit single mode works |
 | test_04_mode_compose | ✅ PASS | Compose mode with 3 replicas and Nginx |
 | test_05_custom_image | ✅ PASS | Custom image specification works |
 | test_06_env_file | ✅ PASS | Environment file loading works |
 | test_07_stop_remove_volumes | ✅ PASS | Volumes handled during cleanup |
 | test_08_restart_with_scale | ✅ PASS | Restart with scale change (2 → 4 replicas) |
 ### ✅ Edge Case Tests (10/10 Passed)
 | Test | Status | Notes |
 |------|--------|-------|
 | test_01_already_running | ✅ PASS | Proper error for duplicate start |
 | test_02_not_running | ✅ PASS | Appropriate errors when server stopped |
 | test_03_scale_single_mode | ✅ PASS | Cannot scale single mode (expected error) |
 | test_04_invalid_port | ✅ PASS | Rejected ports: 0, -1, 99999, 65536 |
 | test_05_invalid_replicas | ✅ PASS | Rejected replicas: 0, -1, 101 |
 | test_06_missing_env_file | ✅ PASS | File not found error |
 | test_07_port_in_use | ✅ PASS | Port conflict detected |
 | test_08_state_corruption | ✅ PASS | Corrupted state handled gracefully |
 | test_09_network_conflict | ✅ PASS | Network collision handled |
 | test_10_rapid_operations | ✅ PASS | Rapid start/stop/restart cycles work |
 ### ✅ Resource Tests (3/5 Completed)
 | Test | Status | Notes |
 |------|--------|-------|
 | test_01_memory_monitoring | ✅ PASS | Baseline: 9.6%, After: 12.1%, Pool: 450 MB |
 | test_02_cpu_stress | ✅ PASS | Handled 10 concurrent requests |
 | test_03_max_replicas | ⏭️ SKIP | Takes ~2 minutes (10 replicas) |
 | test_04_cleanup_verification | ✅ PASS | All resources cleaned up |
 | test_05_long_running | ⏭️ SKIP | Takes 5 minutes |
 ### Dashboard UI Tests (Not Run)
 | Test | Status | Notes |
 |------|--------|-------|
 | test_01_dashboard_ui | ⏭️ SKIP | Requires Playwright, takes ~5 minutes |
 ## Key Findings
 ### ✅ Strengths
 1. **Robust Error Handling**
   - All invalid inputs properly rejected with clear error messages
   - State corruption detected and recovered automatically
   - Port conflicts identified before container start
 2. **Scaling Functionality**
   - Live scaling works smoothly (3 → 5 → 2 replicas)
   - Mode detection works correctly (single vs compose)
   - Restart preserves configuration
 3. **Resource Management**
   - Cleanup thoroughly removes all Docker resources
   - Memory usage reasonable (9.6% → 12.1% with 5 crawls)
   - Concurrent requests handled without errors
 4. **CLI Usability**
   - Clear, color-coded output
   - Helpful error messages with hints
   - Status command shows comprehensive info
 ### 📊 Performance Observations
 - **Startup Time:** ~5 seconds for single container, ~10-12 seconds for 3 replicas
 - **Memory Usage:** Baseline 9.6%, increases to 12.1% after 5 crawls
 - **Browser Pool:** ~450 MB memory usage (reasonable)
 - **Concurrent Load:** Successfully handled 10 parallel requests
 ### 🔧 Issues Found
 None. All 29 executed tests passed; the 3 skipped tests (see "Skipped Tests Rationale") were not evaluated.
 ## Test Execution Notes
 ### Test Environment
 - **OS:** macOS (Darwin 24.3.0)
 - **Docker:** Running
 - **Python:** Virtual environment activated
 - **Date:** 2025-10-20
 ### Skipped Tests Rationale
 1. **test_03_max_replicas:** Takes ~2 minutes to start 10 replicas
 2. **test_05_long_running:** 5-minute stability test
 3. **test_01_dashboard_ui:** Requires Playwright installation, UI screenshots
 These tests are fully implemented and can be run manually when time permits.
 ## Verification Commands
 All tests can be re-run with:
 ```bash
 # Individual test
 bash deploy/docker/tests/cli/basic/test_01_start_default.sh
 # Category
 ./deploy/docker/tests/cli/run_tests.sh basic
 # All tests
 ./deploy/docker/tests/cli/run_tests.sh all
 ```
 ## Conclusion
 ✅ **The CLI test suite is comprehensive and thoroughly validates all functionality.**
 - All core features tested and working
 - Error handling is robust
 - Edge cases properly covered
 - Resource management verified
 - No bugs or issues found
 The Crawl4AI Docker server CLI is production-ready with excellent test coverage.
 ---
 **Next Steps:**
 1. Run skipped tests when time permits (optional)
 2. Integrate into CI/CD pipeline
 3. Run dashboard UI test for visual verification
 4. Document test results in main README
 **Recommendation:** ✅ Ready for production use
--- a/deploy/docker/tests/cli/TEST_SUMMARY.md
+++ b/deploy/docker/tests/cli/TEST_SUMMARY.md
@@ -1,300 +0,0 @@
 # CLI Test Suite - Implementation Summary
 ## Completed Implementation
 Successfully created a comprehensive E2E test suite for the Crawl4AI Docker server CLI.
 ## Test Suite Overview
 ### Total Tests: 32
 #### 1. Basic Tests (8 tests) ✅
 - `test_01_start_default.sh` - Start with default settings
 - `test_02_status.sh` - Status command validation
 - `test_03_stop.sh` - Clean server shutdown
 - `test_04_start_custom_port.sh` - Custom port configuration
 - `test_05_start_replicas.sh` - Multi-replica deployment
 - `test_06_logs.sh` - Log retrieval
 - `test_07_restart.sh` - Server restart
 - `test_08_cleanup.sh` - Force cleanup
 #### 2. Advanced Tests (8 tests) ✅
 - `test_01_scale_up.sh` - Scale from 3 to 5 replicas
 - `test_02_scale_down.sh` - Scale from 5 to 2 replicas
 - `test_03_mode_single.sh` - Explicit single mode
 - `test_04_mode_compose.sh` - Compose mode with Nginx
 - `test_05_custom_image.sh` - Custom image specification
 - `test_06_env_file.sh` - Environment file loading
 - `test_07_stop_remove_volumes.sh` - Volume cleanup
 - `test_08_restart_with_scale.sh` - Restart with scale change
 #### 3. Resource Tests (5 tests) ✅
 - `test_01_memory_monitoring.sh` - Memory usage tracking
 - `test_02_cpu_stress.sh` - CPU stress with concurrent requests
 - `test_03_max_replicas.sh` - Maximum (10) replicas stress test
 - `test_04_cleanup_verification.sh` - Resource cleanup verification
 - `test_05_long_running.sh` - 5-minute stability test
 #### 4. Dashboard UI Test (1 test) ✅
 - `test_01_dashboard_ui.py` - Comprehensive Playwright test
  - Automated browser testing
  - Screenshot capture (7 screenshots per run)
  - UI element validation
  - Container filter testing
  - WebSocket connection verification
 #### 5. Edge Case Tests (10 tests) ✅
 - `test_01_already_running.sh` - Duplicate start attempt
 - `test_02_not_running.sh` - Operations on stopped server
 - `test_03_scale_single_mode.sh` - Invalid scaling operation
 - `test_04_invalid_port.sh` - Port validation (0, -1, 99999, 65536)
 - `test_05_invalid_replicas.sh` - Replica validation (0, -1, 101)
 - `test_06_missing_env_file.sh` - Non-existent env file
 - `test_07_port_in_use.sh` - Port conflict detection
 - `test_08_state_corruption.sh` - State file corruption recovery
 - `test_09_network_conflict.sh` - Docker network collision handling
 - `test_10_rapid_operations.sh` - Rapid start/stop cycles
 ## Test Infrastructure
 ### Master Test Runner (`run_tests.sh`)
 - Run all tests or specific categories
 - Color-coded output (green/red/yellow)
 - Test counters (passed/failed/skipped)
 - Summary statistics
 - Individual test execution support
 ### Documentation
 - `README.md` - Comprehensive test documentation
  - Test descriptions and expected results
  - Usage instructions
  - Troubleshooting guide
  - Best practices
  - CI/CD integration examples
 - `TEST_SUMMARY.md` - Implementation summary (this file)
 ## File Structure
 ```
 deploy/docker/tests/cli/
 ├── README.md                      # Main documentation
 ├── TEST_SUMMARY.md                # This summary
 ├── run_tests.sh                   # Master test runner
 │
 ├── basic/                         # Basic CLI tests
 │   ├── test_01_start_default.sh
 │   ├── test_02_status.sh
 │   ├── test_03_stop.sh
 │   ├── test_04_start_custom_port.sh
 │   ├── test_05_start_replicas.sh
 │   ├── test_06_logs.sh
 │   ├── test_07_restart.sh
 │   └── test_08_cleanup.sh
 │
 ├── advanced/                      # Advanced feature tests
 │   ├── test_01_scale_up.sh
 │   ├── test_02_scale_down.sh
 │   ├── test_03_mode_single.sh
 │   ├── test_04_mode_compose.sh
 │   ├── test_05_custom_image.sh
 │   ├── test_06_env_file.sh
 │   ├── test_07_stop_remove_volumes.sh
 │   └── test_08_restart_with_scale.sh
 │
 ├── resource/                      # Resource and stress tests
 │   ├── test_01_memory_monitoring.sh
 │   ├── test_02_cpu_stress.sh
 │   ├── test_03_max_replicas.sh
 │   ├── test_04_cleanup_verification.sh
 │   └── test_05_long_running.sh
 │
 ├── dashboard/                     # Dashboard UI tests
 │   ├── test_01_dashboard_ui.py
 │   ├── run_dashboard_test.sh
 │   └── screenshots/               # Auto-generated screenshots
 │
 └── edge/                          # Edge case tests
    ├── test_01_already_running.sh
    ├── test_02_not_running.sh
    ├── test_03_scale_single_mode.sh
    ├── test_04_invalid_port.sh
    ├── test_05_invalid_replicas.sh
    ├── test_06_missing_env_file.sh
    ├── test_07_port_in_use.sh
    ├── test_08_state_corruption.sh
    ├── test_09_network_conflict.sh
    └── test_10_rapid_operations.sh
 ```
 ## Usage Examples
 ### Run All Tests (except dashboard)
 ```bash
 ./run_tests.sh
 ```
 ### Run Specific Category
 ```bash
 ./run_tests.sh basic
 ./run_tests.sh advanced
 ./run_tests.sh resource
 ./run_tests.sh edge
 ```
 ### Run Dashboard Tests
 ```bash
 ./run_tests.sh dashboard
 # or
 ./dashboard/run_dashboard_test.sh
 ```
 ### Run Individual Test
 ```bash
 ./run_tests.sh basic 01
 ./run_tests.sh edge 05
 ```
 ### Direct Execution
 ```bash
 ./basic/test_01_start_default.sh
 ./edge/test_01_already_running.sh
 ```
 ## Test Verification
 The following tests have been verified working:
 - ✅ `test_01_start_default.sh` - PASSED
 - ✅ `test_02_status.sh` - PASSED
 - ✅ `test_03_stop.sh` - PASSED
 - ✅ `test_03_mode_single.sh` - PASSED
 - ✅ `test_01_already_running.sh` - PASSED
 - ✅ Master test runner - PASSED
 ## Key Features
 ### Robustness
 - Each test cleans up after itself
 - Handles expected failures gracefully
 - Waits for server readiness before assertions
 - Comprehensive error checking
 ### Clarity
 - Clear test descriptions
 - Colored output for easy interpretation
 - Detailed error messages
 - Progress indicators
 ### Completeness
 - Covers all CLI commands
 - Tests success and failure paths
 - Validates error messages
 - Checks resource cleanup
 ### Maintainability
 - Consistent structure across all tests
 - Well-documented code
 - Modular test design
 - Easy to add new tests
 ## Test Coverage
 ### CLI Commands Tested
 - ✅ `crwl server start` (all options)
 - ✅ `crwl server stop` (with/without volumes)
 - ✅ `crwl server status`
 - ✅ `crwl server scale`
 - ✅ `crwl server logs`
 - ✅ `crwl server restart`
 - ✅ `crwl server cleanup`
 ### Deployment Modes Tested
 - ✅ Single container mode
 - ✅ Compose mode (multi-container)
 - ✅ Auto mode detection
 ### Features Tested
 - ✅ Custom ports
 - ✅ Custom replicas (1-10)
 - ✅ Custom images
 - ✅ Environment files
 - ✅ Live scaling
 - ✅ Configuration persistence
 - ✅ Resource cleanup
 - ✅ Dashboard UI
 ### Error Handling Tested
 - ✅ Invalid inputs (ports, replicas)
 - ✅ Missing files
 - ✅ Port conflicts
 - ✅ State corruption
 - ✅ Network conflicts
 - ✅ Rapid operations
 - ✅ Duplicate operations
 ## Performance
 ### Estimated Execution Times
 - Basic tests: ~2-5 minutes
 - Advanced tests: ~5-10 minutes
 - Resource tests: ~10-15 minutes
 - Dashboard test: ~3-5 minutes
 - Edge case tests: ~5-8 minutes
 **Total: ~30-45 minutes for full suite**
 ## Next Steps
 ### Recommended Actions
 1. ✅ Run full test suite to verify all tests
 2. ✅ Test dashboard UI test with Playwright
 3. ✅ Verify long-running stability test
 4. ✅ Integrate into CI/CD pipeline
 5. ✅ Add to project documentation
 ### Future Enhancements
 - Add performance benchmarking
 - Add load testing scenarios
 - Add network failure simulation
 - Add disk space tests
 - Add security tests
 - Add multi-host tests (Swarm mode)
 ## Notes
 ### Dependencies
 - Docker running
 - Virtual environment activated
 - `jq` for JSON parsing (often not preinstalled; install it with your package manager, e.g. `apt install jq` or `brew install jq`)
 - `bc` for calculations (installed by default on most systems)
 - Playwright for dashboard tests (optional)
 ### Test Philosophy
 - **Small:** Each test focuses on one specific aspect
 - **Smart:** Tests verify both success and failure paths
 - **Strong:** Robust cleanup and error handling
 - **Self-contained:** Each test is independent
 ### Known Limitations
 - Dashboard test requires Playwright installation
 - Long-running test takes 5 minutes
 - Max replicas test requires significant system resources
 - Some tests may need adjustment for slower systems
 ## Success Criteria
 ✅ All 32 tests created
 ✅ Test runner implemented
 ✅ Documentation complete
 ✅ Tests verified working
 ✅ File structure organized
 ✅ Error handling comprehensive
 ✅ Cleanup mechanisms robust
 ## Conclusion
 The CLI test suite is complete and ready for use. It provides comprehensive coverage of all CLI functionality, validates error handling, and ensures robustness across various scenarios.
 **Status:** ✅ COMPLETE
 **Date:** 2025-10-20
 **Tests:** 32 (8 basic + 8 advanced + 5 resource + 1 dashboard + 10 edge)
--- a/deploy/docker/tests/cli/advanced/test_01_scale_up.sh
+++ b/deploy/docker/tests/cli/advanced/test_01_scale_up.sh
@@ -1,56 +0,0 @@
 #!/bin/bash
 # Test: Scale server up from 3 to 5 replicas
 # Expected: Server scales without downtime
 set -e
 echo "=== Test: Scale Up (3 → 5 replicas) ==="
 echo ""
 PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../../../" && pwd)"
 source "$PROJECT_ROOT/venv/bin/activate"
 # Safety net: with `set -e` any failing command aborts the script, so stop
 # the server on every exit path. An EXIT trap keeps the script's own exit
 # status as long as the handler does not call `exit` itself.
 cleanup() {
    crwl server stop >/dev/null 2>&1 || true
 }
 trap cleanup EXIT
 # Cleanup
 crwl server stop 2>/dev/null || true
 sleep 2
 # Start with 3 replicas
 echo "Starting server with 3 replicas..."
 crwl server start --replicas 3 >/dev/null 2>&1
 sleep 10
 # Show the initial replica line (informational only)
 STATUS=$(crwl server status | grep "Replicas" || echo "")
 echo "Initial status: $STATUS"
 # Scale up to 5
 echo ""
 echo "Scaling up to 5 replicas..."
 crwl server scale 5
 sleep 10
 # Verify 5 replicas
 STATUS=$(crwl server status)
 echo "$STATUS"
 # Only look at the replicas line and match 5 as a whole word: a bare
 # `grep -q "5"` over the full status also matches the port 11235, so the
 # check could never fail.
 if ! echo "$STATUS" | grep -i "replicas" | grep -qw "5"; then
    echo "❌ Status does not show 5 replicas"
    exit 1
 fi
 # Verify health after scaling
 HEALTH=$(curl -s http://localhost:11235/health | jq -r '.status' 2>/dev/null || echo "error")
 if [[ "$HEALTH" != "ok" ]]; then
    echo "❌ Health check failed after scaling"
    exit 1
 fi
 # Cleanup (a failing stop still fails the test, as before)
 echo "Cleaning up..."
 crwl server stop >/dev/null 2>&1
 trap - EXIT
 echo ""
 echo "✅ Test passed: Successfully scaled from 3 to 5 replicas"
--- a/deploy/docker/tests/cli/advanced/test_02_scale_down.sh
+++ b/deploy/docker/tests/cli/advanced/test_02_scale_down.sh
@@ -1,56 +0,0 @@
 #!/bin/bash
 # Test: Scale server down from 5 to 2 replicas
 # Expected: Server scales down gracefully
 set -e
 echo "=== Test: Scale Down (5 → 2 replicas) ==="
 echo ""
 PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../../../" && pwd)"
 source "$PROJECT_ROOT/venv/bin/activate"
 # Safety net: with `set -e` any failing command aborts the script, so stop
 # the server on every exit path. An EXIT trap keeps the script's own exit
 # status as long as the handler does not call `exit` itself.
 cleanup() {
    crwl server stop >/dev/null 2>&1 || true
 }
 trap cleanup EXIT
 # Cleanup
 crwl server stop 2>/dev/null || true
 sleep 2
 # Start with 5 replicas
 echo "Starting server with 5 replicas..."
 crwl server start --replicas 5 >/dev/null 2>&1
 sleep 12
 # Show the initial replica line (informational only)
 STATUS=$(crwl server status | grep "Replicas" || echo "")
 echo "Initial status: $STATUS"
 # Scale down to 2
 echo ""
 echo "Scaling down to 2 replicas..."
 crwl server scale 2
 sleep 8
 # Verify 2 replicas
 STATUS=$(crwl server status)
 echo "$STATUS"
 # Only look at the replicas line and match 2 as a whole word: a bare
 # `grep -q "2"` over the full status also matches the port 11235, so the
 # check could never fail.
 if ! echo "$STATUS" | grep -i "replicas" | grep -qw "2"; then
    echo "❌ Status does not show 2 replicas"
    exit 1
 fi
 # Verify health after scaling down
 HEALTH=$(curl -s http://localhost:11235/health | jq -r '.status' 2>/dev/null || echo "error")
 if [[ "$HEALTH" != "ok" ]]; then
    echo "❌ Health check failed after scaling down"
    exit 1
 fi
 # Cleanup (a failing stop still fails the test, as before)
 echo "Cleaning up..."
 crwl server stop >/dev/null 2>&1
 trap - EXIT
 echo ""
 echo "✅ Test passed: Successfully scaled down from 5 to 2 replicas"
--- a/deploy/docker/tests/cli/advanced/test_03_mode_single.sh
+++ b/deploy/docker/tests/cli/advanced/test_03_mode_single.sh
@@ -1,52 +0,0 @@
 #!/bin/bash
 # Test: Start server explicitly in single mode
 # Expected: Server starts in single mode
 set -e
 echo "=== Test: Explicit Single Mode ==="
 echo ""
 PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../../../" && pwd)"
 source "$PROJECT_ROOT/venv/bin/activate"
 # Cleanup
 crwl server stop 2>/dev/null || true
 sleep 2
 # Start in single mode explicitly
 echo "Starting server in single mode..."
 crwl server start --mode single
 sleep 5
 # Check mode
 STATUS=$(crwl server status)
 echo "$STATUS"
 if ! echo "$STATUS" | grep -q "single"; then
    echo "❌ Mode is not 'single'"
    crwl server stop
    exit 1
 fi
 # NOTE(review): this matches any "1" anywhere in the status output. If the
 # status shows the default port 11235 (as the basic status test expects), the
 # check passes regardless of the replica count — confirm and tighten.
 if ! echo "$STATUS" | grep -q "1"; then
    echo "❌ Should have 1 replica in single mode"
    crwl server stop
    exit 1
 fi
 # Verify health
 HEALTH=$(curl -s http://localhost:11235/health | jq -r '.status' 2>/dev/null || echo "error")
 if [[ "$HEALTH" != "ok" ]]; then
    echo "❌ Health check failed"
    crwl server stop
    exit 1
 fi
 # Cleanup
 echo "Cleaning up..."
 crwl server stop >/dev/null 2>&1
 echo ""
 echo "✅ Test passed: Server started in single mode"
--- a/deploy/docker/tests/cli/advanced/test_04_mode_compose.sh
+++ b/deploy/docker/tests/cli/advanced/test_04_mode_compose.sh
@@ -1,52 +0,0 @@
 #!/bin/bash
 # Test: Start server in compose mode with replicas
 # Expected: Server starts in compose mode with Nginx
 set -e
 echo "=== Test: Compose Mode with 3 Replicas ==="
 echo ""
 PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../../../" && pwd)"
 source "$PROJECT_ROOT/venv/bin/activate"
 # Cleanup
 crwl server stop 2>/dev/null || true
 sleep 2
 # Start in compose mode
 echo "Starting server in compose mode with 3 replicas..."
 crwl server start --mode compose --replicas 3
 sleep 12
 # Check replica count.
 # Only the "Replicas" line is inspected: grepping the whole status for "3"
 # always succeeds because the default port 11235 contains that digit.
 STATUS=$(crwl server status)
 echo "$STATUS"
 if ! echo "$STATUS" | grep -i "replicas" | grep -qw "3"; then
    echo "❌ Status does not show 3 replicas"
    crwl server stop
    exit 1
 fi
 # Verify Nginx is running (load balancer)
 NGINX_RUNNING=$(docker ps --filter "name=nginx" --format "{{.Names}}" || echo "")
 if [[ -z "$NGINX_RUNNING" ]]; then
    echo "⚠️  Warning: Nginx load balancer not detected (may be using swarm or single mode)"
 fi
 # Verify health through load balancer
 HEALTH=$(curl -s http://localhost:11235/health | jq -r '.status' 2>/dev/null || echo "error")
 if [[ "$HEALTH" != "ok" ]]; then
    echo "❌ Health check failed"
    crwl server stop
    exit 1
 fi
 # Cleanup
 echo "Cleaning up..."
 crwl server stop >/dev/null 2>&1
 echo ""
 echo "✅ Test passed: Server started in compose mode"
--- a/deploy/docker/tests/cli/advanced/test_05_custom_image.sh
+++ b/deploy/docker/tests/cli/advanced/test_05_custom_image.sh
@@ -1,47 +0,0 @@
 #!/bin/bash
 # Test: Start server with custom image tag
 # Expected: Server uses specified image
 set -e
 echo "=== Test: Custom Image Specification ==="
 echo ""
 PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../../../" && pwd)"
 source "$PROJECT_ROOT/venv/bin/activate"
 # Cleanup
 crwl server stop 2>/dev/null || true
 sleep 2
 # Use latest tag explicitly (or specify a different tag if available)
 IMAGE="unclecode/crawl4ai:latest"
 echo "Starting server with image: $IMAGE..."
 crwl server start --image "$IMAGE"
 sleep 5
 # Check status shows correct image
 STATUS=$(crwl server status)
 echo "$STATUS"
 if ! echo "$STATUS" | grep -q "crawl4ai"; then
    echo "❌ Status does not show correct image"
    crwl server stop
    exit 1
 fi
 # Verify health
 HEALTH=$(curl -s http://localhost:11235/health | jq -r '.status' 2>/dev/null || echo "error")
 if [[ "$HEALTH" != "ok" ]]; then
    echo "❌ Health check failed"
    crwl server stop
    exit 1
 fi
 # Cleanup
 echo "Cleaning up..."
 crwl server stop >/dev/null 2>&1
 echo ""
 echo "✅ Test passed: Server started with custom image"
--- a/deploy/docker/tests/cli/advanced/test_06_env_file.sh
+++ b/deploy/docker/tests/cli/advanced/test_06_env_file.sh
@@ -1,47 +0,0 @@
 #!/bin/bash
 # Test: Start server with environment file
 # Expected: Server loads environment variables
 set -e
 echo "=== Test: Start with Environment File ==="
 echo ""
 PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../../../" && pwd)"
 source "$PROJECT_ROOT/venv/bin/activate"
 # Create a uniquely named test env file. The EXIT trap removes it on every
 # exit path, including a `set -e` abort (e.g. when `crwl server start` fails).
 TEST_ENV_FILE="$(mktemp /tmp/test_crawl4ai.XXXXXX)"
 trap 'rm -f "$TEST_ENV_FILE"' EXIT
 cat > "$TEST_ENV_FILE" <<EOF
 TEST_VAR=test_value
 OPENAI_API_KEY=sk-test-key
 EOF
 echo "Created test env file at $TEST_ENV_FILE"
 # Cleanup
 crwl server stop 2>/dev/null || true
 sleep 2
 # Start with env file
 echo "Starting server with env file..."
 crwl server start --env-file "$TEST_ENV_FILE"
 sleep 5
 # Verify server started
 HEALTH=$(curl -s http://localhost:11235/health | jq -r '.status' 2>/dev/null || echo "error")
 if [[ "$HEALTH" != "ok" ]]; then
    echo "❌ Health check failed"
    crwl server stop
    exit 1
 fi
 # Cleanup (the env file itself is removed by the EXIT trap)
 echo "Cleaning up..."
 crwl server stop >/dev/null 2>&1
 echo ""
 echo "✅ Test passed: Server started with environment file"
--- a/deploy/docker/tests/cli/advanced/test_07_stop_remove_volumes.sh
+++ b/deploy/docker/tests/cli/advanced/test_07_stop_remove_volumes.sh
@@ -1,49 +0,0 @@
 #!/bin/bash
 # Test: Stop server with volume removal
 # Expected: Volumes are removed along with containers
 set -e
 echo "=== Test: Stop with Remove Volumes ==="
 echo ""
 PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../../../" && pwd)"
 source "$PROJECT_ROOT/venv/bin/activate"
 # Start server (which may create volumes)
 echo "Starting server..."
 crwl server start --replicas 2 >/dev/null 2>&1
 sleep 8
 # Make some requests to populate data
 echo "Making requests to populate data..."
 curl -s -X POST http://localhost:11235/crawl \
  -H "Content-Type: application/json" \
  -d '{"urls": ["https://httpbin.org/html"], "crawler_config": {}}' > /dev/null || true
 sleep 2
 # Stop with volume removal (needs confirmation, so we'll use cleanup instead)
 echo "Stopping server with volume removal..."
 # Note: --remove-volumes requires confirmation, so we use cleanup --force
 crwl server cleanup --force >/dev/null 2>&1
 sleep 3
 # Verify volumes are removed
 echo "Checking for remaining volumes..."
 VOLUMES=$(docker volume ls --filter "name=crawl4ai" --format "{{.Name}}" || echo "")
 if [[ -n "$VOLUMES" ]]; then
    echo "⚠️  Warning: Some volumes still exist: $VOLUMES"
    echo "  (This may be expected if using system-wide volumes)"
 fi
 # Verify server is stopped
 STATUS=$(crwl server status | grep "No server" || echo "RUNNING")
 if [[ "$STATUS" == "RUNNING" ]]; then
    echo "❌ Server still running after stop"
    exit 1
 fi
 echo ""
 echo "✅ Test passed: Server stopped and volumes handled"
--- a/deploy/docker/tests/cli/advanced/test_08_restart_with_scale.sh
+++ b/deploy/docker/tests/cli/advanced/test_08_restart_with_scale.sh
@@ -1,56 +0,0 @@
 #!/bin/bash
 # Test: Restart server with different replica count
 # Expected: Server restarts with new replica count
 set -e
 echo "=== Test: Restart with Scale Change ==="
 echo ""
 PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../../../" && pwd)"
 source "$PROJECT_ROOT/venv/bin/activate"
 # Cleanup
 crwl server stop 2>/dev/null || true
 sleep 2
 # Start with 2 replicas
 echo "Starting server with 2 replicas..."
 crwl server start --replicas 2 >/dev/null 2>&1
 sleep 8
 # Verify 2 replicas
 STATUS=$(crwl server status | grep "Replicas" || echo "")
 echo "Initial: $STATUS"
 # Restart with 4 replicas
 echo ""
 echo "Restarting with 4 replicas..."
 crwl server restart --replicas 4
 sleep 10
 # Verify 4 replicas.
 # Only the "Replicas" line is inspected: an unanchored grep for "4" would be
 # satisfied by any other 4 that happens to appear in the status output.
 STATUS=$(crwl server status)
 echo "$STATUS"
 if ! echo "$STATUS" | grep -i "replicas" | grep -qw "4"; then
    echo "❌ Status does not show 4 replicas after restart"
    crwl server stop
    exit 1
 fi
 # Verify health
 HEALTH=$(curl -s http://localhost:11235/health | jq -r '.status' 2>/dev/null || echo "error")
 if [[ "$HEALTH" != "ok" ]]; then
    echo "❌ Health check failed after restart"
    crwl server stop
    exit 1
 fi
 # Cleanup
 echo "Cleaning up..."
 crwl server stop >/dev/null 2>&1
 echo ""
 echo "✅ Test passed: Server restarted with new replica count"
--- a/deploy/docker/tests/cli/basic/test_01_start_default.sh
+++ b/deploy/docker/tests/cli/basic/test_01_start_default.sh
@@ -1,52 +0,0 @@
 #!/bin/bash
 # Test: Start server with default settings
 # Expected: Server starts with 1 replica on port 11235
 set -e
 echo "=== Test: Start Server with Defaults ==="
 echo "Expected: 1 replica, port 11235, auto mode"
 echo ""
 # Activate virtual environment
 # Navigate to project root and activate venv
 PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../../../" && pwd)"
 source "$PROJECT_ROOT/venv/bin/activate"
 # Cleanup any existing server
 echo "Cleaning up any existing server..."
 crwl server stop 2>/dev/null || true
 sleep 2
 # Start server with defaults
 echo "Starting server with default settings..."
 crwl server start
 # Wait for server to be ready
 echo "Waiting for server to be healthy..."
 sleep 5
 # Verify server is running
 echo "Checking server status..."
 STATUS=$(crwl server status | grep "Running" || echo "NOT_RUNNING")
 if [[ "$STATUS" == "NOT_RUNNING" ]]; then
    echo "❌ Server failed to start"
    crwl server stop
    exit 1
 fi
 # Check health endpoint
 echo "Checking health endpoint..."
 HEALTH=$(curl -s http://localhost:11235/health | jq -r '.status' 2>/dev/null || echo "error")
 if [[ "$HEALTH" != "ok" ]]; then
    echo "❌ Health check failed: $HEALTH"
    crwl server stop
    exit 1
 fi
 # Cleanup
 echo "Cleaning up..."
 crwl server stop
 echo ""
 echo "✅ Test passed: Server started with defaults and responded to health check"
--- a/deploy/docker/tests/cli/basic/test_02_status.sh
+++ b/deploy/docker/tests/cli/basic/test_02_status.sh
@@ -1,42 +0,0 @@
 #!/bin/bash
 # Test: Check server status command
 # Expected: Shows running status with correct details
 set -e
 echo "=== Test: Server Status Command ==="
 echo ""
 PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../../../" && pwd)"
 source "$PROJECT_ROOT/venv/bin/activate"
 # Start server first
 echo "Starting server..."
 crwl server start >/dev/null 2>&1
 sleep 5
 # Check status
 echo "Checking server status..."
 STATUS_OUTPUT=$(crwl server status)
 echo "$STATUS_OUTPUT"
 echo ""
 # Verify output contains expected fields
 if ! echo "$STATUS_OUTPUT" | grep -q "Running"; then
    echo "❌ Status does not show 'Running'"
    crwl server stop
    exit 1
 fi
 if ! echo "$STATUS_OUTPUT" | grep -q "11235"; then
    echo "❌ Status does not show correct port"
    crwl server stop
    exit 1
 fi
 # Cleanup
 echo "Cleaning up..."
 crwl server stop >/dev/null 2>&1
 echo ""
 echo "✅ Test passed: Status command shows correct information"
--- a/deploy/docker/tests/cli/basic/test_03_stop.sh
+++ b/deploy/docker/tests/cli/basic/test_03_stop.sh
@@ -1,45 +0,0 @@
 #!/bin/bash
 # Test: Stop server command
 # Expected: Server stops cleanly and port becomes available
 set -e
 echo "=== Test: Stop Server Command ==="
 echo ""
 PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../../../" && pwd)"
 source "$PROJECT_ROOT/venv/bin/activate"
 # Start server first
 echo "Starting server..."
 crwl server start >/dev/null 2>&1
 sleep 5
 # Verify running
 echo "Verifying server is running..."
 if ! curl -s http://localhost:11235/health > /dev/null 2>&1; then
    echo "❌ Server is not running before stop"
    exit 1
 fi
 # Stop server
 echo "Stopping server..."
 crwl server stop
 # Verify stopped
 echo "Verifying server is stopped..."
 sleep 3
 if curl -s http://localhost:11235/health > /dev/null 2>&1; then
    echo "❌ Server is still responding after stop"
    exit 1
 fi
 # Check status shows not running
 STATUS=$(crwl server status | grep "No server" || echo "RUNNING")
 if [[ "$STATUS" == "RUNNING" ]]; then
    echo "❌ Status still shows server as running"
    exit 1
 fi
 echo ""
 echo "✅ Test passed: Server stopped cleanly"
--- a/deploy/docker/tests/cli/basic/test_04_start_custom_port.sh
+++ b/deploy/docker/tests/cli/basic/test_04_start_custom_port.sh
@@ -1,46 +0,0 @@
 #!/bin/bash
 # Test: Start server with custom port
 # Expected: Server starts on port 8080 instead of default 11235
 set -e
 echo "=== Test: Start Server with Custom Port ==="
 echo "Expected: Server on port 8080"
 echo ""
 PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../../../" && pwd)"
 source "$PROJECT_ROOT/venv/bin/activate"
 # Cleanup
 crwl server stop 2>/dev/null || true
 sleep 2
 # Start on custom port
 echo "Starting server on port 8080..."
 crwl server start --port 8080
 sleep 5
 # Check health on custom port
 echo "Checking health on port 8080..."
 HEALTH=$(curl -s http://localhost:8080/health | jq -r '.status' 2>/dev/null || echo "error")
 if [[ "$HEALTH" != "ok" ]]; then
    echo "❌ Health check failed on port 8080: $HEALTH"
    crwl server stop
    exit 1
 fi
 # Verify default port is NOT responding
 echo "Verifying port 11235 is not in use..."
 if curl -s http://localhost:11235/health > /dev/null 2>&1; then
    echo "❌ Server is also running on default port 11235"
    crwl server stop
    exit 1
 fi
 # Cleanup
 echo "Cleaning up..."
 crwl server stop
 echo ""
 echo "✅ Test passed: Server started on custom port 8080"
--- a/deploy/docker/tests/cli/basic/test_05_start_replicas.sh
+++ b/deploy/docker/tests/cli/basic/test_05_start_replicas.sh
@@ -1,54 +0,0 @@
 #!/bin/bash
 # Test: Start server with multiple replicas
 # Expected: Server starts with 3 replicas in compose mode
 set -e
 echo "=== Test: Start Server with 3 Replicas ==="
 echo ""
 PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../../../" && pwd)"
 source "$PROJECT_ROOT/venv/bin/activate"
 # Cleanup
 crwl server stop 2>/dev/null || true
 sleep 2
 # Start with 3 replicas
 echo "Starting server with 3 replicas..."
 crwl server start --replicas 3
 sleep 10
 # Check status shows 3 replicas.
 # Only the "Replicas" line is inspected: grepping the whole status for "3"
 # always succeeds because the default port 11235 contains that digit.
 echo "Checking status..."
 STATUS_OUTPUT=$(crwl server status)
 echo "$STATUS_OUTPUT"
 if ! echo "$STATUS_OUTPUT" | grep -i "replicas" | grep -qw "3"; then
    echo "❌ Status does not show 3 replicas"
    crwl server stop
    exit 1
 fi
 # Check health endpoint
 echo "Checking health endpoint..."
 HEALTH=$(curl -s http://localhost:11235/health | jq -r '.status' 2>/dev/null || echo "error")
 if [[ "$HEALTH" != "ok" ]]; then
    echo "❌ Health check failed"
    crwl server stop
    exit 1
 fi
 # Check container discovery (should show 3 containers eventually)
 echo "Checking container discovery..."
 sleep 5  # Wait for heartbeats
 CONTAINERS=$(curl -s http://localhost:11235/monitor/containers | jq -r '.count' 2>/dev/null || echo "0")
 echo "Container count: $CONTAINERS"
 # Cleanup
 echo "Cleaning up..."
 crwl server stop
 echo ""
 echo "✅ Test passed: Server started with 3 replicas"
--- a/deploy/docker/tests/cli/basic/test_06_logs.sh
+++ b/deploy/docker/tests/cli/basic/test_06_logs.sh
@@ -1,47 +0,0 @@
 #!/bin/bash
 # Test: View server logs
 # Expected: Logs are displayed without errors
 set -e
 echo "=== Test: Server Logs Command ==="
 echo ""
 PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../../../" && pwd)"
 source "$PROJECT_ROOT/venv/bin/activate"
 # Start server
 echo "Starting server..."
 crwl server start >/dev/null 2>&1
 sleep 5
 # Make a request to generate some logs
 echo "Making request to generate logs..."
 curl -s http://localhost:11235/health > /dev/null
 # Check logs (tail)
 echo "Fetching logs (last 50 lines)..."
 LOGS=$(crwl server logs --tail 50 2>&1 || echo "ERROR")
 if [[ "$LOGS" == "ERROR" ]]; then
    echo "❌ Failed to retrieve logs"
    crwl server stop
    exit 1
 fi
 echo "Log sample (first 10 lines):"
 echo "$LOGS" | head -n 10
 echo ""
 # Verify logs contain something (not empty)
 if [[ -z "$LOGS" ]]; then
    echo "❌ Logs are empty"
    crwl server stop
    exit 1
 fi
 # Cleanup
 echo "Cleaning up..."
 crwl server stop >/dev/null 2>&1
 echo ""
 echo "✅ Test passed: Logs retrieved successfully"
--- a/deploy/docker/tests/cli/basic/test_07_restart.sh
+++ b/deploy/docker/tests/cli/basic/test_07_restart.sh
@@ -1,55 +0,0 @@
 #!/bin/bash
 # Test: Restart server command
 # Expected: Server restarts with same configuration
 set -e
 echo "=== Test: Restart Server Command ==="
 echo ""
 PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../../../" && pwd)"
 source "$PROJECT_ROOT/venv/bin/activate"
 # Start server with specific config
 echo "Starting server with 2 replicas..."
 crwl server start --replicas 2 >/dev/null 2>&1
 sleep 8
 # Get initial container ID
 echo "Getting initial state..."
 INITIAL_STATUS=$(crwl server status)
 echo "$INITIAL_STATUS"
 # Restart
 echo ""
 echo "Restarting server..."
 crwl server restart
 sleep 8
 # Check status after restart
 echo "Checking status after restart..."
 RESTART_STATUS=$(crwl server status)
 echo "$RESTART_STATUS"
 # Verify still has 2 replicas.
 # Only the "Replicas" line is inspected: grepping the whole status for "2"
 # always succeeds because the default port 11235 contains that digit.
 if ! echo "$RESTART_STATUS" | grep -i "replicas" | grep -qw "2"; then
    echo "❌ Replica count not preserved after restart"
    crwl server stop
    exit 1
 fi
 # Verify health
 HEALTH=$(curl -s http://localhost:11235/health | jq -r '.status' 2>/dev/null || echo "error")
 if [[ "$HEALTH" != "ok" ]]; then
    echo "❌ Health check failed after restart"
    crwl server stop
    exit 1
 fi
 # Cleanup
 echo "Cleaning up..."
 crwl server stop >/dev/null 2>&1
 echo ""
 echo "✅ Test passed: Server restarted with preserved configuration"
--- a/deploy/docker/tests/cli/basic/test_08_cleanup.sh
+++ b/deploy/docker/tests/cli/basic/test_08_cleanup.sh
@@ -1,46 +0,0 @@
 #!/bin/bash
 # Test: Force cleanup command
 # Expected: All resources removed even if state is corrupted
 set -e
 echo "=== Test: Force Cleanup Command ==="
 echo ""
 PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../../../" && pwd)"
 source "$PROJECT_ROOT/venv/bin/activate"
 # Start server
 echo "Starting server..."
 crwl server start >/dev/null 2>&1
 sleep 5
 # Run cleanup (will prompt, so use force flag)
 echo "Running force cleanup..."
 crwl server cleanup --force
 sleep 3
 # Verify no containers running
 echo "Verifying cleanup..."
 CONTAINERS=$(docker ps --filter "name=crawl4ai" --format "{{.Names}}" || echo "")
 if [[ -n "$CONTAINERS" ]]; then
    echo "❌ Crawl4AI containers still running: $CONTAINERS"
    exit 1
 fi
 # Verify port is free
 if curl -s http://localhost:11235/health > /dev/null 2>&1; then
    echo "❌ Server still responding after cleanup"
    exit 1
 fi
 # Verify status shows not running
 STATUS=$(crwl server status | grep "No server" || echo "RUNNING")
 if [[ "$STATUS" == "RUNNING" ]]; then
    echo "❌ Status still shows server running after cleanup"
    exit 1
 fi
 echo ""
 echo "✅ Test passed: Force cleanup removed all resources"
--- a/deploy/docker/tests/cli/dashboard/run_dashboard_test.sh
+++ b/deploy/docker/tests/cli/dashboard/run_dashboard_test.sh
@@ -1,27 +0,0 @@
 #!/bin/bash
 # Wrapper script to run dashboard UI test with proper environment.
 # The project root is resolved from this script's own location (as the other
 # CLI test scripts do) so the wrapper works from any working directory.
 set -e
 echo "=== Dashboard UI Test ==="
 echo ""
 PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../../../" && pwd)"
 # Run from the project root so the relative test path below resolves
 cd "$PROJECT_ROOT"
 # Activate virtual environment
 source "$PROJECT_ROOT/venv/bin/activate"
 # Make sure playwright is installed
 echo "Checking Playwright installation..."
 python -c "import playwright" 2>/dev/null || {
    echo "Installing Playwright..."
    pip install playwright
    playwright install chromium
 }
 # Run the test
 echo ""
 echo "Running dashboard UI test..."
 python deploy/docker/tests/cli/dashboard/test_01_dashboard_ui.py
 echo ""
 echo "✅ Dashboard test complete"
 echo "Check deploy/docker/tests/cli/dashboard/screenshots/ for results"
--- a/deploy/docker/tests/cli/dashboard/test_01_dashboard_ui.py
+++ b/deploy/docker/tests/cli/dashboard/test_01_dashboard_ui.py
@@ -1,225 +0,0 @@
 #!/usr/bin/env python3
 """
 Dashboard UI Test with Playwright
 Tests the monitoring dashboard UI functionality
 """
 import asyncio
 import subprocess
 import time
 import os
 from pathlib import Path
 from playwright.async_api import async_playwright
 BASE_URL = "http://localhost:11235"
 SCREENSHOT_DIR = Path(__file__).parent / "screenshots"
 async def start_server():
    """Start a fresh 3-replica server and wait a fixed time for it to come up.

    Any server that is already running is stopped first; the output and exit
    status of that stop are ignored. The pauses use ``asyncio.sleep`` instead
    of a blocking ``time.sleep`` because this is a coroutine.

    Raises:
        RuntimeError: If ``crwl server start`` exits with a non-zero status.
    """
    print("Starting server with 3 replicas...")
    subprocess.run(
        ["crwl", "server", "stop"],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    await asyncio.sleep(2)
    result = subprocess.run(
        ["crwl", "server", "start", "--replicas", "3"],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        # Fall back to stdout when the CLI reported its error there
        details = result.stderr.strip() or result.stdout.strip()
        raise RuntimeError(f"Failed to start server: {details}")
    print("Waiting for server to be ready...")
    await asyncio.sleep(12)
 async def run_demo_script():
    """Launch the monitor demo script in the background to generate activity.

    Returns:
        subprocess.Popen: Handle of the running demo process. The caller is
        responsible for terminating it.
    """
    import sys  # local import: only needed to locate the running interpreter

    print("Starting demo script to generate dashboard activity...")
    demo_path = Path(__file__).parent.parent.parent / "monitor" / "demo_monitor_dashboard.py"
    # Use the interpreter running this test so the demo shares its environment;
    # a bare "python" may resolve to a different installation.
    # Output is discarded rather than piped: nothing reads the pipes, and a
    # full pipe buffer would block the child process.
    process = subprocess.Popen(
        [sys.executable, str(demo_path)],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    # Let it run for a bit to generate some data
    print("Waiting for demo to generate data...")
    await asyncio.sleep(10)
    return process
 async def test_dashboard_ui():
    """Smoke-test the monitoring dashboard UI with Playwright.

    Opens ``{BASE_URL}/dashboard`` in headless Chromium (1920x1080 viewport),
    probes the page for the expected cards, filter buttons, sections and tabs,
    and saves numbered screenshots into ``SCREENSHOT_DIR``.

    Missing elements are only reported via ``print``; none of the element
    checks raises. The browser is closed even if a step fails.
    """
    # Create screenshot directory
    SCREENSHOT_DIR.mkdir(exist_ok=True)
    print(f"Screenshots will be saved to: {SCREENSHOT_DIR}")
    async with async_playwright() as p:
        # Launch browser
        print("\nLaunching browser...")
        browser = await p.chromium.launch(headless=True)
        context = await browser.new_context(viewport={'width': 1920, 'height': 1080})
        page = await context.new_page()
        try:
            # Navigate to dashboard
            print(f"Navigating to {BASE_URL}/dashboard")
            await page.goto(f"{BASE_URL}/dashboard", wait_until="networkidle")
            # Extra fixed pause after the network goes idle, before capturing
            await asyncio.sleep(3)
            # Take full dashboard screenshot
            print("Taking full dashboard screenshot...")
            await page.screenshot(path=SCREENSHOT_DIR / "01_full_dashboard.png", full_page=True)
            print(f"  ✅ Saved: 01_full_dashboard.png")
            # Verify page title
            title = await page.title()
            print(f"\nPage title: {title}")
            if "Monitor" not in title and "Dashboard" not in title:
                print("  ⚠️  Warning: Title doesn't contain 'Monitor' or 'Dashboard'")
            # Check for infrastructure card (container filters)
            print("\nChecking Infrastructure card...")
            infrastructure = await page.query_selector('.card h3:has-text("Infrastructure")')
            if infrastructure:
                print("  ✅ Infrastructure card found")
                await page.screenshot(path=SCREENSHOT_DIR / "02_infrastructure_card.png")
                print(f"  ✅ Saved: 02_infrastructure_card.png")
            else:
                print("  ❌ Infrastructure card not found")
            # Check for container filter buttons (All, C-1, C-2, C-3)
            # Only the "All" button is actually looked up and clicked below.
            print("\nChecking container filter buttons...")
            all_button = await page.query_selector('.filter-btn[data-container="all"]')
            if all_button:
                print("  ✅ 'All' filter button found")
                # Take screenshot of filter area
                await all_button.screenshot(path=SCREENSHOT_DIR / "03_filter_buttons.png")
                print(f"  ✅ Saved: 03_filter_buttons.png")
                # Test clicking filter button
                await all_button.click()
                await asyncio.sleep(1)
                print("  ✅ Clicked 'All' filter button")
            else:
                print("  ⚠️  'All' filter button not found (may appear after containers register)")
            # Check for WebSocket connection indicator
            print("\nChecking WebSocket connection...")
            ws_indicator = await page.query_selector('.ws-status, .connection-status, [class*="websocket"]')
            if ws_indicator:
                print("  ✅ WebSocket indicator found")
            else:
                print("  ⚠️  WebSocket indicator not found in DOM")
            # Check for main dashboard sections
            # Each entry pairs a display name with a CSS selector list; the
            # [class*=...] alternatives match any class containing the substring.
            print("\nChecking dashboard sections...")
            sections = [
                ("Active Requests", ".active-requests, [class*='active']"),
                ("Completed Requests", ".completed-requests, [class*='completed']"),
                ("Browsers", ".browsers, [class*='browser']"),
                ("Timeline", ".timeline, [class*='timeline']"),
            ]
            for section_name, selector in sections:
                element = await page.query_selector(selector)
                if element:
                    print(f"  ✅ {section_name} section found")
                else:
                    print(f"  ⚠️  {section_name} section not found with selector: {selector}")
            # Scroll to different sections and take screenshots
            # A section whose heading is not found is skipped silently.
            print("\nTaking section screenshots...")
            # Requests section
            requests = await page.query_selector('.card h3:has-text("Requests")')
            if requests:
                await requests.scroll_into_view_if_needed()
                await asyncio.sleep(1)
                await page.screenshot(path=SCREENSHOT_DIR / "04_requests_section.png")
                print(f"  ✅ Saved: 04_requests_section.png")
            # Browsers section
            browsers = await page.query_selector('.card h3:has-text("Browsers")')
            if browsers:
                await browsers.scroll_into_view_if_needed()
                await asyncio.sleep(1)
                await page.screenshot(path=SCREENSHOT_DIR / "05_browsers_section.png")
                print(f"  ✅ Saved: 05_browsers_section.png")
            # Timeline section
            timeline = await page.query_selector('.card h3:has-text("Timeline")')
            if timeline:
                await timeline.scroll_into_view_if_needed()
                await asyncio.sleep(1)
                await page.screenshot(path=SCREENSHOT_DIR / "06_timeline_section.png")
                print(f"  ✅ Saved: 06_timeline_section.png")
            # Check for tabs (if they exist)
            print("\nChecking for tabs...")
            tabs = await page.query_selector_all('.tab, [role="tab"]')
            if tabs:
                print(f"  ✅ Found {len(tabs)} tabs")
                for i, tab in enumerate(tabs[:5]):  # Check first 5 tabs
                    tab_text = await tab.inner_text()
                    print(f"    - Tab {i+1}: {tab_text}")
            else:
                print("  ℹ️  No tab elements found")
            # Wait for any animations to complete
            await asyncio.sleep(2)
            # Take final screenshot
            print("\nTaking final screenshot...")
            await page.screenshot(path=SCREENSHOT_DIR / "07_final_state.png", full_page=True)
            print(f"  ✅ Saved: 07_final_state.png")
            print("\n" + "="*60)
            print("Dashboard UI Test Complete!")
            print(f"Screenshots saved to: {SCREENSHOT_DIR}")
            print("="*60)
        finally:
            # Always release the browser, even when a step above raised
            await browser.close()
 async def cleanup():
    """Shut the server down quietly; the stop command's output is discarded."""
    print("\nCleaning up...")
    stop_command = ["crwl", "server", "stop"]
    silenced = {"stdout": subprocess.DEVNULL, "stderr": subprocess.DEVNULL}
    subprocess.run(stop_command, **silenced)
    print("✅ Cleanup complete")
 async def main():
    """Run the dashboard UI test end to end.

    Starts the server, launches the demo script to generate activity, runs
    the UI test, and always tears everything down afterwards. Any failure is
    printed and then re-raised to the caller.
    """
    demo_process = None
    try:
        # Start server
        await start_server()
        # Run demo script to generate activity
        demo_process = await run_demo_script()
        # Run dashboard UI test
        await test_dashboard_ui()
        print("\n✅ All dashboard UI tests passed!")
    except Exception as e:
        print(f"\n❌ Test failed: {e}")
        raise
    finally:
        try:
            # Stop demo script
            if demo_process is not None:
                demo_process.terminate()
                try:
                    demo_process.wait(timeout=5)
                except subprocess.TimeoutExpired:
                    # The demo ignored SIGTERM; force it down so it is not orphaned
                    demo_process.kill()
                    demo_process.wait()
        finally:
            # Cleanup server; runs even if stopping the demo script failed
            await cleanup()
 if __name__ == "__main__":
    asyncio.run(main())
--- a/deploy/docker/tests/cli/edge/test_01_already_running.sh
+++ b/deploy/docker/tests/cli/edge/test_01_already_running.sh
@@ -1,48 +0,0 @@
 #!/bin/bash
 # Test: Try starting server when already running
 # Expected: Error message indicating server is already running
 set -e
 echo "=== Test: Start When Already Running ==="
 echo ""
 PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../../../" && pwd)"
 source "$PROJECT_ROOT/venv/bin/activate"
 # Cleanup
 crwl server stop 2>/dev/null || true
 sleep 2
 # Start server
 echo "Starting server..."
 crwl server start >/dev/null 2>&1
 sleep 5
 # Try to start again
 echo ""
 echo "Attempting to start server again (should fail)..."
 OUTPUT=$(crwl server start 2>&1 || true)
 echo "$OUTPUT"
 # Verify error message
 if ! echo "$OUTPUT" | grep -iq "already running"; then
    echo ""
    echo "❌ Test failed: Expected 'already running' error message"
    crwl server stop
    exit 1
 fi
 # Verify original server still running
 HEALTH=$(curl -s http://localhost:11235/health | jq -r '.status' 2>/dev/null || echo "error")
 if [[ "$HEALTH" != "ok" ]]; then
    echo "❌ Original server is not running"
    crwl server stop
    exit 1
 fi
 # Cleanup
 crwl server stop >/dev/null 2>&1
 # Report success only after every check has passed, so a later failure is
 # never preceded by a "Test passed" line.
 echo ""
 echo "✅ Test passed: Proper error for already running server"
--- a/deploy/docker/tests/cli/edge/test_02_not_running.sh
+++ b/deploy/docker/tests/cli/edge/test_02_not_running.sh
@@ -1,50 +0,0 @@
 #!/bin/bash
 # Test: Operations when server is not running
 # Expected: Appropriate error messages
 set -e
 echo "=== Test: Operations When Not Running ==="
 echo ""
 PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../../../" && pwd)"
 source "$PROJECT_ROOT/venv/bin/activate"
 # Make sure nothing is running
 crwl server stop 2>/dev/null || true
 sleep 2
 # Try status when not running
 echo "Checking status when not running..."
 OUTPUT=$(crwl server status 2>&1 || true)
 echo "$OUTPUT"
 echo ""
 if ! echo "$OUTPUT" | grep -iq "no server"; then
    echo "❌ Status should indicate no server running"
    exit 1
 fi
 # Try stop when not running
 echo "Trying to stop when not running..."
 OUTPUT=$(crwl server stop 2>&1 || true)
 echo "$OUTPUT"
 echo ""
 if ! echo "$OUTPUT" | grep -iq "no server\|not running"; then
    echo "❌ Stop should indicate no server running"
    exit 1
 fi
 # Try scale when not running
 echo "Trying to scale when not running..."
 OUTPUT=$(crwl server scale 3 2>&1 || true)
 echo "$OUTPUT"
 echo ""
 if ! echo "$OUTPUT" | grep -iq "no server\|not running"; then
    echo "❌ Scale should indicate no server running"
    exit 1
 fi
 echo "✅ Test passed: Appropriate errors for operations when not running"
--- a/deploy/docker/tests/cli/edge/test_03_scale_single_mode.sh
+++ b/deploy/docker/tests/cli/edge/test_03_scale_single_mode.sh
@@ -1,47 +0,0 @@
 #!/bin/bash
 # Test: Try to scale single container mode
 # Expected: Error indicating single mode cannot be scaled
 set -e
 echo "=== Test: Scale Single Container Mode ==="
 echo ""
 PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../../../" && pwd)"
 source "$PROJECT_ROOT/venv/bin/activate"
 # Cleanup
 crwl server stop 2>/dev/null || true
 sleep 2
 # Start in single mode
 echo "Starting in single mode..."
 crwl server start --mode single >/dev/null 2>&1
 sleep 5
 # Try to scale
 echo ""
 echo "Attempting to scale single mode (should fail)..."
 OUTPUT=$(crwl server scale 3 2>&1 || true)
 echo "$OUTPUT"
 echo ""
 # Verify error message
 if ! echo "$OUTPUT" | grep -iq "single"; then
    echo "❌ Test failed: Expected error about single mode"
    crwl server stop
    exit 1
 fi
 # Verify server still running
 HEALTH=$(curl -s http://localhost:11235/health | jq -r '.status' 2>/dev/null || echo "error")
 if [[ "$HEALTH" != "ok" ]]; then
    echo "❌ Server is not running after failed scale"
    crwl server stop
    exit 1
 fi
 # Cleanup
 crwl server stop >/dev/null 2>&1
 # Report success only after every check has passed, so a later failure is
 # never preceded by a "Test passed" line.
 echo "✅ Test passed: Proper error for scaling single mode"
--- a/deploy/docker/tests/cli/edge/test_04_invalid_port.sh
+++ b/deploy/docker/tests/cli/edge/test_04_invalid_port.sh
@@ -1,36 +0,0 @@
 #!/bin/bash
 # Test: Invalid port numbers
 # Expected: Validation errors for invalid ports
 #
 # Each out-of-range port is passed to `crwl server start --port`. The combined
 # stdout/stderr must contain "error", "invalid" or "usage" (case-insensitive).
 # Ports that are not rejected are counted and make the script exit 1, so the
 # test can actually fail instead of always reporting success.
 set -e
 echo "=== Test: Invalid Port Numbers ==="
 echo ""
 PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../../../" && pwd)"
 source "$PROJECT_ROOT/venv/bin/activate"
 # Cleanup
 crwl server stop 2>/dev/null || true
 sleep 2
 # Test invalid ports
 INVALID_PORTS=(0 -1 99999 65536)
 FAILURES=0
 for PORT in "${INVALID_PORTS[@]}"; do
    echo "Testing invalid port: $PORT"
    # `|| true` keeps set -e from aborting on the expected non-zero exit
    OUTPUT=$(crwl server start --port "$PORT" 2>&1 || true)
    if echo "$OUTPUT" | grep -iq "error\|invalid\|usage"; then
        echo "  ✅ Rejected port $PORT"
    else
        echo "  ❌ Port $PORT was accepted (output: $OUTPUT)"
        # Plain assignment: `((FAILURES++))` would return 1 on the first
        # increment and trip set -e
        FAILURES=$((FAILURES + 1))
    fi
    # Make sure no server started
    crwl server stop 2>/dev/null || true
    sleep 1
    echo ""
 done
 if [[ "$FAILURES" -gt 0 ]]; then
    echo "❌ Test failed: $FAILURES invalid port(s) were not rejected"
    exit 1
 fi
 echo "✅ Test passed: Invalid ports handled appropriately"
--- a/deploy/docker/tests/cli/edge/test_05_invalid_replicas.sh
+++ b/deploy/docker/tests/cli/edge/test_05_invalid_replicas.sh
@@ -1,57 +0,0 @@
 #!/bin/bash
 # Test: Invalid replica counts
 # Expected: Validation errors for invalid replicas
 #
 # Part 1 passes each invalid count to `crwl server start --replicas`.
 # Part 2 starts a 2-replica server and tries to `crwl server scale` to invalid
 # counts. Every value that is not rejected is counted; the script exits 1 if
 # any was accepted, so the test can actually fail.
 set -e
 echo "=== Test: Invalid Replica Counts ==="
 echo ""
 PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../../../" && pwd)"
 source "$PROJECT_ROOT/venv/bin/activate"
 # Cleanup
 crwl server stop 2>/dev/null || true
 sleep 2
 FAILURES=0
 # Test invalid replica counts
 INVALID_REPLICAS=(0 -1 101)
 for REPLICAS in "${INVALID_REPLICAS[@]}"; do
    echo "Testing invalid replica count: $REPLICAS"
    # `|| true` keeps set -e from aborting on the expected non-zero exit
    OUTPUT=$(crwl server start --replicas "$REPLICAS" 2>&1 || true)
    if echo "$OUTPUT" | grep -iq "error\|invalid\|usage"; then
        echo "  ✅ Rejected replica count $REPLICAS"
    else
        echo "  ❌ Replica count $REPLICAS was accepted (output: $OUTPUT)"
        FAILURES=$((FAILURES + 1))
    fi
    # Make sure no server started
    crwl server stop 2>/dev/null || true
    sleep 1
    echo ""
 done
 # Test scaling to invalid counts
 echo "Testing scale to invalid counts..."
 crwl server start --replicas 2 >/dev/null 2>&1
 sleep 5
 INVALID_SCALE=(0 -1)
 for SCALE in "${INVALID_SCALE[@]}"; do
    echo "Testing scale to: $SCALE"
    OUTPUT=$(crwl server scale "$SCALE" 2>&1 || true)
    if echo "$OUTPUT" | grep -iq "error\|invalid\|must be at least 1"; then
        echo "  ✅ Rejected scale to $SCALE"
    else
        echo "  ❌ Scale to $SCALE was accepted (output: $OUTPUT)"
        FAILURES=$((FAILURES + 1))
    fi
    echo ""
 done
 # Cleanup (before reporting, so a failing run does not leave the server up)
 crwl server stop >/dev/null 2>&1
 if [[ "$FAILURES" -gt 0 ]]; then
    echo "❌ Test failed: $FAILURES invalid replica value(s) were not rejected"
    exit 1
 fi
 echo "✅ Test passed: Invalid replica counts handled appropriately"
--- a/deploy/docker/tests/cli/edge/test_06_missing_env_file.sh
+++ b/deploy/docker/tests/cli/edge/test_06_missing_env_file.sh
@@ -1,40 +0,0 @@
 #!/bin/bash
 # Test: start the server with an --env-file path that does not exist
 # Expected: an error about the missing file, and no server listening afterwards
 set -e
 echo "=== Test: Missing Environment File ==="
 echo ""
 # Activate the virtualenv located five directories above this script
 PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../../../" && pwd)"
 source "$PROJECT_ROOT/venv/bin/activate"
 # Begin from a stopped state; a failing stop is tolerated here
 crwl server stop 2>/dev/null || true
 sleep 2
 # Build a path under /tmp suffixed with the current epoch seconds
 MISSING_ENV="/tmp/nonexistent_$(date +%s).env"
 echo "Attempting to start with non-existent env file: $MISSING_ENV"
 # `|| true` keeps set -e from aborting on the expected failure
 START_OUTPUT=$(crwl server start --env-file "$MISSING_ENV" 2>&1 || true)
 echo "$START_OUTPUT"
 echo ""
 # Guard: the output must contain one of the recognised error phrases
 if ! echo "$START_OUTPUT" | grep -iq "error\|does not exist\|not found\|no such file"; then
    echo "❌ Test failed: Expected error about missing file"
    crwl server stop
    exit 1
 fi
 echo "✅ Test passed: Proper error for missing env file"
 # Guard: nothing may be answering the health endpoint
 if curl -s http://localhost:11235/health > /dev/null 2>&1; then
    echo "❌ Server should not have started"
    crwl server stop
    exit 1
 fi
 echo "✅ Server correctly refused to start with missing env file"
--- a/deploy/docker/tests/cli/edge/test_07_port_in_use.sh
+++ b/deploy/docker/tests/cli/edge/test_07_port_in_use.sh
@@ -1,50 +0,0 @@
 #!/bin/bash
 # Test: Port already in use
 # Expected: Error indicating port is occupied
 #
 # A throwaway `python -m http.server` occupies port 11235, then
 # `crwl server start` is attempted on the same port. The script fails if the
 # dummy server could not be started (the scenario would not be exercised) or
 # if Crawl4AI reports a healthy status afterwards.
 set -e
 echo "=== Test: Port Already In Use ==="
 echo ""
 PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../../../" && pwd)"
 source "$PROJECT_ROOT/venv/bin/activate"
 # Cleanup
 crwl server stop 2>/dev/null || true
 sleep 2
 # Start a simple HTTP server on port 11235 to occupy it
 echo "Starting dummy server on port 11235..."
 python -m http.server 11235 >/dev/null 2>&1 &
 DUMMY_PID=$!
 # Reap the dummy server on any exit path (including Ctrl-C) so it is never orphaned
 trap 'kill "$DUMMY_PID" 2>/dev/null || true' EXIT
 sleep 2
 # If the dummy process already exited (e.g. it could not bind), the port is
 # not being held by us and the rest of the test would prove nothing
 if ! kill -0 "$DUMMY_PID" 2>/dev/null; then
    echo "❌ Dummy server failed to start; port 11235 is not occupied by the test"
    exit 1
 fi
 # Try to start crawl4ai on same port
 echo "Attempting to start Crawl4AI on occupied port..."
 OUTPUT=$(crwl server start 2>&1 || true)
 echo "$OUTPUT"
 echo ""
 # Kill dummy server
 kill "$DUMMY_PID" 2>/dev/null || true
 trap - EXIT
 sleep 1
 # Verify error message (advisory only: the exact wording may vary)
 if echo "$OUTPUT" | grep -iq "port.*in use\|already in use\|address already in use"; then
    echo "✅ Test passed: Proper error for port in use"
 else
    echo "⚠️  Expected 'port in use' error (output may vary)"
 fi
 # Make sure Crawl4AI didn't start
 if curl -s http://localhost:11235/health > /dev/null 2>&1; then
    HEALTH=$(curl -s http://localhost:11235/health | jq -r '.status' 2>/dev/null || echo "unknown")
    if [[ "$HEALTH" == "ok" ]]; then
        echo "❌ Crawl4AI started despite port being occupied"
        crwl server stop
        exit 1
    fi
 fi
 echo "✅ Crawl4AI correctly refused to start on occupied port"
--- a/deploy/docker/tests/cli/edge/test_08_state_corruption.sh
+++ b/deploy/docker/tests/cli/edge/test_08_state_corruption.sh
@@ -1,79 +0,0 @@
 #!/bin/bash
 # Test: Corrupted state file
 # Expected: Cleanup recovers from corrupted state
 #
 # Flow: start the server so a state file exists, stop it, overwrite the state
 # file with invalid JSON, then start again and accept one of three outcomes
 # (server comes up, "already running" followed by a successful forced cleanup
 # and restart, or any other output from the start command).
 # set -e: any unguarded command that fails aborts the script
 set -e
 echo "=== Test: State File Corruption ==="
 echo ""
 # Activate the virtualenv located five directories above this script
 PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../../../" && pwd)"
 source "$PROJECT_ROOT/venv/bin/activate"
 # Cleanup (a failing stop is tolerated here)
 crwl server stop 2>/dev/null || true
 sleep 2
 # Start server to create state
 echo "Starting server to create state..."
 crwl server start >/dev/null 2>&1
 sleep 5
 # Get state file path
 STATE_FILE="$HOME/.crawl4ai/server/state.json"
 echo "State file: $STATE_FILE"
 # Verify state file exists
 if [[ ! -f "$STATE_FILE" ]]; then
    echo "❌ State file not created"
    crwl server stop
    exit 1
 fi
 echo "Original state:"
 # Pretty-print through jq; fall back to the raw file if jq fails
 cat "$STATE_FILE" | jq '.' || cat "$STATE_FILE"
 echo ""
 # Stop server
 crwl server stop >/dev/null 2>&1
 sleep 2
 # Corrupt state file (overwrite it with text that is not valid JSON)
 echo "Corrupting state file..."
 echo "{ invalid json }" > "$STATE_FILE"
 cat "$STATE_FILE"
 echo ""
 # Try to start server (should handle corrupted state)
 # `|| true` keeps a failing start from aborting the script under set -e
 echo "Attempting to start with corrupted state..."
 OUTPUT=$(crwl server start 2>&1 || true)
 echo "$OUTPUT"
 echo ""
 # Check if server started or gave clear error
 if curl -s http://localhost:11235/health > /dev/null 2>&1; then
    # Outcome 1: the health endpoint answers, so the server came up
    echo "✅ Server started despite corrupted state"
    crwl server stop
 elif echo "$OUTPUT" | grep -iq "already running"; then
    # Outcome 2: State thinks server is running, use cleanup
    echo "State thinks server is running, using cleanup..."
    crwl server cleanup --force >/dev/null 2>&1
    sleep 2
    # Try starting again
    crwl server start >/dev/null 2>&1
    sleep 5
    if curl -s http://localhost:11235/health > /dev/null 2>&1; then
        echo "✅ Cleanup recovered from corrupted state"
        crwl server stop
    else
        echo "❌ Failed to recover from corrupted state"
        exit 1
    fi
 else
    # Outcome 3: no server and no "already running" message; this branch
    # does not inspect OUTPUT further and counts as a pass
    echo "✅ Handled corrupted state appropriately"
 fi
 echo ""
 echo "✅ Test passed: System handles state corruption"
--- a/deploy/docker/tests/cli/edge/test_09_network_conflict.sh
+++ b/deploy/docker/tests/cli/edge/test_09_network_conflict.sh
@@ -1,47 +0,0 @@
 #!/bin/bash
 # Test: server start-up while an extra Docker network already exists
 # Expected: the server still starts and reports a healthy status
 set -e
 echo "=== Test: Network Name Conflict ==="
 echo ""
 # Activate the virtualenv located five directories above this script
 PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../../../" && pwd)"
 source "$PROJECT_ROOT/venv/bin/activate"
 # Begin from a stopped state; a failing stop is tolerated here
 crwl server stop 2>/dev/null || true
 sleep 2
 # Pre-create the test network; an existing one is reported, not treated as an error
 TEST_NETWORK="crawl4ai_test_net"
 echo "Creating test network: $TEST_NETWORK..."
 docker network create "$TEST_NETWORK" 2>/dev/null || echo "Network may already exist"
 # Start the server with the extra network in place
 echo ""
 echo "Starting server..."
 crwl server start >/dev/null 2>&1
 sleep 5
 # The health endpoint must report "ok"; otherwise tear everything down and fail
 SERVER_STATUS=$(curl -s http://localhost:11235/health | jq -r '.status' 2>/dev/null || echo "error")
 if [[ "$SERVER_STATUS" == "ok" ]]; then
    echo "✅ Server started successfully despite network conflict"
 else
    echo "❌ Server failed to start"
    docker network rm "$TEST_NETWORK" 2>/dev/null || true
    crwl server stop
    exit 1
 fi
 # Stop the server before removing the network
 crwl server stop >/dev/null 2>&1
 sleep 2
 # Remove test network
 docker network rm "$TEST_NETWORK" 2>/dev/null || echo "Network already removed"
 echo ""
 echo "✅ Test passed: Handled network conflict gracefully"
--- a/deploy/docker/tests/cli/edge/test_10_rapid_operations.sh
+++ b/deploy/docker/tests/cli/edge/test_10_rapid_operations.sh
@@ -1,72 +0,0 @@
 #!/bin/bash
 # Test: start/stop/restart/status issued in quick succession
 # Expected: the server stays healthy after each burst of operations
 set -e
 echo "=== Test: Rapid Operations ==="
 echo ""
 # Activate the virtualenv located five directories above this script
 PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../../../" && pwd)"
 source "$PROJECT_ROOT/venv/bin/activate"

 # Print the given message, stop the server and exit 1 unless /health reports "ok"
 require_healthy() {
    local failure_message="$1"
    local health
    health=$(curl -s http://localhost:11235/health | jq -r '.status' 2>/dev/null || echo "error")
    if [[ "$health" != "ok" ]]; then
        echo "$failure_message"
        crwl server stop
        exit 1
    fi
 }

 # Begin from a stopped state; a failing stop is tolerated here
 crwl server stop 2>/dev/null || true
 sleep 2
 # Test 1: three back-to-back start/stop cycles
 echo "Test 1: Rapid start/stop cycles..."
 for ((cycle = 1; cycle <= 3; cycle++)); do
    echo "  Cycle $cycle/3..."
    crwl server start >/dev/null 2>&1
    sleep 3
    crwl server stop >/dev/null 2>&1
    sleep 2
 done
 echo "  ✅ Completed rapid start/stop cycles"
 # Test 2: restart shortly after a fresh start
 echo ""
 echo "Test 2: Restart immediately after start..."
 crwl server start >/dev/null 2>&1
 sleep 3
 crwl server restart >/dev/null 2>&1
 sleep 5
 require_healthy "  ❌ Health check failed after rapid restart"
 echo "  ✅ Rapid restart successful"
 # Test 3: five consecutive status calls; a failing call only prints a warning
 echo ""
 echo "Test 3: Multiple rapid status checks..."
 for attempt in 1 2 3 4 5; do
    crwl server status >/dev/null 2>&1 || echo "  ⚠️  Status check $attempt failed"
 done
 echo "  ✅ Multiple status checks completed"
 # Test 4: stop, short pause, start again
 echo ""
 echo "Test 4: Stop and immediate start..."
 crwl server stop >/dev/null 2>&1
 sleep 2
 crwl server start >/dev/null 2>&1
 sleep 5
 require_healthy "  ❌ Health check failed after stop/start"
 echo "  ✅ Stop/immediate start successful"
 # Cleanup
 crwl server stop >/dev/null 2>&1
 echo ""
 echo "✅ Test passed: System handles rapid operations correctly"
--- a/deploy/docker/tests/cli/plan.md
+++ b/deploy/docker/tests/cli/plan.md
@@ -1,119 +0,0 @@
 E2E CLI Test Suite Plan                                                                                     │ │
 │ │                                                                                                             │ │
 │ │ Test Structure                                                                                              │ │
 │ │                                                                                                             │ │
 │ │ Create deploy/docker/tests/cli/ folder with individual test scripts organized by category.                  │ │
 │ │                                                                                                             │ │
 │ │ Test Categories                                                                                             │ │
 │ │                                                                                                             │ │
 │ │ 1. Basic Tests (deploy/docker/tests/cli/basic/)                                                             │ │
 │ │                                                                                                             │ │
 │ │ - test_01_start_default.sh - Start server with defaults (1 replica, port 11235)                             │ │
 │ │ - test_02_status.sh - Check server status                                                                   │ │
 │ │ - test_03_stop.sh - Stop server cleanly                                                                     │ │
 │ │ - test_04_start_custom_port.sh - Start with custom port (8080)                                              │ │
 │ │ - test_05_start_replicas.sh - Start with 3 replicas                                                         │ │
 │ │ - test_06_logs.sh - View logs (tail and follow)                                                             │ │
 │ │ - test_07_restart.sh - Restart server preserving config                                                     │ │
 │ │ - test_08_cleanup.sh - Force cleanup all resources                                                          │ │
 │ │                                                                                                             │ │
 │ │ 2. Advanced Tests (deploy/docker/tests/cli/advanced/)                                                       │ │
 │ │                                                                                                             │ │
 │ │ - test_01_scale_up.sh - Scale from 3 to 5 replicas                                                          │ │
 │ │ - test_02_scale_down.sh - Scale from 5 to 2 replicas                                                        │ │
 │ │ - test_03_mode_single.sh - Start in single mode explicitly                                                  │ │
 │ │ - test_04_mode_compose.sh - Start in compose mode with 3 replicas                                           │ │
 │ │ - test_05_custom_image.sh - Start with custom image tag                                                     │ │
 │ │ - test_06_env_file.sh - Start with custom env file                                                          │ │
 │ │ - test_07_stop_remove_volumes.sh - Stop and remove volumes                                                  │ │
 │ │ - test_08_restart_with_scale.sh - Restart and change replica count                                          │ │
 │ │                                                                                                             │ │
 │ │ 3. Resource Tests (deploy/docker/tests/cli/resource/)                                                       │ │
 │ │                                                                                                             │ │
 │ │ - test_01_memory_monitoring.sh - Monitor memory during crawls                                               │ │
 │ │ - test_02_cpu_stress.sh - CPU usage under concurrent load                                                   │ │
 │ │ - test_03_max_replicas.sh - Start with 10 replicas and stress test                                          │ │
 │ │ - test_04_cleanup_verification.sh - Verify all resources cleaned up                                         │ │
 │ │ - test_05_long_running.sh - Stability test (5 min runtime)                                                  │ │
 │ │                                                                                                             │ │
 │ │ 4. Dashboard UI Tests (deploy/docker/tests/cli/dashboard/)                                                  │ │
 │ │                                                                                                             │ │
 │ │ - test_01_dashboard_ui.py - Playwright test with screenshots                                                │ │
 │ │   - Start server with 3 replicas                                                                            │ │
 │ │   - Run demo_monitor_dashboard.py script                                                                    │ │
 │ │   - Use Playwright to:                                                                                      │ │
 │ │       - Take screenshot of main dashboard                                                                   │ │
 │ │     - Verify container filter buttons (All, C-1, C-2, C-3)                                                  │ │
 │ │     - Test WebSocket connection indicator                                                                   │ │
 │ │     - Verify timeline charts render                                                                         │ │
 │ │     - Test filtering functionality                                                                          │ │
 │ │     - Check all tabs (Requests, Browsers, Janitor, Errors, Stats)                                           │ │
 │ │                                                                                                             │ │
 │ │ 5. Edge Cases (deploy/docker/tests/cli/edge/)                                                               │ │
 │ │                                                                                                             │ │
 │ │ - test_01_already_running.sh - Try starting when already running                                            │ │
 │ │ - test_02_not_running.sh - Try stop/status when not running                                                 │ │
 │ │ - test_03_scale_single_mode.sh - Try scaling single container mode                                          │ │
 │ │ - test_04_invalid_port.sh - Invalid port numbers (0, -1, 99999, 65536)                                      │ │
 │ │ - test_05_invalid_replicas.sh - Invalid replica counts (0, -1, 101)                                         │ │
 │ │ - test_06_missing_env_file.sh - Non-existent env file                                                       │ │
 │ │ - test_07_port_in_use.sh - Port already occupied                                                            │ │
 │ │ - test_08_state_corruption.sh - Manually corrupt state file                                                 │ │
 │ │ - test_09_network_conflict.sh - Docker network name collision                                               │ │
 │ │ - test_10_rapid_operations.sh - Start/stop/restart in quick succession                                      │ │
 │ │                                                                                                             │ │
 │ │ Test Execution Plan                                                                                         │ │
 │ │                                                                                                             │ │
 │ │ Process:                                                                                                    │ │
 │ │                                                                                                             │ │
 │ │ 1. Create test file                                                                                         │ │
 │ │ 2. Run test                                                                                                 │ │
 │ │ 3. Verify results                                                                                           │ │
 │ │ 4. If fails → fix issue → re-test                                                                           │ │
 │ │ 5. Move to next test                                                                                        │ │
 │ │ 6. Clean up after each test to ensure clean state                                                           │ │
 │ │                                                                                                             │ │
 │ │ Common Test Structure:                                                                                      │ │
 │ │                                                                                                             │ │
 │ │ #!/bin/bash                                                                                                 │ │
 │ │ # Test: [Description]                                                                                       │ │
 │ │ # Expected: [What should happen]                                                                            │ │
 │ │                                                                                                             │ │
 │ │ source venv/bin/activate                                                                                    │ │
 │ │ set -e  # Exit on error                                                                                     │ │
 │ │                                                                                                             │ │
 │ │ echo "=== Test: [Name] ==="                                                                                 │ │
 │ │                                                                                                             │ │
 │ │ # Setup                                                                                                     │ │
 │ │ # ... test commands ...                                                                                     │ │
 │ │                                                                                                             │ │
 │ │ # Verification                                                                                              │ │
 │ │ # ... assertions ...                                                                                        │ │
 │ │                                                                                                             │ │
 │ │ # Cleanup                                                                                                   │ │
 │ │ crwl server stop || true                                                                                    │ │
 │ │                                                                                                             │ │
 │ │ echo "✓ Test passed"                                                                                        │ │
 │ │                                                                                                             │ │
 │ │ Dashboard Test Structure (Python):                                                                          │ │
 │ │                                                                                                             │ │
 │ │ # Activate venv first in calling script                                                                     │ │
 │ │ import asyncio                                                                                              │ │
 │ │ from playwright.async_api import async_playwright                                                           │ │
 │ │                                                                                                             │ │
 │ │ async def test_dashboard():                                                                                 │ │
 │ │     # Start server with 3 replicas                                                                          │ │
 │ │     # Run demo script in background                                                                         │ │
 │ │     # Launch Playwright                                                                                     │ │
 │ │     # Take screenshots                                                                                      │ │
 │ │     # Verify elements                                                                                       │ │
 │ │     # Cleanup                                                                                               │ │
 │ │                                                                                                             │ │
 │ │ Success Criteria:                                                                                           │ │
 │ │                                                                                                             │ │
 │ │ - All basic operations work correctly                                                                       │ │
 │ │ - Scaling operations function properly                                                                      │ │
 │ │ - Resource limits are respected                                                                             │ │
 │ │ - Dashboard UI is functional and responsive                                                                 │ │
 │ │ - Edge cases handled gracefully with proper error messages                                                  │ │
 │ │ - Clean resource cleanup verified
--- a/deploy/docker/tests/cli/resource/test_01_memory_monitoring.sh
+++ b/deploy/docker/tests/cli/resource/test_01_memory_monitoring.sh
@@ -1,63 +0,0 @@
 #!/bin/bash
 # Test: Monitor memory usage during crawl operations
 # Expected: Memory stats are accessible and reasonable
 #
 # Reads `.container.memory_percent` from /monitor/health before and after five
 # crawl requests, plus `.summary.total_memory_mb` from /monitor/browsers.
 # The test fails when the post-request memory figure is not a number (stats
 # not accessible); a figure of 80% or more only produces a warning.
 set -e
 echo "=== Test: Memory Monitoring ==="
 echo ""
 PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../../../" && pwd)"
 source "$PROJECT_ROOT/venv/bin/activate"
 # Cleanup
 crwl server stop 2>/dev/null || true
 sleep 2
 # Start server
 echo "Starting server..."
 crwl server start >/dev/null 2>&1
 sleep 5
 # Get baseline memory
 echo "Checking baseline memory..."
 BASELINE=$(curl -s http://localhost:11235/monitor/health | jq -r '.container.memory_percent' 2>/dev/null || echo "0")
 echo "Baseline memory: ${BASELINE}%"
 # Make several crawl requests
 echo ""
 echo "Making crawl requests to increase memory usage..."
 for i in {1..5}; do
    echo "  Request $i/5..."
    # Best effort: a failed request must not abort the test under set -e
    curl -s -X POST http://localhost:11235/crawl \
      -H "Content-Type: application/json" \
      -d "{\"urls\": [\"https://httpbin.org/html?req=$i\"], \"crawler_config\": {}}" > /dev/null || true
    sleep 1
 done
 # Check memory after requests
 echo ""
 echo "Checking memory after requests..."
 # Fall back to a non-numeric marker so a jq failure cannot pass as a valid 0%
 AFTER=$(curl -s http://localhost:11235/monitor/health | jq -r '.container.memory_percent' 2>/dev/null || echo "N/A")
 echo "Memory after requests: ${AFTER}%"
 # Stats must actually be accessible: a missing field or an empty response
 # leaves AFTER non-numeric, which previously still ended in "Test passed"
 if ! [[ "$AFTER" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
    echo "❌ Memory stats not accessible (got: '${AFTER}')"
    crwl server stop >/dev/null 2>&1 || true
    exit 1
 fi
 # Get browser pool stats
 echo ""
 echo "Browser pool memory usage..."
 POOL_MEM=$(curl -s http://localhost:11235/monitor/browsers | jq -r '.summary.total_memory_mb' 2>/dev/null || echo "0")
 echo "Browser pool: ${POOL_MEM} MB"
 # Verify memory is within reasonable bounds (<80%)
 # bc prints 1 when the comparison holds; if bc is unavailable assume OK
 MEMORY_OK=$(echo "$AFTER < 80" | bc -l 2>/dev/null || echo "1")
 if [[ "$MEMORY_OK" != "1" ]]; then
    echo "⚠️  Warning: Memory usage is high: ${AFTER}%"
 fi
 # Cleanup
 echo ""
 echo "Cleaning up..."
 crwl server stop >/dev/null 2>&1
 echo ""
 echo "✅ Test passed: Memory monitoring functional"
 echo "   Baseline: ${BASELINE}%, After: ${AFTER}%, Pool: ${POOL_MEM} MB"
--- a/deploy/docker/tests/cli/resource/test_02_cpu_stress.sh
+++ b/deploy/docker/tests/cli/resource/test_02_cpu_stress.sh
@@ -1,61 +0,0 @@
 #!/bin/bash
 # Test: ten crawl requests fired concurrently at a 3-replica server
 # Expected: the server still reports a healthy status once they all finish
 set -e
 echo "=== Test: CPU Stress Test ==="
 echo ""
 # Activate the virtualenv located five directories above this script
 PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../../../" && pwd)"
 source "$PROJECT_ROOT/venv/bin/activate"

 # Print one CPU/memory snapshot for containers whose name matches "crawl4ai";
 # a docker failure is reported but does not abort the test
 print_container_stats() {
    docker stats --no-stream --format "table {{.Name}}\t{{.CPUPerc}}\t{{.MemUsage}}" \
      --filter "name=crawl4ai" 2>/dev/null || echo "Unable to get container stats"
 }

 # Begin from a stopped state; a failing stop is tolerated here
 crwl server stop 2>/dev/null || true
 sleep 2
 # Start server with 3 replicas for better load distribution
 echo "Starting server with 3 replicas..."
 crwl server start --replicas 3 >/dev/null 2>&1
 sleep 12
 # Snapshot before load
 echo "Checking baseline container stats..."
 print_container_stats
 # Launch the requests as background jobs so they run concurrently
 echo ""
 echo "Sending 10 concurrent requests..."
 for ((req = 1; req <= 10; req++)); do
    curl -s -X POST http://localhost:11235/crawl \
      -H "Content-Type: application/json" \
      -d "{\"urls\": [\"https://httpbin.org/html?req=$req\"], \"crawler_config\": {}}" > /dev/null &
 done
 # Block until every background curl has exited
 echo "Waiting for requests to complete..."
 wait
 # Snapshot after load
 echo ""
 echo "Container stats after load:"
 print_container_stats
 # The health endpoint must still report "ok"
 echo ""
 SERVER_STATUS=$(curl -s http://localhost:11235/health | jq -r '.status' 2>/dev/null || echo "error")
 if [[ "$SERVER_STATUS" != "ok" ]]; then
    echo "❌ Health check failed after CPU stress"
    crwl server stop
    exit 1
 fi
 # Cleanup
 echo ""
 echo "Cleaning up..."
 crwl server stop >/dev/null 2>&1
 echo ""
 echo "✅ Test passed: Server handled concurrent load successfully"
--- a/deploy/docker/tests/cli/resource/test_03_max_replicas.sh
+++ b/deploy/docker/tests/cli/resource/test_03_max_replicas.sh
@@ -1,72 +0,0 @@
 #!/bin/bash
 # Test: run with 10 replicas and hit the server with a burst of 20 requests
 # Expected: status output mentions 10 and the health check stays "ok"
 set -e
 echo "=== Test: Maximum Replicas Stress Test ==="
 echo ""
 # Activate the virtualenv located five directories above this script
 PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../../../" && pwd)"
 source "$PROJECT_ROOT/venv/bin/activate"
 # Begin from a stopped state; a failing stop is tolerated here
 crwl server stop 2>/dev/null || true
 sleep 2
 # Start with 10 replicas (max recommended)
 echo "Starting server with 10 replicas..."
 echo "This may take some time..."
 crwl server start --replicas 10 >/dev/null 2>&1
 sleep 20
 # The status output must contain the text "10"
 echo "Checking status..."
 STATUS_OUTPUT=$(crwl server status)
 echo "$STATUS_OUTPUT" | grep -q "10" || {
    echo "❌ Failed to start 10 replicas"
    crwl server stop
    exit 1
 }
 # Give the monitor time to discover the containers
 echo ""
 echo "Waiting for container discovery..."
 sleep 10
 # Report how many containers the monitor endpoint sees (informational only)
 DISCOVERED=$(curl -s http://localhost:11235/monitor/containers | jq -r '.count' 2>/dev/null || echo "0")
 echo "Discovered containers: $DISCOVERED"
 # Fire 20 requests as background jobs, then wait for all of them
 echo ""
 echo "Sending burst of 20 requests..."
 for ((req = 1; req <= 20; req++)); do
    curl -s -X POST http://localhost:11235/crawl \
      -H "Content-Type: application/json" \
      -d "{\"urls\": [\"https://httpbin.org/html?req=$req\"], \"crawler_config\": {}}" > /dev/null &
 done
 wait
 # The health endpoint must still report "ok"
 echo ""
 SERVER_STATUS=$(curl -s http://localhost:11235/health | jq -r '.status' 2>/dev/null || echo "error")
 if [[ "$SERVER_STATUS" != "ok" ]]; then
    echo "❌ Health check failed after max replica stress"
    crwl server stop
    exit 1
 fi
 # Dump endpoint statistics (informational only)
 echo ""
 echo "Endpoint statistics:"
 curl -s http://localhost:11235/monitor/endpoints/stats | jq '.' 2>/dev/null || echo "No stats available"
 # Cleanup
 echo ""
 echo "Cleaning up..."
 crwl server stop >/dev/null 2>&1
 echo ""
 echo "✅ Test passed: Successfully stress tested with 10 replicas"
--- a/deploy/docker/tests/cli/resource/test_04_cleanup_verification.sh
+++ b/deploy/docker/tests/cli/resource/test_04_cleanup_verification.sh
@@ -1,63 +0,0 @@
 #!/bin/bash
 # Test: Verify complete resource cleanup
 # Expected: All Docker resources are properly removed
 #
 # Starts a 3-replica server, lists the matching containers and networks, runs
 # `crwl server cleanup --force`, then fails if any container named *crawl4ai*
 # remains or the health endpoint still answers. Leftover nginx/redis
 # containers only produce warnings.
 set -e
 echo "=== Test: Resource Cleanup Verification ==="
 echo ""
 PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../../../" && pwd)"
 source "$PROJECT_ROOT/venv/bin/activate"
 # Cleanup (same pre-step as the sibling tests, so a server left over from an
 # earlier run cannot interfere with the start below)
 crwl server stop 2>/dev/null || true
 sleep 2
 # Start server to create resources
 echo "Starting server with 3 replicas..."
 crwl server start --replicas 3 >/dev/null 2>&1
 sleep 10
 # List resources before cleanup
 echo ""
 echo "Resources before cleanup:"
 echo "Containers:"
 # docker exits 0 with empty output when nothing matches, so the "None"
 # fallback keys off empty output rather than the exit status
 RUNNING=$(
    for NAME in crawl4ai nginx redis; do
        docker ps --filter "name=$NAME" --format "  - {{.Names}}" 2>/dev/null || true
    done
 )
 echo "${RUNNING:-  None}"
 echo ""
 echo "Networks:"
 NETWORKS=$(docker network ls --filter "name=crawl4ai" --format "  - {{.Name}}" 2>/dev/null || true)
 echo "${NETWORKS:-  None}"
 # Cleanup
 echo ""
 echo "Performing cleanup..."
 crwl server cleanup --force >/dev/null 2>&1
 sleep 5
 # Verify cleanup (-a also lists stopped containers)
 echo ""
 echo "Verifying cleanup..."
 CONTAINERS=$(docker ps -a --filter "name=crawl4ai" --format "{{.Names}}" 2>/dev/null || echo "")
 if [[ -n "$CONTAINERS" ]]; then
    echo "❌ Found remaining crawl4ai containers: $CONTAINERS"
    exit 1
 fi
 # nginx/redis matches are warnings only: the name filter may also match
 # containers that do not belong to this deployment
 NGINX=$(docker ps -a --filter "name=nginx" --format "{{.Names}}" 2>/dev/null || echo "")
 if [[ -n "$NGINX" ]]; then
    echo "⚠️  Warning: Nginx container still exists: $NGINX"
 fi
 REDIS=$(docker ps -a --filter "name=redis" --format "{{.Names}}" 2>/dev/null || echo "")
 if [[ -n "$REDIS" ]]; then
    echo "⚠️  Warning: Redis container still exists: $REDIS"
 fi
 # Verify port is free
 if curl -s http://localhost:11235/health > /dev/null 2>&1; then
    echo "❌ Port 11235 still in use after cleanup"
    exit 1
 fi
 echo ""
 echo "✅ Test passed: All Crawl4AI resources properly cleaned up"
--- a/deploy/docker/tests/cli/resource/test_05_long_running.sh
+++ b/deploy/docker/tests/cli/resource/test_05_long_running.sh
@@ -1,99 +0,0 @@
 #!/bin/bash
 # Test: Long-running stability test (5 minutes)
 # Expected: Server remains stable over extended period
 #
 # Every 10 seconds a crawl request is POSTed to the local server; about every
 # 30 seconds /health and /monitor/health are polled as well. The test fails
 # when the number of errors reaches 10% of the requests attempted.
 set -e
 echo "=== Test: Long-Running Stability (5 minutes) ==="
 echo ""
 PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../../../" && pwd)"
 source "$PROJECT_ROOT/venv/bin/activate"
 # Cleanup
 crwl server stop 2>/dev/null || true
 sleep 2
 # Start server
 echo "Starting server with 2 replicas..."
 crwl server start --replicas 2 >/dev/null 2>&1
 sleep 10
 # Get start time
 START_TIME=$(date +%s)
 DURATION=300  # 5 minutes in seconds
 HEALTH_INTERVAL=30  # seconds between health checks
 LAST_HEALTH_CHECK=$((0 - HEALTH_INTERVAL))  # forces a check on the first iteration
 ATTEMPT_COUNT=0  # every crawl request sent, successful or not
 REQUEST_COUNT=0  # successful crawl requests
 ERROR_COUNT=0    # failed crawl requests plus failed health checks
 echo ""
 echo "Running stability test for 5 minutes..."
 echo "Making periodic requests every 10 seconds..."
 echo ""
 while true; do
    CURRENT_TIME=$(date +%s)
    ELAPSED=$((CURRENT_TIME - START_TIME))
    if [[ $ELAPSED -ge $DURATION ]]; then
        break
    fi
    REMAINING=$((DURATION - ELAPSED))
    echo "[$ELAPSED/$DURATION seconds] Remaining: ${REMAINING}s, Requests: $REQUEST_COUNT, Errors: $ERROR_COUNT"
    # Make a request. --fail makes curl exit non-zero on HTTP 4xx/5xx; without
    # it a server-side error response would be counted as a success.
    ATTEMPT_COUNT=$((ATTEMPT_COUNT + 1))
    if curl -s --fail -X POST http://localhost:11235/crawl \
        -H "Content-Type: application/json" \
        -d '{"urls": ["https://httpbin.org/html"], "crawler_config": {}}' > /dev/null 2>&1; then
        REQUEST_COUNT=$((REQUEST_COUNT + 1))
    else
        ERROR_COUNT=$((ERROR_COUNT + 1))
        echo "  ⚠️  Request failed"
    fi
    # Check health every 30 seconds. The interval is measured from the last
    # check because an iteration lasts 10s plus the request time, so ELAPSED
    # rarely lands on an exact multiple of 30.
    if [[ $((ELAPSED - LAST_HEALTH_CHECK)) -ge $HEALTH_INTERVAL ]]; then
        LAST_HEALTH_CHECK=$ELAPSED
        HEALTH=$(curl -s http://localhost:11235/health | jq -r '.status' 2>/dev/null || echo "error")
        if [[ "$HEALTH" != "ok" ]]; then
            echo "  ❌ Health check failed!"
            ERROR_COUNT=$((ERROR_COUNT + 1))
        fi
        # Get memory stats
        MEM=$(curl -s http://localhost:11235/monitor/health | jq -r '.container.memory_percent' 2>/dev/null || echo "N/A")
        echo "  Memory: ${MEM}%"
    fi
    sleep 10
 done
 echo ""
 echo "Test duration completed!"
 echo "Total requests: $REQUEST_COUNT"
 echo "Total errors: $ERROR_COUNT"
 # Get final stats
 echo ""
 echo "Final statistics:"
 curl -s http://localhost:11235/monitor/endpoints/stats | jq '.' 2>/dev/null || echo "No stats available"
 # Verify error rate is acceptable (<10%). The denominator is the number of
 # attempts, not successes, so an all-failures run cannot divide by zero.
 if [[ $ATTEMPT_COUNT -gt 0 ]]; then
    ERROR_RATE=$(echo "scale=2; $ERROR_COUNT * 100 / $ATTEMPT_COUNT" | bc -l 2>/dev/null || echo "$((ERROR_COUNT * 100 / ATTEMPT_COUNT))")
 else
    ERROR_RATE=0
 fi
 echo ""
 echo "Error rate: ${ERROR_RATE}%"
 # Cleanup
 echo ""
 echo "Cleaning up..."
 crwl server stop >/dev/null 2>&1
 # Check error rate with integer arithmetic so the verdict does not depend on
 # bc being installed: rate < 10%  <=>  errors * 100 < 10 * attempts.
 if [[ $((ERROR_COUNT * 100)) -ge $((10 * ATTEMPT_COUNT)) && $ERROR_COUNT -gt 0 ]]; then
    echo "❌ Error rate too high: ${ERROR_RATE}%"
    exit 1
 fi
 echo ""
 echo "✅ Test passed: Server remained stable over 5 minutes"
 echo "   Requests: $REQUEST_COUNT, Errors: $ERROR_COUNT, Error rate: ${ERROR_RATE}%"
--- a/deploy/docker/tests/cli/run_tests.sh
+++ b/deploy/docker/tests/cli/run_tests.sh
@@ -1,200 +0,0 @@
 #!/bin/bash
 # Master Test Runner for Crawl4AI CLI E2E Tests
 # Usage: ./run_tests.sh [category] [test_number]
 #   category: basic|advanced|resource|dashboard|edge|all (default: all)
 #   test_number: specific test number to run (optional)
 set -e
 # Color codes for output
 RED='\033[0;31m'
 GREEN='\033[0;32m'
 YELLOW='\033[1;33m'
 BLUE='\033[0;34m'
 NC='\033[0m' # No Color
 # Test counters
 TOTAL_TESTS=0
 PASSED_TESTS=0
 FAILED_TESTS=0
 SKIPPED_TESTS=0
 # Get script directory
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 # Print header
 print_header() {
    # Print the title given as $1 between two rule lines, with a blank line
    # before and after the banner.
    local rule="=========================================="
    echo ""
    echo "$rule"
    echo "$1"
    echo "$rule"
    echo ""
 }
 # Print test result
 print_result() {
    # Report one test outcome and bump the matching global counter.
    #   $1 - test name shown to the user
    #   $2 - outcome: PASS, FAIL or SKIP (any other value prints nothing)
    local name=$1
    local outcome=$2
    case "$outcome" in
        PASS)
            echo -e "${GREEN}✅ PASS${NC}: $name"
            PASSED_TESTS=$((PASSED_TESTS + 1))
            ;;
        FAIL)
            echo -e "${RED}❌ FAIL${NC}: $name"
            FAILED_TESTS=$((FAILED_TESTS + 1))
            ;;
        SKIP)
            echo -e "${YELLOW}⏭️  SKIP${NC}: $name"
            SKIPPED_TESTS=$((SKIPPED_TESTS + 1))
            ;;
    esac
 }
 # Run a single test
 run_test() {
    # Execute the shell test script at $1 with bash, count it in TOTAL_TESTS
    # and record the outcome. Returns 0 when the script succeeds, 1 otherwise.
    local script=$1
    local label=$(basename "$script")
    echo ""
    echo -e "${BLUE}Running:${NC} $label"
    echo "----------------------------------------"
    TOTAL_TESTS=$((TOTAL_TESTS + 1))
    if ! bash "$script"; then
        print_result "$label" "FAIL"
        return 1
    fi
    print_result "$label" "PASS"
    return 0
 }
 # Run Python test
 run_python_test() {
    # Execute the Python test at $1 with `python`, count it in TOTAL_TESTS and
    # record the outcome. Returns 0 when the test exits cleanly, 1 otherwise.
    local script=$1
    local label=$(basename "$script")
    echo ""
    echo -e "${BLUE}Running:${NC} $label"
    echo "----------------------------------------"
    TOTAL_TESTS=$((TOTAL_TESTS + 1))
    if ! python "$script"; then
        print_result "$label" "FAIL"
        return 1
    fi
    print_result "$label" "PASS"
    return 0
 }
 # Run tests in a category
 run_category() {
    # Run the tests stored in one category directory below $SCRIPT_DIR.
    #   $1 - category name (sub-directory of $SCRIPT_DIR)
    #   $2 - optional test number; when given, only the first matching test runs
    # Returns non-zero when the category or the requested test does not exist,
    # or when the single requested test fails. When a whole category is run,
    # failures are only recorded in the counters.
    local category=$1
    local test_number=$2
    local category_dir="$SCRIPT_DIR/$category"
    # Dashboard tests are Python scripts; every other category holds shell
    # scripts. Choosing this up front lets a numbered dashboard test be found
    # too, instead of always searching for *.sh.
    local extension="sh"
    local runner="run_test"
    if [[ "$category" == "dashboard" ]]; then
        extension="py"
        runner="run_python_test"
    fi
    if [[ ! -d "$category_dir" ]]; then
        echo -e "${RED}Error:${NC} Category '$category' not found"
        return 1
    fi
    print_header "Running $category tests"
    local test_file
    if [[ -n "$test_number" ]]; then
        # Run specific test; sort so the pick is deterministic when several
        # file names contain the number.
        test_file=$(find "$category_dir" -name "*${test_number}*.${extension}" | sort | head -n 1)
        if [[ -z "$test_file" ]]; then
            echo -e "${RED}Error:${NC} Test $test_number not found in $category"
            return 1
        fi
        "$runner" "$test_file"
    else
        # Run all tests in category
        for test_file in "$category_dir"/*."$extension"; do
            [[ -f "$test_file" ]] || continue
            "$runner" "$test_file" || true
        done
    fi
 }
 # Print summary
 print_summary() {
    # Print the total/passed/failed/skipped counters and a final verdict.
    # Returns 0 when no test failed, 1 otherwise.
    local rule="=========================================="
    echo ""
    echo "$rule"
    echo "Test Summary"
    echo "$rule"
    echo -e "Total:   $TOTAL_TESTS"
    echo -e "${GREEN}Passed:  $PASSED_TESTS${NC}"
    echo -e "${RED}Failed:  $FAILED_TESTS${NC}"
    echo -e "${YELLOW}Skipped: $SKIPPED_TESTS${NC}"
    echo ""
    if [[ $FAILED_TESTS -ne 0 ]]; then
        echo -e "${RED}❌ Some tests failed${NC}"
        return 1
    fi
    echo -e "${GREEN}✅ All tests passed!${NC}"
    return 0
 }
 # Main execution
 main() {
    # Entry point: run one category (optionally a single test) or all
    # non-dashboard categories, then print the summary.
    #   $1 - category name, defaults to "all"
    #   $2 - optional test number (ignored when the category is "all")
    # Returns non-zero when a test failed or the requested category/test
    # could not be run.
    local category=${1:-all}
    local test_number=$2
    local status=0
    # Activate virtual environment
    if [[ -f "venv/bin/activate" ]]; then
        source venv/bin/activate
    else
        echo -e "${YELLOW}Warning:${NC} venv not found, some tests may fail"
    fi
    print_header "Crawl4AI CLI E2E Test Suite"
    if [[ "$category" == "all" ]]; then
        # Run all categories
        for cat in basic advanced resource edge; do
            run_category "$cat" || true
        done
        # Dashboard tests separately (can be slow)
        echo ""
        echo -e "${YELLOW}Note:${NC} Dashboard tests can be run separately with: ./run_tests.sh dashboard"
    else
        # Capture the status instead of letting `set -e` abort the script here,
        # otherwise a failing test would skip the summary entirely.
        run_category "$category" "$test_number" || status=$?
    fi
    print_summary || status=$?
    return $status
 }
 # Show usage
 # Handle -h/--help before main is called, so asking for usage never starts
 # any test.
 if [[ "$1" == "-h" || "$1" == "--help" ]]; then
    echo "Usage: $0 [category] [test_number]"
    echo ""
    echo "Categories:"
    echo "  basic      - Basic CLI operations (8 tests)"
    echo "  advanced   - Advanced features (8 tests)"
    echo "  resource   - Resource monitoring and stress tests (5 tests)"
    echo "  dashboard  - Dashboard UI tests with Playwright (1 test)"
    echo "  edge       - Edge cases and error handling (10 tests)"
    echo "  all        - Run all tests except dashboard (default)"
    echo ""
    echo "Examples:"
    echo "  $0                    # Run all tests"
    echo "  $0 basic              # Run all basic tests"
    echo "  $0 basic 01           # Run test_01 from basic"
    echo "  $0 dashboard          # Run dashboard UI test"
    exit 0
 fi
 # Run the suite, forwarding the command-line arguments (category and
 # optional test number) unchanged.
 main "$@"
--- a/deploy/docker/tests/codebase_test/test_1_basic.py
+++ b/deploy/docker/tests/codebase_test/test_1_basic.py
@@ -1,138 +0,0 @@
 #!/usr/bin/env python3
 """
 Test 1: Basic Container Health + Single Endpoint
 - Starts container
 - Hits /health endpoint 10 times
 - Reports success rate and basic latency
 """
 import asyncio
 import time
 import docker
 import httpx
 # Config
 IMAGE = "crawl4ai-local:latest"
 CONTAINER_NAME = "crawl4ai-test"
 PORT = 11235
 REQUESTS = 10
 async def test_endpoint(url: str, count: int):
    """Send ``count`` sequential GET requests to ``url`` and collect the outcomes.

    Returns a list with one dict per request: on a response it holds
    ``success`` (status code was 200), ``latency_ms`` and ``status``; when the
    request raised it holds ``success=False``, ``latency_ms=None`` and ``error``.
    A progress line is printed for every request.
    """
    outcomes = []
    async with httpx.AsyncClient(timeout=30.0) as client:
        for attempt in range(1, count + 1):
            began = time.time()
            try:
                resp = await client.get(url)
                elapsed = (time.time() - began) * 1000  # ms
                outcomes.append(dict(
                    success=resp.status_code == 200,
                    latency_ms=elapsed,
                    status=resp.status_code,
                ))
                print(f"  [{attempt}/{count}] ✓ {resp.status_code} - {elapsed:.0f}ms")
            except Exception as e:
                outcomes.append(dict(success=False, latency_ms=None, error=str(e)))
                print(f"  [{attempt}/{count}] ✗ Error: {e}")
    return outcomes
 def start_container(client, image: str, name: str, port: int):
    """Start a fresh container and wait until its /health endpoint answers 200.

    Any existing container called ``name`` is stopped and removed first. The
    new container is polled once per second for up to 30 seconds.

    Returns the running container object.
    Raises TimeoutError if the container never becomes healthy; the container
    is removed before raising so a failed start does not leave it behind.
    """
    # Imported here, as in the original, so the module itself does not
    # require `requests`.
    import requests

    # Clean up existing
    try:
        old = client.containers.get(name)
        print(f"🧹 Stopping existing container '{name}'...")
        old.stop()
        old.remove()
    except docker.errors.NotFound:
        pass
    print(f"🚀 Starting container '{name}' from image '{image}'...")
    container = client.containers.run(
        image,
        name=name,
        ports={f"{port}/tcp": port},
        detach=True,
        shm_size="1g",
        environment={"PYTHON_ENV": "production"}
    )
    # Wait for health
    print("⏳ Waiting for container to be healthy...")
    for _ in range(30):  # 30s timeout
        time.sleep(1)
        container.reload()
        if container.status != "running":
            continue
        try:
            # Quick health check
            resp = requests.get(f"http://localhost:{port}/health", timeout=2)
        except requests.RequestException:
            # Server is not accepting connections yet; keep polling.
            continue
        if resp.status_code == 200:
            print("✅ Container healthy!")
            return container
    # The caller never receives the container on failure, so it has to be
    # cleaned up here or it would keep running and block the next start.
    try:
        container.remove(force=True)
    except docker.errors.APIError as exc:
        print(f"⚠️  Could not remove container '{name}': {exc}")
    raise TimeoutError("Container failed to start")
 def stop_container(container):
    """Stop the given container, then remove it, announcing both steps."""
    print("🛑 Stopping container...")
    container.stop()
    container.remove()
    print("✅ Container removed")
 async def main() -> int:
    """Run Test 1: start the container, hit /health REQUESTS times, report.

    Returns 0 when every request succeeded, 1 on any failed request or on an
    unexpected error. The container is stopped and removed in all cases once
    it has been started.
    """
    print("="*60)
    print("TEST 1: Basic Container Health + Single Endpoint")
    print("="*60)
    client = docker.from_env()
    container = None
    try:
        # Start container
        container = start_container(client, IMAGE, CONTAINER_NAME, PORT)
        # Test /health endpoint
        print(f"\n📊 Testing /health endpoint ({REQUESTS} requests)...")
        url = f"http://localhost:{PORT}/health"
        results = await test_endpoint(url, REQUESTS)
        # Calculate stats
        successes = sum(1 for r in results if r["success"])
        success_rate = (successes / len(results)) * 100
        # Requests that raised carry latency_ms=None and are left out of the
        # latency figures.
        latencies = [r["latency_ms"] for r in results if r["latency_ms"] is not None]
        avg_latency = sum(latencies) / len(latencies) if latencies else 0
        # Print results
        print(f"\n{'='*60}")
        print(f"RESULTS:")
        print(f"  Success Rate: {success_rate:.1f}% ({successes}/{len(results)})")
        print(f"  Avg Latency:  {avg_latency:.0f}ms")
        if latencies:
            print(f"  Min Latency:  {min(latencies):.0f}ms")
            print(f"  Max Latency:  {max(latencies):.0f}ms")
        print(f"{'='*60}")
        # Pass/Fail
        if success_rate >= 100:
            print(f"✅ TEST PASSED")
            return 0
        else:
            print(f"❌ TEST FAILED (expected 100% success rate)")
            return 1
    except Exception as e:
        print(f"\n❌ TEST ERROR: {e}")
        return 1
    finally:
        # container stays None when start_container raised.
        if container:
            stop_container(container)
 # Script entry point: the value returned by main() becomes the exit code.
 if __name__ == "__main__":
    exit_code = asyncio.run(main())
    exit(exit_code)
--- a/deploy/docker/tests/codebase_test/test_2_memory.py
+++ b/deploy/docker/tests/codebase_test/test_2_memory.py
@@ -1,205 +0,0 @@
 #!/usr/bin/env python3
 """
 Test 2: Docker Stats Monitoring
 - Extends Test 1 with real-time container stats
 - Monitors memory % and CPU during requests
 - Reports baseline, peak, and final memory
 """
 import asyncio
 import time
 import docker
 import httpx
 from threading import Thread, Event
 # Config
 IMAGE = "crawl4ai-local:latest"
 CONTAINER_NAME = "crawl4ai-test"
 PORT = 11235
 REQUESTS = 20  # More requests to see memory usage
 # Stats tracking
 stats_history = []
 stop_monitoring = Event()
 def monitor_stats(container):
    """Background thread body: record memory and CPU samples for ``container``.

    Iterates the container's decoded stats stream and appends one dict per
    sample (``timestamp``, ``memory_mb``, ``memory_percent``, ``cpu_percent``)
    to the module-level ``stats_history``. Stops once ``stop_monitoring`` is set.
    """
    for stat in container.stats(decode=True, stream=True):
        if stop_monitoring.is_set():
            break
        try:
            # Extract memory stats
            memory = stat['memory_stats']
            mem_usage = memory.get('usage', 0) / (1024 * 1024)  # MB
            mem_limit = memory.get('limit', 1) / (1024 * 1024)
            mem_percent = (mem_usage / mem_limit * 100) if mem_limit > 0 else 0
            # Extract CPU stats (handle missing fields on Mac)
            cpu_percent = 0
            try:
                cpu_delta = stat['cpu_stats']['cpu_usage']['total_usage'] - \
                           stat['precpu_stats']['cpu_usage']['total_usage']
                system_delta = stat['cpu_stats'].get('system_cpu_usage', 0) - \
                              stat['precpu_stats'].get('system_cpu_usage', 0)
                if system_delta > 0:
                    num_cpus = stat['cpu_stats'].get('online_cpus', 1)
                    cpu_percent = (cpu_delta / system_delta * num_cpus * 100.0)
            except (KeyError, ZeroDivisionError):
                pass
        except (KeyError, TypeError, AttributeError):
            # Skip malformed stats; only the errors a missing or mistyped
            # field can cause are ignored, anything else should surface.
            pass
        else:
            stats_history.append({
                'timestamp': time.time(),
                'memory_mb': mem_usage,
                'memory_percent': mem_percent,
                'cpu_percent': cpu_percent
            })
        # Pause up to 500ms between samples, but return immediately when a
        # stop is requested instead of sleeping through it.
        if stop_monitoring.wait(0.5):
            break
 async def test_endpoint(url: str, count: int):
    """Send ``count`` sequential GET requests to ``url`` and collect the outcomes.

    Returns a list with one dict per request: ``success`` and ``latency_ms``
    on a response, ``success=False`` and ``error`` when the request raised.
    Progress is printed for every fifth request and for every error.
    """
    outcomes = []
    async with httpx.AsyncClient(timeout=30.0) as client:
        for attempt in range(1, count + 1):
            began = time.time()
            try:
                resp = await client.get(url)
                elapsed = (time.time() - began) * 1000
                outcomes.append(dict(success=resp.status_code == 200, latency_ms=elapsed))
                if attempt % 5 == 0:  # Print every 5 requests
                    print(f"  [{attempt}/{count}] ✓ {resp.status_code} - {elapsed:.0f}ms")
            except Exception as e:
                outcomes.append(dict(success=False, error=str(e)))
                print(f"  [{attempt}/{count}] ✗ Error: {e}")
    return outcomes
 def start_container(client, image: str, name: str, port: int):
    """Start a fresh container (4g memory limit) and wait for /health to answer 200.

    Any existing container called ``name`` is stopped and removed first. The
    new container is polled once per second for up to 30 seconds.

    Returns the running container object.
    Raises TimeoutError if the container never becomes healthy; the container
    is removed before raising so a failed start does not leave it behind.
    """
    # Imported here, as in the original, so the module itself does not
    # require `requests`.
    import requests

    try:
        old = client.containers.get(name)
        print(f"🧹 Stopping existing container '{name}'...")
        old.stop()
        old.remove()
    except docker.errors.NotFound:
        pass
    print(f"🚀 Starting container '{name}'...")
    container = client.containers.run(
        image,
        name=name,
        ports={f"{port}/tcp": port},
        detach=True,
        shm_size="1g",
        mem_limit="4g",  # Set explicit memory limit
    )
    print("⏳ Waiting for health...")
    for _ in range(30):
        time.sleep(1)
        container.reload()
        if container.status != "running":
            continue
        try:
            resp = requests.get(f"http://localhost:{port}/health", timeout=2)
        except requests.RequestException:
            # Server is not accepting connections yet; keep polling.
            continue
        if resp.status_code == 200:
            print("✅ Container healthy!")
            return container
    # The caller never receives the container on failure, so it has to be
    # cleaned up here or it would keep running and block the next start.
    try:
        container.remove(force=True)
    except docker.errors.APIError as exc:
        print(f"⚠️  Could not remove container '{name}': {exc}")
    raise TimeoutError("Container failed to start")
 def stop_container(container):
    """Stop the given container and then remove it."""
    print("🛑 Stopping container...")
    container.stop()
    container.remove()
 async def main() -> int:
    """Run Test 2: hit /health REQUESTS times while sampling container stats.

    Returns 0 when every request succeeded and memory grew by less than
    100 MB between the baseline and the final sample; 1 otherwise or on an
    unexpected error. The container is stopped and removed in all cases once
    it has been started.
    """
    print("="*60)
    print("TEST 2: Docker Stats Monitoring")
    print("="*60)
    client = docker.from_env()
    container = None
    monitor_thread = None
    try:
        # Start container
        container = start_container(client, IMAGE, CONTAINER_NAME, PORT)
        # Start stats monitoring in background
        print(f"\n📊 Starting stats monitor...")
        # Reset the shared flag and history before the thread starts.
        stop_monitoring.clear()
        stats_history.clear()
        monitor_thread = Thread(target=monitor_stats, args=(container,), daemon=True)
        monitor_thread.start()
        # Wait a bit for baseline
        await asyncio.sleep(2)
        # Falls back to 0 when no sample has arrived yet.
        baseline_mem = stats_history[-1]['memory_mb'] if stats_history else 0
        print(f"📏 Baseline memory: {baseline_mem:.1f} MB")
        # Test /health endpoint
        print(f"\n🔄 Running {REQUESTS} requests to /health...")
        url = f"http://localhost:{PORT}/health"
        results = await test_endpoint(url, REQUESTS)
        # Wait a bit to capture peak
        await asyncio.sleep(1)
        # Stop monitoring
        stop_monitoring.set()
        if monitor_thread:
            monitor_thread.join(timeout=2)
        # Calculate stats
        successes = sum(1 for r in results if r.get("success"))
        success_rate = (successes / len(results)) * 100
        # Failed requests carry no latency_ms key and are left out.
        latencies = [r["latency_ms"] for r in results if "latency_ms" in r]
        avg_latency = sum(latencies) / len(latencies) if latencies else 0
        # Memory stats
        memory_samples = [s['memory_mb'] for s in stats_history]
        peak_mem = max(memory_samples) if memory_samples else 0
        final_mem = memory_samples[-1] if memory_samples else 0
        mem_delta = final_mem - baseline_mem
        # Print results
        print(f"\n{'='*60}")
        print(f"RESULTS:")
        print(f"  Success Rate: {success_rate:.1f}% ({successes}/{len(results)})")
        print(f"  Avg Latency:  {avg_latency:.0f}ms")
        print(f"\n  Memory Stats:")
        print(f"    Baseline: {baseline_mem:.1f} MB")
        print(f"    Peak:     {peak_mem:.1f} MB")
        print(f"    Final:    {final_mem:.1f} MB")
        print(f"    Delta:    {mem_delta:+.1f} MB")
        print(f"{'='*60}")
        # Pass/Fail
        if success_rate >= 100 and mem_delta < 100:  # No significant memory growth
            print(f"✅ TEST PASSED")
            return 0
        else:
            if success_rate < 100:
                print(f"❌ TEST FAILED (success rate < 100%)")
            # Although worded as a warning, memory growth of 100 MB or more
            # also makes this branch return 1.
            if mem_delta >= 100:
                print(f"⚠️  WARNING: Memory grew by {mem_delta:.1f} MB")
            return 1
    except Exception as e:
        print(f"\n❌ TEST ERROR: {e}")
        return 1
    finally:
        # Set the flag again so the monitor thread also stops on error paths.
        stop_monitoring.set()
        if container:
            stop_container(container)
 # Script entry point: the value returned by main() becomes the exit code.
 if __name__ == "__main__":
    exit_code = asyncio.run(main())
    exit(exit_code)
--- a/deploy/docker/tests/codebase_test/test_3_pool.py
+++ b/deploy/docker/tests/codebase_test/test_3_pool.py
@@ -1,229 +0,0 @@
 #!/usr/bin/env python3
 """
 Test 3: Pool Validation - Permanent Browser Reuse
 - Tests /html endpoint (should use permanent browser)
 - Monitors container logs for pool hit markers
 - Validates browser reuse rate
 - Checks memory after browser creation
 """
 import asyncio
 import time
 import docker
 import httpx
 from threading import Thread, Event
 # Config
 IMAGE = "crawl4ai-local:latest"
 CONTAINER_NAME = "crawl4ai-test"
 PORT = 11235
 REQUESTS = 30
 # Stats tracking
 stats_history = []
 stop_monitoring = Event()
 def monitor_stats(container):
    """Background thread body: record memory samples for ``container``.

    Iterates the container's decoded stats stream and appends one dict per
    sample (``timestamp``, ``memory_mb``) to the module-level
    ``stats_history``. Stops once ``stop_monitoring`` is set.
    """
    for stat in container.stats(decode=True, stream=True):
        if stop_monitoring.is_set():
            break
        try:
            mem_usage = stat['memory_stats'].get('usage', 0) / (1024 * 1024)
        except (KeyError, TypeError, AttributeError):
            # Malformed sample: skip it. A bare `except:` here would also
            # hide unrelated errors and SystemExit.
            pass
        else:
            stats_history.append({
                'timestamp': time.time(),
                'memory_mb': mem_usage,
            })
        # Pause up to 500ms, but return immediately when a stop is requested.
        if stop_monitoring.wait(0.5):
            break
 def count_log_markers(container):
    """Count the pool-usage marker lines found in the container's logs.

    Returns a dict with the number of occurrences of each marker plus
    ``total_hits``, the sum of the permanent, hot and cold counts.
    """
    text = container.logs().decode('utf-8')
    markers = {
        'permanent_hits': "🔥 Using permanent browser",
        'hot_hits': "♨️  Using hot pool browser",
        'cold_hits': "❄️  Using cold pool browser",
        'new_created': "🆕 Creating new browser",
    }
    counts = {key: text.count(marker) for key, marker in markers.items()}
    counts['total_hits'] = (
        counts['permanent_hits'] + counts['hot_hits'] + counts['cold_hits']
    )
    return counts
 async def test_endpoint(url: str, count: int):
    """POST the fixed httpbin payload to ``url`` ``count`` times, sequentially.

    Returns a list with one dict per request: ``success`` and ``latency_ms``
    on a response, ``success=False`` and ``error`` when the request raised.
    Progress is printed for every tenth request and for every error.
    """
    outcomes = []
    async with httpx.AsyncClient(timeout=60.0) as client:
        for attempt in range(1, count + 1):
            began = time.time()
            try:
                resp = await client.post(url, json={"url": "https://httpbin.org/html"})
                elapsed = (time.time() - began) * 1000
                outcomes.append(dict(success=resp.status_code == 200, latency_ms=elapsed))
                if attempt % 10 == 0:
                    print(f"  [{attempt}/{count}] ✓ {resp.status_code} - {elapsed:.0f}ms")
            except Exception as e:
                outcomes.append(dict(success=False, error=str(e)))
                print(f"  [{attempt}/{count}] ✗ Error: {e}")
    return outcomes
 def start_container(client, image: str, name: str, port: int):
    """Start a fresh container (4g memory limit) and wait for /health to answer 200.

    Any existing container called ``name`` is stopped and removed first. The
    new container is polled once per second for up to 30 seconds.

    Returns the running container object.
    Raises TimeoutError if the container never becomes healthy; the container
    is removed before raising so a failed start does not leave it behind.
    """
    # Imported here, as in the original, so the module itself does not
    # require `requests`.
    import requests

    try:
        old = client.containers.get(name)
        print("🧹 Stopping existing container...")
        old.stop()
        old.remove()
    except docker.errors.NotFound:
        pass
    print("🚀 Starting container...")
    container = client.containers.run(
        image,
        name=name,
        ports={f"{port}/tcp": port},
        detach=True,
        shm_size="1g",
        mem_limit="4g",
    )
    print("⏳ Waiting for health...")
    for _ in range(30):
        time.sleep(1)
        container.reload()
        if container.status != "running":
            continue
        try:
            resp = requests.get(f"http://localhost:{port}/health", timeout=2)
        except requests.RequestException:
            # Server is not accepting connections yet; keep polling.
            continue
        if resp.status_code == 200:
            print("✅ Container healthy!")
            return container
    # The caller never receives the container on failure, so it has to be
    # cleaned up here or it would keep running and block the next start.
    try:
        container.remove(force=True)
    except docker.errors.APIError as exc:
        print(f"⚠️  Could not remove container '{name}': {exc}")
    raise TimeoutError("Container failed to start")
 def stop_container(container):
    """Stop the given container and then remove it."""
    print("🛑 Stopping container...")
    container.stop()
    container.remove()
 async def main() -> int:
    """Run Test 3: send REQUESTS posts to /html and check browser-pool reuse.

    Returns 0 when every request succeeded and the reuse rate derived from the
    container's log markers is at least 80%; 1 otherwise or on an unexpected
    error. Low permanent-hit counts and memory growth above 200 MB only
    produce warnings. The container is stopped and removed in all cases once
    it has been started.
    """
    print("="*60)
    print("TEST 3: Pool Validation - Permanent Browser Reuse")
    print("="*60)
    client = docker.from_env()
    container = None
    monitor_thread = None
    try:
        # Start container
        container = start_container(client, IMAGE, CONTAINER_NAME, PORT)
        # Wait for permanent browser initialization
        print(f"\n⏳ Waiting for permanent browser init (3s)...")
        await asyncio.sleep(3)
        # Start stats monitoring
        print(f"📊 Starting stats monitor...")
        # Reset the shared flag and history before the thread starts.
        stop_monitoring.clear()
        stats_history.clear()
        monitor_thread = Thread(target=monitor_stats, args=(container,), daemon=True)
        monitor_thread.start()
        await asyncio.sleep(1)
        # Falls back to 0 when no sample has arrived yet.
        baseline_mem = stats_history[-1]['memory_mb'] if stats_history else 0
        print(f"📏 Baseline (with permanent browser): {baseline_mem:.1f} MB")
        # Test /html endpoint (uses permanent browser for default config)
        print(f"\n🔄 Running {REQUESTS} requests to /html...")
        url = f"http://localhost:{PORT}/html"
        results = await test_endpoint(url, REQUESTS)
        # Wait a bit
        await asyncio.sleep(1)
        # Stop monitoring
        stop_monitoring.set()
        if monitor_thread:
            monitor_thread.join(timeout=2)
        # Analyze logs for pool markers
        print(f"\n📋 Analyzing pool usage...")
        pool_stats = count_log_markers(container)
        # Calculate request stats
        successes = sum(1 for r in results if r.get("success"))
        success_rate = (successes / len(results)) * 100
        # Failed requests carry no latency_ms key and are left out.
        latencies = [r["latency_ms"] for r in results if "latency_ms" in r]
        avg_latency = sum(latencies) / len(latencies) if latencies else 0
        # Memory stats
        memory_samples = [s['memory_mb'] for s in stats_history]
        peak_mem = max(memory_samples) if memory_samples else 0
        final_mem = memory_samples[-1] if memory_samples else 0
        mem_delta = final_mem - baseline_mem
        # Calculate reuse rate
        total_requests = len(results)
        total_pool_hits = pool_stats['total_hits']
        reuse_rate = (total_pool_hits / total_requests * 100) if total_requests > 0 else 0
        # Print results
        print(f"\n{'='*60}")
        print(f"RESULTS:")
        print(f"  Success Rate: {success_rate:.1f}% ({successes}/{len(results)})")
        print(f"  Avg Latency:  {avg_latency:.0f}ms")
        print(f"\n  Pool Stats:")
        print(f"    🔥 Permanent Hits: {pool_stats['permanent_hits']}")
        print(f"    ♨️  Hot Pool Hits:   {pool_stats['hot_hits']}")
        print(f"    ❄️  Cold Pool Hits:  {pool_stats['cold_hits']}")
        print(f"    🆕 New Created:    {pool_stats['new_created']}")
        print(f"    📊 Reuse Rate:     {reuse_rate:.1f}%")
        print(f"\n  Memory Stats:")
        print(f"    Baseline: {baseline_mem:.1f} MB")
        print(f"    Peak:     {peak_mem:.1f} MB")
        print(f"    Final:    {final_mem:.1f} MB")
        print(f"    Delta:    {mem_delta:+.1f} MB")
        print(f"{'='*60}")
        # Pass/Fail
        passed = True
        if success_rate < 100:
            print(f"❌ FAIL: Success rate {success_rate:.1f}% < 100%")
            passed = False
        if reuse_rate < 80:
            print(f"❌ FAIL: Reuse rate {reuse_rate:.1f}% < 80% (expected high permanent browser usage)")
            passed = False
        # The two checks below only warn; they do not change `passed`.
        if pool_stats['permanent_hits'] < (total_requests * 0.8):
            print(f"⚠️  WARNING: Only {pool_stats['permanent_hits']} permanent hits out of {total_requests} requests")
        if mem_delta > 200:
            print(f"⚠️  WARNING: Memory grew by {mem_delta:.1f} MB (possible browser leak)")
        if passed:
            print(f"✅ TEST PASSED")
            return 0
        else:
            return 1
    except Exception as e:
        print(f"\n❌ TEST ERROR: {e}")
        import traceback
        traceback.print_exc()
        return 1
    finally:
        # Set the flag again so the monitor thread also stops on error paths.
        stop_monitoring.set()
        if container:
            stop_container(container)
 # Script entry point: the value returned by main() becomes the exit code.
 if __name__ == "__main__":
    exit_code = asyncio.run(main())
    exit(exit_code)
--- a/deploy/docker/tests/codebase_test/test_4_concurrent.py
+++ b/deploy/docker/tests/codebase_test/test_4_concurrent.py
@@ -1,236 +0,0 @@
 #!/usr/bin/env python3
 """
 Test 4: Concurrent Load Testing
 - Tests pool under concurrent load
 - Escalates: 10 → 50 → 100 concurrent requests
 - Validates latency distribution (P50, P95, P99)
 - Monitors memory stability
 """
 import asyncio
 import time
 import docker
 import httpx
 from threading import Thread, Event
 from collections import defaultdict
 # Config
 IMAGE = "crawl4ai-local:latest"
 CONTAINER_NAME = "crawl4ai-test"
 PORT = 11235
 LOAD_LEVELS = [
    {"name": "Light", "concurrent": 10, "requests": 20},
    {"name": "Medium", "concurrent": 50, "requests": 100},
    {"name": "Heavy", "concurrent": 100, "requests": 200},
 ]
 # Stats
 stats_history = []
 stop_monitoring = Event()
 def monitor_stats(container):
    """Background thread body: record memory samples for ``container``.

    Iterates the container's decoded stats stream and appends one dict per
    sample (``timestamp``, ``memory_mb``) to the module-level
    ``stats_history``. Stops once ``stop_monitoring`` is set.
    """
    for stat in container.stats(decode=True, stream=True):
        if stop_monitoring.is_set():
            break
        try:
            mem_usage = stat['memory_stats'].get('usage', 0) / (1024 * 1024)
        except (KeyError, TypeError, AttributeError):
            # Malformed sample: skip it. A bare `except:` here would also
            # hide unrelated errors and SystemExit.
            pass
        else:
            stats_history.append({'timestamp': time.time(), 'memory_mb': mem_usage})
        # Pause up to 500ms, but return immediately when a stop is requested.
        if stop_monitoring.wait(0.5):
            break
 def count_log_markers(container):
    """Count how often each pool-usage marker appears in the container's logs."""
    text = container.logs().decode('utf-8')
    markers = (
        ('permanent', "🔥 Using permanent browser"),
        ('hot', "♨️  Using hot pool browser"),
        ('cold', "❄️  Using cold pool browser"),
        ('new', "🆕 Creating new browser"),
    )
    return {key: text.count(marker) for key, marker in markers}
 async def hit_endpoint(client, url, payload, semaphore):
    """POST ``payload`` to ``url`` once while holding ``semaphore``.

    Returns ``{"success", "latency_ms"}`` when a response arrives (success
    means status code 200) or ``{"success": False, "error"}`` when the
    request raised.
    """
    async with semaphore:
        began = time.time()
        try:
            response = await client.post(url, json=payload, timeout=60.0)
            latency = (time.time() - began) * 1000
            outcome = {"success": response.status_code == 200, "latency_ms": latency}
        except Exception as exc:
            outcome = {"success": False, "error": str(exc)}
        return outcome
 async def run_concurrent_test(url, payload, concurrent, total_requests):
    """Fire ``total_requests`` posts at ``url``, at most ``concurrent`` in flight.

    Returns the list of per-request result dicts in submission order.
    """
    limiter = asyncio.Semaphore(concurrent)
    async with httpx.AsyncClient() as client:
        pending = [
            hit_endpoint(client, url, payload, limiter)
            for _ in range(total_requests)
        ]
        return await asyncio.gather(*pending)
 def calculate_percentiles(latencies):
    """Return the (P50, P95, P99) values of ``latencies``.

    Each percentile is the element at index ``int(n * fraction)`` of the
    sorted input; an empty input yields ``(0, 0, 0)``.
    """
    if not latencies:
        return 0, 0, 0
    ordered = sorted(latencies)
    size = len(ordered)
    return tuple(ordered[int(size * fraction)] for fraction in (0.50, 0.95, 0.99))
 def start_container(client, image, name, port):
    """Start a fresh container (4g memory limit) and wait for /health to answer 200.

    Any existing container called ``name`` is stopped and removed first. The
    new container is polled once per second for up to 30 seconds.

    Returns the running container object.
    Raises TimeoutError if the container never becomes healthy; the container
    is removed before raising so a failed start does not leave it behind.
    """
    # Imported here, as in the original, so the module itself does not
    # require `requests`.
    import requests

    try:
        old = client.containers.get(name)
        print("🧹 Stopping existing container...")
        old.stop()
        old.remove()
    except docker.errors.NotFound:
        pass
    print("🚀 Starting container...")
    container = client.containers.run(
        image, name=name, ports={f"{port}/tcp": port},
        detach=True, shm_size="1g", mem_limit="4g",
    )
    print("⏳ Waiting for health...")
    for _ in range(30):
        time.sleep(1)
        container.reload()
        if container.status != "running":
            continue
        try:
            resp = requests.get(f"http://localhost:{port}/health", timeout=2)
        except requests.RequestException:
            # Server is not accepting connections yet; keep polling.
            continue
        if resp.status_code == 200:
            print("✅ Container healthy!")
            return container
    # The caller never receives the container on failure, so it has to be
    # cleaned up here or it would keep running and block the next start.
    try:
        container.remove(force=True)
    except docker.errors.APIError as exc:
        print(f"⚠️  Could not remove container '{name}': {exc}")
    raise TimeoutError("Container failed to start")
 async def main() -> int:
    """Run Test 4: drive /html through every entry of LOAD_LEVELS and report.

    Returns 0 when every load level reached a success rate of at least 99%;
    1 otherwise or on an unexpected error. High P99 latency and memory growth
    above 300 MB only produce warnings. The container is stopped and removed
    in all cases once it has been started.
    """
    print("="*60)
    print("TEST 4: Concurrent Load Testing")
    print("="*60)
    client = docker.from_env()
    container = None
    monitor_thread = None
    try:
        container = start_container(client, IMAGE, CONTAINER_NAME, PORT)
        print(f"\n⏳ Waiting for permanent browser init (3s)...")
        await asyncio.sleep(3)
        # Start monitoring
        stop_monitoring.clear()
        stats_history.clear()
        monitor_thread = Thread(target=monitor_stats, args=(container,), daemon=True)
        monitor_thread.start()
        await asyncio.sleep(1)
        # Falls back to 0 when no sample has arrived yet.
        baseline_mem = stats_history[-1]['memory_mb'] if stats_history else 0
        print(f"📏 Baseline: {baseline_mem:.1f} MB\n")
        url = f"http://localhost:{PORT}/html"
        payload = {"url": "https://httpbin.org/html"}
        all_results = []
        level_stats = []
        # Run load levels
        for level in LOAD_LEVELS:
            print(f"{'='*60}")
            print(f"🔄 {level['name']} Load: {level['concurrent']} concurrent, {level['requests']} total")
            print(f"{'='*60}")
            start_time = time.time()
            results = await run_concurrent_test(url, payload, level['concurrent'], level['requests'])
            duration = time.time() - start_time
            successes = sum(1 for r in results if r.get("success"))
            success_rate = (successes / len(results)) * 100
            # Failed requests carry no latency_ms key and are left out.
            latencies = [r["latency_ms"] for r in results if "latency_ms" in r]
            p50, p95, p99 = calculate_percentiles(latencies)
            avg_lat = sum(latencies) / len(latencies) if latencies else 0
            print(f"  Duration:     {duration:.1f}s")
            print(f"  Success:      {success_rate:.1f}% ({successes}/{len(results)})")
            print(f"  Avg Latency:  {avg_lat:.0f}ms")
            print(f"  P50/P95/P99:  {p50:.0f}ms / {p95:.0f}ms / {p99:.0f}ms")
            level_stats.append({
                'name': level['name'],
                'concurrent': level['concurrent'],
                'success_rate': success_rate,
                'avg_latency': avg_lat,
                'p50': p50, 'p95': p95, 'p99': p99,
            })
            all_results.extend(results)
            await asyncio.sleep(2)  # Cool down between levels
        # Stop monitoring
        await asyncio.sleep(1)
        stop_monitoring.set()
        if monitor_thread:
            monitor_thread.join(timeout=2)
        # Final stats
        pool_stats = count_log_markers(container)
        memory_samples = [s['memory_mb'] for s in stats_history]
        peak_mem = max(memory_samples) if memory_samples else 0
        final_mem = memory_samples[-1] if memory_samples else 0
        print(f"\n{'='*60}")
        print(f"FINAL RESULTS:")
        print(f"{'='*60}")
        print(f"  Total Requests: {len(all_results)}")
        print(f"\n  Pool Utilization:")
        print(f"    🔥 Permanent: {pool_stats['permanent']}")
        print(f"    ♨️  Hot:       {pool_stats['hot']}")
        print(f"    ❄️  Cold:      {pool_stats['cold']}")
        print(f"    🆕 New:       {pool_stats['new']}")
        print(f"\n  Memory:")
        print(f"    Baseline: {baseline_mem:.1f} MB")
        print(f"    Peak:     {peak_mem:.1f} MB")
        print(f"    Final:    {final_mem:.1f} MB")
        print(f"    Delta:    {final_mem - baseline_mem:+.1f} MB")
        print(f"{'='*60}")
        # Pass/Fail
        passed = True
        for ls in level_stats:
            if ls['success_rate'] < 99:
                print(f"❌ FAIL: {ls['name']} success rate {ls['success_rate']:.1f}% < 99%")
                passed = False
            # Latency and memory checks only warn; they do not change `passed`.
            if ls['p99'] > 10000:  # 10s threshold
                print(f"⚠️  WARNING: {ls['name']} P99 latency {ls['p99']:.0f}ms very high")
        if final_mem - baseline_mem > 300:
            print(f"⚠️  WARNING: Memory grew {final_mem - baseline_mem:.1f} MB")
        if passed:
            print(f"✅ TEST PASSED")
            return 0
        else:
            return 1
    except Exception as e:
        print(f"\n❌ TEST ERROR: {e}")
        import traceback
        traceback.print_exc()
        return 1
    finally:
        # Set the flag again so the monitor thread also stops on error paths.
        stop_monitoring.set()
        if container:
            print(f"🛑 Stopping container...")
            container.stop()
            container.remove()
 # Script entry point: the value returned by main() becomes the exit code.
 if __name__ == "__main__":
    exit_code = asyncio.run(main())
    exit(exit_code)
--- a/deploy/docker/tests/codebase_test/test_5_pool_stress.py
+++ b/deploy/docker/tests/codebase_test/test_5_pool_stress.py
@@ -1,267 +0,0 @@
 #!/usr/bin/env python3
 """
 Test 5: Pool Stress - Mixed Configs
 - Tests hot/cold pool with different browser configs
 - Uses different viewports to create config variants
 - Validates cold → hot promotion after 3 uses
 - Monitors pool tier distribution
 """
 import asyncio
 import time
 import docker
 import httpx
 from threading import Thread, Event
 import random
 # Config: image to run, container name, and the port used both for the host
 # port mapping and for the request URLs.
 IMAGE = "crawl4ai-local:latest"
 CONTAINER_NAME = "crawl4ai-test"
 PORT = 11235
 REQUESTS_PER_CONFIG = 5  # 5 requests per config variant
 # Different viewport configs to test pool tiers.
 # None means "send an empty browser_config" (see crawl_with_viewport).
 VIEWPORT_CONFIGS = [
    None,  # Default (permanent browser)
    {"width": 1920, "height": 1080},  # Desktop
    {"width": 1024, "height": 768},   # Tablet
    {"width": 375, "height": 667},    # Mobile
 ]
 # Stats shared with the monitor thread: monitor_stats() appends samples to
 # stats_history and exits once stop_monitoring is set.
 stats_history = []
 stop_monitoring = Event()
 def monitor_stats(container):
    """Collect container memory samples until ``stop_monitoring`` is set.

    Meant to run in a background thread. Every usable sample from the stats
    stream is appended to the module-level ``stats_history`` as
    ``{'timestamp': <epoch seconds>, 'memory_mb': <usage / 1024**2>}``.

    Args:
        container: Object exposing ``stats(decode=True, stream=True)`` that
            yields one dict per sample.
    """
    for stat in container.stats(decode=True, stream=True):
        # The stop flag is only checked when the stream yields a sample.
        if stop_monitoring.is_set():
            break
        try:
            mem_usage = stat['memory_stats'].get('usage', 0) / (1024 * 1024)
        except (KeyError, AttributeError, TypeError):
            # Malformed or partial sample: skip it instead of killing the
            # monitor thread, but let KeyboardInterrupt/SystemExit through.
            pass
        else:
            stats_history.append({'timestamp': time.time(), 'memory_mb': mem_usage})
        time.sleep(0.5)
 def analyze_pool_logs(container):
    """Count pool-related marker lines in the container's logs.

    Args:
        container: Object whose ``logs()`` returns the log output as bytes.

    Returns:
        dict with the number of occurrences of each marker under the keys
        ``permanent``, ``hot``, ``cold``, ``new`` and ``promotions``, plus
        ``total`` = permanent + hot + cold.
    """
    # Decode leniently: a stray non-UTF-8 byte in the log stream must not
    # abort the whole analysis, and it cannot affect the marker counts.
    logs = container.logs().decode('utf-8', errors='replace')
    permanent = logs.count("🔥 Using permanent browser")
    hot = logs.count("♨️  Using hot pool browser")
    cold = logs.count("❄️  Using cold pool browser")
    new = logs.count("🆕 Creating new browser")
    promotions = logs.count("⬆️  Promoting to hot pool")
    return {
        'permanent': permanent,
        'hot': hot,
        'cold': cold,
        'new': new,
        'promotions': promotions,
        'total': permanent + hot + cold
    }
 async def crawl_with_viewport(client, url, viewport):
    """Send one crawl request, optionally with a specific viewport.

    Args:
        client: Async HTTP client exposing ``post(url, json=..., timeout=...)``.
        url: Endpoint the payload is POSTed to.
        viewport: ``{"width": ..., "height": ...}`` dict, or a falsy value to
            send an empty ``browser_config``.

    Returns:
        ``{"success": bool, "latency_ms": float, "viewport": viewport}`` when
        a response was received, otherwise
        ``{"success": False, "error": str, "viewport": viewport}``.
    """
    payload = {
        "urls": ["https://httpbin.org/html"],
        "browser_config": {},
        "crawler_config": {}
    }
    # Add viewport if specified
    if viewport:
        payload["browser_config"] = {
            "type": "BrowserConfig",
            "params": {
                "viewport": {"type": "dict", "value": viewport},
                "headless": True,
                "text_mode": True,
                "extra_args": [
                    "--no-sandbox",
                    "--disable-dev-shm-usage",
                    "--disable-gpu",
                    "--disable-software-rasterizer",
                    "--disable-web-security",
                    "--allow-insecure-localhost",
                    "--ignore-certificate-errors"
                ]
            }
        }
    # perf_counter() is monotonic, so the measured latency cannot be skewed
    # (or go negative) when the system clock is adjusted mid-request.
    start = time.perf_counter()
    try:
        resp = await client.post(url, json=payload, timeout=60.0)
    except Exception as e:
        return {"success": False, "error": str(e), "viewport": viewport}
    elapsed = (time.perf_counter() - start) * 1000
    return {"success": resp.status_code == 200, "latency_ms": elapsed, "viewport": viewport}
 def start_container(client, image, name, port):
    """Start a fresh container and wait until its /health endpoint returns 200.

    An existing container called ``name`` is stopped and removed first.

    Args:
        client: Docker client exposing ``containers.get`` / ``containers.run``.
        image: Image reference to run.
        name: Name given to the new container.
        port: Container TCP port, published on the same host port.

    Returns:
        The container object once the health check succeeds.

    Raises:
        TimeoutError: If the health check has not succeeded after 30 attempts.
    """
    # Imported up front so a missing dependency fails immediately instead of
    # being mistaken for a container start-up timeout.
    import requests
    try:
        old = client.containers.get(name)
        print(f"🧹 Stopping existing container...")
        old.stop()
        old.remove()
    except docker.errors.NotFound:
        pass
    print(f"🚀 Starting container...")
    container = client.containers.run(
        image, name=name, ports={f"{port}/tcp": port},
        detach=True, shm_size="1g", mem_limit="4g",
    )
    print(f"⏳ Waiting for health...")
    for _ in range(30):
        time.sleep(1)
        container.reload()
        if container.status != "running":
            continue
        try:
            healthy = requests.get(f"http://localhost:{port}/health", timeout=2).status_code == 200
        except requests.RequestException:
            # Server not accepting connections yet; retry on the next tick.
            # Ctrl-C is no longer swallowed here.
            continue
        if healthy:
            print(f"✅ Container healthy!")
            return container
    raise TimeoutError("Container failed to start")
 async def main():
    """Run the mixed-config pool stress test end to end.

    Starts the container, sends REQUESTS_PER_CONFIG sequential requests for
    every entry of VIEWPORT_CONFIGS in shuffled order, then prints success and
    latency figures, pool counters scraped from the container logs, and
    memory figures.

    Returns:
        0 when the success rate is at least 99%, otherwise 1. An exception is
        printed with its traceback and also yields 1.
    """
    print("="*60)
    print("TEST 5: Pool Stress - Mixed Configs")
    print("="*60)
    client = docker.from_env()
    container = None
    monitor_thread = None
    try:
        container = start_container(client, IMAGE, CONTAINER_NAME, PORT)
        print(f"\n⏳ Waiting for permanent browser init (3s)...")
        await asyncio.sleep(3)
        # Start monitoring
        stop_monitoring.clear()
        stats_history.clear()
        monitor_thread = Thread(target=monitor_stats, args=(container,), daemon=True)
        monitor_thread.start()
        # Give the monitor thread a moment to record a first sample; the
        # baseline falls back to 0 if none has arrived yet.
        await asyncio.sleep(1)
        baseline_mem = stats_history[-1]['memory_mb'] if stats_history else 0
        print(f"📏 Baseline: {baseline_mem:.1f} MB\n")
        url = f"http://localhost:{PORT}/crawl"
        print(f"Testing {len(VIEWPORT_CONFIGS)} different configs:")
        for i, vp in enumerate(VIEWPORT_CONFIGS):
            vp_str = "Default" if vp is None else f"{vp['width']}x{vp['height']}"
            print(f"  {i+1}. {vp_str}")
        print()
        # Run requests: repeat each config REQUESTS_PER_CONFIG times
        all_results = []
        config_sequence = []
        for _ in range(REQUESTS_PER_CONFIG):
            for viewport in VIEWPORT_CONFIGS:
                config_sequence.append(viewport)
        # Shuffle to mix configs
        random.shuffle(config_sequence)
        print(f"🔄 Running {len(config_sequence)} requests with mixed configs...")
        async with httpx.AsyncClient() as http_client:
            # Requests are awaited one at a time (no concurrency).
            for i, viewport in enumerate(config_sequence):
                result = await crawl_with_viewport(http_client, url, viewport)
                all_results.append(result)
                # Print a progress line for every 5th request only.
                if (i + 1) % 5 == 0:
                    vp_str = "default" if result['viewport'] is None else f"{result['viewport']['width']}x{result['viewport']['height']}"
                    status = "✓" if result.get('success') else "✗"
                    lat = f"{result.get('latency_ms', 0):.0f}ms" if 'latency_ms' in result else "error"
                    print(f"  [{i+1}/{len(config_sequence)}] {status} {vp_str} - {lat}")
        # Stop monitoring
        await asyncio.sleep(2)
        stop_monitoring.set()
        if monitor_thread:
            monitor_thread.join(timeout=2)
        # Analyze results
        pool_stats = analyze_pool_logs(container)
        successes = sum(1 for r in all_results if r.get("success"))
        success_rate = (successes / len(all_results)) * 100
        # Only requests that received a response carry a latency.
        latencies = [r["latency_ms"] for r in all_results if "latency_ms" in r]
        avg_lat = sum(latencies) / len(latencies) if latencies else 0
        memory_samples = [s['memory_mb'] for s in stats_history]
        peak_mem = max(memory_samples) if memory_samples else 0
        final_mem = memory_samples[-1] if memory_samples else 0
        print(f"\n{'='*60}")
        print(f"RESULTS:")
        print(f"{'='*60}")
        print(f"  Requests:     {len(all_results)}")
        print(f"  Success Rate: {success_rate:.1f}% ({successes}/{len(all_results)})")
        print(f"  Avg Latency:  {avg_lat:.0f}ms")
        print(f"\n  Pool Statistics:")
        print(f"    🔥 Permanent: {pool_stats['permanent']}")
        print(f"    ♨️  Hot:       {pool_stats['hot']}")
        print(f"    ❄️  Cold:      {pool_stats['cold']}")
        print(f"    🆕 New:       {pool_stats['new']}")
        print(f"    ⬆️  Promotions: {pool_stats['promotions']}")
        # 'total' is permanent + hot + cold as counted by analyze_pool_logs.
        print(f"    📊 Reuse:     {(pool_stats['total'] / len(all_results) * 100):.1f}%")
        print(f"\n  Memory:")
        print(f"    Baseline: {baseline_mem:.1f} MB")
        print(f"    Peak:     {peak_mem:.1f} MB")
        print(f"    Final:    {final_mem:.1f} MB")
        print(f"    Delta:    {final_mem - baseline_mem:+.1f} MB")
        print(f"{'='*60}")
        # Pass/Fail: only the success rate can fail the test; the remaining
        # checks print warnings/notes and leave `passed` untouched.
        passed = True
        if success_rate < 99:
            print(f"❌ FAIL: Success rate {success_rate:.1f}% < 99%")
            passed = False
        # Should see promotions since we repeat each config 5 times
        if pool_stats['promotions'] < (len(VIEWPORT_CONFIGS) - 1):  # -1 for default
            print(f"⚠️  WARNING: Only {pool_stats['promotions']} promotions (expected ~{len(VIEWPORT_CONFIGS)-1})")
        # Should have created some browsers for different configs
        if pool_stats['new'] == 0:
            print(f"⚠️  NOTE: No new browsers created (all used default?)")
        if pool_stats['permanent'] == len(all_results):
            print(f"⚠️  NOTE: All requests used permanent browser (configs not varying enough?)")
        if final_mem - baseline_mem > 500:
            print(f"⚠️  WARNING: Memory grew {final_mem - baseline_mem:.1f} MB")
        if passed:
            print(f"✅ TEST PASSED")
            return 0
        else:
            return 1
    except Exception as e:
        print(f"\n❌ TEST ERROR: {e}")
        import traceback
        traceback.print_exc()
        return 1
    finally:
        # Runs on every exit path: stop the monitor and tear the container
        # down if it was created.
        stop_monitoring.set()
        if container:
            print(f"🛑 Stopping container...")
            container.stop()
            container.remove()
 if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status. SystemExit is
    # raised directly because the bare exit() helper is injected by the site
    # module for interactive use and is not guaranteed to exist in programs.
    raise SystemExit(asyncio.run(main()))
--- a/deploy/docker/tests/codebase_test/test_6_multi_endpoint.py
+++ b/deploy/docker/tests/codebase_test/test_6_multi_endpoint.py
@@ -1,234 +0,0 @@
 #!/usr/bin/env python3
 """
 Test 6: Multi-Endpoint Testing
 - Tests multiple endpoints together: /html, /screenshot, /pdf, /crawl
 - Validates each endpoint works correctly
 - Monitors success rates per endpoint
 """
 import asyncio
 import time
 import docker
 import httpx
 from threading import Thread, Event
 # Config: image to run, container name, and the port used both for the host
 # port mapping and for the request URLs.
 IMAGE = "crawl4ai-local:latest"
 CONTAINER_NAME = "crawl4ai-test"
 PORT = 11235
 # Number of sequential requests sent to each endpoint.
 REQUESTS_PER_ENDPOINT = 10
 # Stats shared with the monitor thread: monitor_stats() appends samples to
 # stats_history and exits once stop_monitoring is set.
 stats_history = []
 stop_monitoring = Event()
 def monitor_stats(container):
    """Collect container memory samples until ``stop_monitoring`` is set.

    Meant to run in a background thread. Every usable sample from the stats
    stream is appended to the module-level ``stats_history`` as
    ``{'timestamp': <epoch seconds>, 'memory_mb': <usage / 1024**2>}``.

    Args:
        container: Object exposing ``stats(decode=True, stream=True)`` that
            yields one dict per sample.
    """
    for stat in container.stats(decode=True, stream=True):
        # The stop flag is only checked when the stream yields a sample.
        if stop_monitoring.is_set():
            break
        try:
            mem_usage = stat['memory_stats'].get('usage', 0) / (1024 * 1024)
        except (KeyError, AttributeError, TypeError):
            # Malformed or partial sample: skip it instead of killing the
            # monitor thread, but let KeyboardInterrupt/SystemExit through.
            pass
        else:
            stats_history.append({'timestamp': time.time(), 'memory_mb': mem_usage})
        time.sleep(0.5)
 async def test_html(client, base_url, count):
    """Test /html endpoint."""
    url = f"{base_url}/html"
    results = []
    for _ in range(count):
        start = time.time()
        try:
            resp = await client.post(url, json={"url": "https://httpbin.org/html"}, timeout=30.0)
            elapsed = (time.time() - start) * 1000
            results.append({"success": resp.status_code == 200, "latency_ms": elapsed})
        except Exception as e:
            results.append({"success": False, "error": str(e)})
    return results
 async def test_screenshot(client, base_url, count):
    """Test /screenshot endpoint."""
    url = f"{base_url}/screenshot"
    results = []
    for _ in range(count):
        start = time.time()
        try:
            resp = await client.post(url, json={"url": "https://httpbin.org/html"}, timeout=30.0)
            elapsed = (time.time() - start) * 1000
            results.append({"success": resp.status_code == 200, "latency_ms": elapsed})
        except Exception as e:
            results.append({"success": False, "error": str(e)})
    return results
 async def test_pdf(client, base_url, count):
    """Test /pdf endpoint."""
    url = f"{base_url}/pdf"
    results = []
    for _ in range(count):
        start = time.time()
        try:
            resp = await client.post(url, json={"url": "https://httpbin.org/html"}, timeout=30.0)
            elapsed = (time.time() - start) * 1000
            results.append({"success": resp.status_code == 200, "latency_ms": elapsed})
        except Exception as e:
            results.append({"success": False, "error": str(e)})
    return results
 async def test_crawl(client, base_url, count):
    """Test /crawl endpoint."""
    url = f"{base_url}/crawl"
    results = []
    payload = {
        "urls": ["https://httpbin.org/html"],
        "browser_config": {},
        "crawler_config": {}
    }
    for _ in range(count):
        start = time.time()
        try:
            resp = await client.post(url, json=payload, timeout=30.0)
            elapsed = (time.time() - start) * 1000
            results.append({"success": resp.status_code == 200, "latency_ms": elapsed})
        except Exception as e:
            results.append({"success": False, "error": str(e)})
    return results
 def start_container(client, image, name, port):
    """Start a fresh container and wait until its /health endpoint returns 200.

    An existing container called ``name`` is stopped and removed first.

    Args:
        client: Docker client exposing ``containers.get`` / ``containers.run``.
        image: Image reference to run.
        name: Name given to the new container.
        port: Container TCP port, published on the same host port.

    Returns:
        The container object once the health check succeeds.

    Raises:
        TimeoutError: If the health check has not succeeded after 30 attempts.
    """
    # Imported up front so a missing dependency fails immediately instead of
    # being mistaken for a container start-up timeout.
    import requests
    try:
        old = client.containers.get(name)
        print(f"🧹 Stopping existing container...")
        old.stop()
        old.remove()
    except docker.errors.NotFound:
        pass
    print(f"🚀 Starting container...")
    container = client.containers.run(
        image, name=name, ports={f"{port}/tcp": port},
        detach=True, shm_size="1g", mem_limit="4g",
    )
    print(f"⏳ Waiting for health...")
    for _ in range(30):
        time.sleep(1)
        container.reload()
        if container.status != "running":
            continue
        try:
            healthy = requests.get(f"http://localhost:{port}/health", timeout=2).status_code == 200
        except requests.RequestException:
            # Server not accepting connections yet; retry on the next tick.
            # Ctrl-C is no longer swallowed here.
            continue
        if healthy:
            print(f"✅ Container healthy!")
            return container
    raise TimeoutError("Container failed to start")
 async def main():
    """Run the multi-endpoint test end to end.

    Starts the container, sends REQUESTS_PER_ENDPOINT sequential requests to
    each of /html, /screenshot, /pdf and /crawl, then prints per-endpoint
    success rate and average latency together with memory figures.

    Returns:
        0 when every endpoint reached a 100% success rate, otherwise 1. An
        exception is printed with its traceback and also yields 1.
    """
    print("="*60)
    print("TEST 6: Multi-Endpoint Testing")
    print("="*60)
    client = docker.from_env()
    container = None
    monitor_thread = None
    try:
        container = start_container(client, IMAGE, CONTAINER_NAME, PORT)
        print(f"\n⏳ Waiting for permanent browser init (3s)...")
        await asyncio.sleep(3)
        # Start monitoring
        stop_monitoring.clear()
        stats_history.clear()
        monitor_thread = Thread(target=monitor_stats, args=(container,), daemon=True)
        monitor_thread.start()
        # Give the monitor thread a moment to record a first sample; the
        # baseline falls back to 0 if none has arrived yet.
        await asyncio.sleep(1)
        baseline_mem = stats_history[-1]['memory_mb'] if stats_history else 0
        print(f"📏 Baseline: {baseline_mem:.1f} MB\n")
        base_url = f"http://localhost:{PORT}"
        # Test each endpoint: maps the endpoint label to its driver coroutine.
        endpoints = {
            "/html": test_html,
            "/screenshot": test_screenshot,
            "/pdf": test_pdf,
            "/crawl": test_crawl,
        }
        all_endpoint_stats = {}
        async with httpx.AsyncClient() as http_client:
            # Endpoints are exercised one after another, not concurrently.
            for endpoint_name, test_func in endpoints.items():
                print(f"🔄 Testing {endpoint_name} ({REQUESTS_PER_ENDPOINT} requests)...")
                results = await test_func(http_client, base_url, REQUESTS_PER_ENDPOINT)
                successes = sum(1 for r in results if r.get("success"))
                success_rate = (successes / len(results)) * 100
                # Only requests that received a response carry a latency.
                latencies = [r["latency_ms"] for r in results if "latency_ms" in r]
                avg_lat = sum(latencies) / len(latencies) if latencies else 0
                all_endpoint_stats[endpoint_name] = {
                    'success_rate': success_rate,
                    'avg_latency': avg_lat,
                    'total': len(results),
                    'successes': successes
                }
                print(f"  ✓ Success: {success_rate:.1f}% ({successes}/{len(results)}), Avg: {avg_lat:.0f}ms")
        # Stop monitoring
        await asyncio.sleep(1)
        stop_monitoring.set()
        if monitor_thread:
            monitor_thread.join(timeout=2)
        # Final stats
        memory_samples = [s['memory_mb'] for s in stats_history]
        peak_mem = max(memory_samples) if memory_samples else 0
        final_mem = memory_samples[-1] if memory_samples else 0
        print(f"\n{'='*60}")
        print(f"RESULTS:")
        print(f"{'='*60}")
        for endpoint, stats in all_endpoint_stats.items():
            print(f"  {endpoint:12} Success: {stats['success_rate']:5.1f}%  Avg: {stats['avg_latency']:6.0f}ms")
        print(f"\n  Memory:")
        print(f"    Baseline: {baseline_mem:.1f} MB")
        print(f"    Peak:     {peak_mem:.1f} MB")
        print(f"    Final:    {final_mem:.1f} MB")
        print(f"    Delta:    {final_mem - baseline_mem:+.1f} MB")
        print(f"{'='*60}")
        # Pass/Fail: any endpoint below 100% fails the run; memory figures
        # are reported but not checked.
        passed = True
        for endpoint, stats in all_endpoint_stats.items():
            if stats['success_rate'] < 100:
                print(f"❌ FAIL: {endpoint} success rate {stats['success_rate']:.1f}% < 100%")
                passed = False
        if passed:
            print(f"✅ TEST PASSED")
            return 0
        else:
            return 1
    except Exception as e:
        print(f"\n❌ TEST ERROR: {e}")
        import traceback
        traceback.print_exc()
        return 1
    finally:
        # Runs on every exit path: stop the monitor and tear the container
        # down if it was created.
        stop_monitoring.set()
        if container:
            print(f"🛑 Stopping container...")
            container.stop()
            container.remove()
 if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status. SystemExit is
    # raised directly because the bare exit() helper is injected by the site
    # module for interactive use and is not guaranteed to exist in programs.
    raise SystemExit(asyncio.run(main()))
--- a/deploy/docker/tests/codebase_test/test_7_cleanup.py
+++ b/deploy/docker/tests/codebase_test/test_7_cleanup.py
@@ -1,199 +0,0 @@
 #!/usr/bin/env python3
 """
 Test 7: Cleanup Verification (Janitor)
 - Creates load spike then goes idle
 - Verifies memory returns to near baseline
 - Tests janitor cleanup of idle browsers
 - Monitors memory recovery time
 """
 import asyncio
 import time
 import docker
 import httpx
 from threading import Thread, Event
 # Config: image to run, container name, and the port used both for the host
 # port mapping and for the request URLs.
 IMAGE = "crawl4ai-local:latest"
 CONTAINER_NAME = "crawl4ai-test"
 PORT = 11235
 SPIKE_REQUESTS = 20  # Create some browsers
 IDLE_TIME = 90  # Wait 90s for janitor (runs every 60s)
 # Stats shared with the monitor thread: monitor_stats() appends samples to
 # stats_history and exits once stop_monitoring is set.
 stats_history = []
 stop_monitoring = Event()
 def monitor_stats(container):
    """Collect container memory samples until ``stop_monitoring`` is set.

    Meant to run in a background thread. Every usable sample from the stats
    stream is appended to the module-level ``stats_history`` as
    ``{'timestamp': <epoch seconds>, 'memory_mb': <usage / 1024**2>}``.

    Args:
        container: Object exposing ``stats(decode=True, stream=True)`` that
            yields one dict per sample.
    """
    for stat in container.stats(decode=True, stream=True):
        # The stop flag is only checked when the stream yields a sample.
        if stop_monitoring.is_set():
            break
        try:
            mem_usage = stat['memory_stats'].get('usage', 0) / (1024 * 1024)
        except (KeyError, AttributeError, TypeError):
            # Malformed or partial sample: skip it instead of killing the
            # monitor thread, but let KeyboardInterrupt/SystemExit through.
            pass
        else:
            stats_history.append({'timestamp': time.time(), 'memory_mb': mem_usage})
        time.sleep(1)  # Sample every 1s for this test
 def start_container(client, image, name, port):
    """Start a fresh container and wait until its /health endpoint returns 200.

    An existing container called ``name`` is stopped and removed first.

    Args:
        client: Docker client exposing ``containers.get`` / ``containers.run``.
        image: Image reference to run.
        name: Name given to the new container.
        port: Container TCP port, published on the same host port.

    Returns:
        The container object once the health check succeeds.

    Raises:
        TimeoutError: If the health check has not succeeded after 30 attempts.
    """
    # Imported up front so a missing dependency fails immediately instead of
    # being mistaken for a container start-up timeout.
    import requests
    try:
        old = client.containers.get(name)
        print(f"🧹 Stopping existing container...")
        old.stop()
        old.remove()
    except docker.errors.NotFound:
        pass
    print(f"🚀 Starting container...")
    container = client.containers.run(
        image, name=name, ports={f"{port}/tcp": port},
        detach=True, shm_size="1g", mem_limit="4g",
    )
    print(f"⏳ Waiting for health...")
    for _ in range(30):
        time.sleep(1)
        container.reload()
        if container.status != "running":
            continue
        try:
            healthy = requests.get(f"http://localhost:{port}/health", timeout=2).status_code == 200
        except requests.RequestException:
            # Server not accepting connections yet; retry on the next tick.
            # Ctrl-C is no longer swallowed here.
            continue
        if healthy:
            print(f"✅ Container healthy!")
            return container
    raise TimeoutError("Container failed to start")
 async def main():
    """Run the janitor cleanup check end to end.

    Starts the container, fires SPIKE_REQUESTS concurrent /crawl requests
    cycling through three viewports, records the peak memory, idles for
    IDLE_TIME seconds while printing memory every 10s, and finally reports how
    much memory was recovered.

    Returns:
        0 once the run completes; the memory checks only print warnings and
        never fail the run. 1 if an exception was raised (it is printed with
        its traceback).
    """
    print("="*60)
    print("TEST 7: Cleanup Verification (Janitor)")
    print("="*60)
    client = docker.from_env()
    container = None
    monitor_thread = None
    try:
        container = start_container(client, IMAGE, CONTAINER_NAME, PORT)
        print(f"\n⏳ Waiting for permanent browser init (3s)...")
        await asyncio.sleep(3)
        # Start monitoring
        stop_monitoring.clear()
        stats_history.clear()
        monitor_thread = Thread(target=monitor_stats, args=(container,), daemon=True)
        monitor_thread.start()
        # Give the monitor thread time to record a first sample; the baseline
        # falls back to 0 if none has arrived yet.
        await asyncio.sleep(2)
        baseline_mem = stats_history[-1]['memory_mb'] if stats_history else 0
        print(f"📏 Baseline: {baseline_mem:.1f} MB\n")
        # Create load spike with different configs to populate pool
        print(f"🔥 Creating load spike ({SPIKE_REQUESTS} requests with varied configs)...")
        url = f"http://localhost:{PORT}/crawl"
        viewports = [
            {"width": 1920, "height": 1080},
            {"width": 1024, "height": 768},
            {"width": 375, "height": 667},
        ]
        async with httpx.AsyncClient(timeout=60.0) as http_client:
            tasks = []
            for i in range(SPIKE_REQUESTS):
                # Round-robin over the viewports so every config is requested.
                vp = viewports[i % len(viewports)]
                payload = {
                    "urls": ["https://httpbin.org/html"],
                    "browser_config": {
                        "type": "BrowserConfig",
                        "params": {
                            "viewport": {"type": "dict", "value": vp},
                            "headless": True,
                            "text_mode": True,
                            "extra_args": [
                                "--no-sandbox", "--disable-dev-shm-usage",
                                "--disable-gpu", "--disable-software-rasterizer",
                                "--disable-web-security", "--allow-insecure-localhost",
                                "--ignore-certificate-errors"
                            ]
                        }
                    },
                    "crawler_config": {}
                }
                tasks.append(http_client.post(url, json=payload))
            # return_exceptions=True keeps one failed request from aborting
            # the spike; failures come back as exception objects.
            results = await asyncio.gather(*tasks, return_exceptions=True)
            successes = sum(1 for r in results if hasattr(r, 'status_code') and r.status_code == 200)
            print(f"  ✓ Spike completed: {successes}/{len(results)} successful")
        # Measure peak
        await asyncio.sleep(2)
        peak_mem = max(s['memory_mb'] for s in stats_history) if stats_history else baseline_mem
        print(f"  📊 Peak memory: {peak_mem:.1f} MB (+{peak_mem - baseline_mem:.1f} MB)")
        # Now go idle and wait for janitor
        print(f"\n⏸️  Going idle for {IDLE_TIME}s (janitor cleanup)...")
        print(f"  (Janitor runs every 60s, checking for idle browsers)")
        for elapsed in range(0, IDLE_TIME, 10):
            await asyncio.sleep(10)
            current_mem = stats_history[-1]['memory_mb'] if stats_history else 0
            print(f"  [{elapsed+10:3d}s] Memory: {current_mem:.1f} MB")
        # Stop monitoring
        stop_monitoring.set()
        if monitor_thread:
            monitor_thread.join(timeout=2)
        # Analyze memory recovery
        final_mem = stats_history[-1]['memory_mb'] if stats_history else 0
        recovery_mb = peak_mem - final_mem
        # Guard against a zero/negative spike to avoid dividing by zero.
        recovery_pct = (recovery_mb / (peak_mem - baseline_mem) * 100) if (peak_mem - baseline_mem) > 0 else 0
        print(f"\n{'='*60}")
        print(f"RESULTS:")
        print(f"{'='*60}")
        print(f"  Memory Journey:")
        print(f"    Baseline:  {baseline_mem:.1f} MB")
        print(f"    Peak:      {peak_mem:.1f} MB  (+{peak_mem - baseline_mem:.1f} MB)")
        print(f"    Final:     {final_mem:.1f} MB  (+{final_mem - baseline_mem:.1f} MB)")
        print(f"    Recovered: {recovery_mb:.1f} MB  ({recovery_pct:.1f}%)")
        print(f"{'='*60}")
        # Pass/Fail
        # NOTE(review): every check below is advisory; the run passes no
        # matter what the memory figures are. Confirm that is intended.
        # Should have created some memory pressure
        if peak_mem - baseline_mem < 100:
            print(f"⚠️  WARNING: Peak increase only {peak_mem - baseline_mem:.1f} MB (expected more browsers)")
        # Should recover most memory (within 100MB of baseline)
        if final_mem - baseline_mem > 100:
            print(f"⚠️  WARNING: Memory didn't recover well (still +{final_mem - baseline_mem:.1f} MB above baseline)")
        else:
            print(f"✅ Good memory recovery!")
        # Baseline + 50MB tolerance
        if final_mem - baseline_mem < 50:
            print(f"✅ Excellent cleanup (within 50MB of baseline)")
        print(f"✅ TEST PASSED")
        return 0
    except Exception as e:
        print(f"\n❌ TEST ERROR: {e}")
        import traceback
        traceback.print_exc()
        return 1
    finally:
        # Runs on every exit path: stop the monitor and tear the container
        # down if it was created.
        stop_monitoring.set()
        if container:
            print(f"🛑 Stopping container...")
            container.stop()
            container.remove()
 if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status. SystemExit is
    # raised directly because the bare exit() helper is injected by the site
    # module for interactive use and is not guaranteed to exist in programs.
    raise SystemExit(asyncio.run(main()))
--- a/deploy/docker/tests/monitor/demo_monitor_dashboard.py
+++ b/deploy/docker/tests/monitor/demo_monitor_dashboard.py
@@ -1,164 +0,0 @@
 #!/usr/bin/env python3
 """
 Monitor Dashboard Demo Script
 Generates varied activity to showcase all monitoring features for video recording.
 """
 import httpx
 import asyncio
 import time
 from datetime import datetime
 # Root URL of the server under demo; every request path below is appended to it.
 BASE_URL = "http://localhost:11235"
 async def demo_dashboard():
    """Generate varied traffic against BASE_URL to populate the dashboard.

    Runs seven phases in order: sequential default-config crawls, repeated
    crawls per viewport, a 10-request concurrent burst, one request to each of
    /md, /screenshot and /pdf, a deliberately invalid URL, a 40s idle wait,
    and a final read-out of the /monitor endpoints.

    Request errors in phases 1, 2, 4 and 5 are printed and the demo goes on;
    errors while reading the /monitor endpoints in phase 7 propagate to the
    caller.
    """
    print("🎬 Monitor Dashboard Demo - Starting...\n")
    print(f"📊 Dashboard: {BASE_URL}/dashboard")
    print("=" * 60)
    async with httpx.AsyncClient(timeout=60.0) as client:
        # Phase 1: Simple requests (permanent browser)
        print("\n🔷 Phase 1: Testing permanent browser pool")
        print("-" * 60)
        for i in range(5):
            print(f"  {i+1}/5 Request to /crawl (default config)...")
            try:
                r = await client.post(
                    f"{BASE_URL}/crawl",
                    json={"urls": [f"https://httpbin.org/html?req={i}"], "crawler_config": {}}
                )
                print(f"     ✅ Status: {r.status_code}, Time: {r.elapsed.total_seconds():.2f}s")
            except Exception as e:
                print(f"     ❌ Error: {e}")
            await asyncio.sleep(1)  # Small delay between requests
        # Phase 2: Create variant browsers (different configs)
        print("\n🔶 Phase 2: Testing cold→hot pool promotion")
        print("-" * 60)
        viewports = [
            {"width": 1920, "height": 1080},
            {"width": 1280, "height": 720},
            {"width": 800, "height": 600}
        ]
        for idx, viewport in enumerate(viewports):
            print(f"  Viewport {viewport['width']}x{viewport['height']}:")
            for i in range(4):  # 4 requests each to trigger promotion at 3
                try:
                    r = await client.post(
                        f"{BASE_URL}/crawl",
                        json={
                            "urls": [f"https://httpbin.org/json?v={idx}&r={i}"],
                            "browser_config": {"viewport": viewport},
                            "crawler_config": {}
                        }
                    )
                    print(f"    {i+1}/4 ✅ {r.status_code} - Should see cold→hot after 3 uses")
                except Exception as e:
                    print(f"    {i+1}/4 ❌ {e}")
                await asyncio.sleep(0.5)
        # Phase 3: Concurrent burst (stress pool)
        print("\n🔷 Phase 3: Concurrent burst (10 parallel)")
        print("-" * 60)
        # Build the coroutines first; they only start running inside gather().
        tasks = [
            client.post(
                f"{BASE_URL}/crawl",
                json={"urls": [f"https://httpbin.org/delay/2?burst={i}"], "crawler_config": {}}
            )
            for i in range(10)
        ]
        print("  Sending 10 concurrent requests...")
        # perf_counter() is monotonic, so the reported duration is immune to
        # system clock adjustments.
        start = time.perf_counter()
        results = await asyncio.gather(*tasks, return_exceptions=True)
        elapsed = time.perf_counter() - start
        successes = sum(1 for r in results if not isinstance(r, Exception) and r.status_code == 200)
        print(f"  ✅ {successes}/10 succeeded in {elapsed:.2f}s")
        # Phase 4: Multi-endpoint coverage
        print("\n🔶 Phase 4: Testing multiple endpoints")
        print("-" * 60)
        endpoints = [
            ("/md", {"url": "https://httpbin.org/html", "f": "fit", "c": "0"}),
            ("/screenshot", {"url": "https://httpbin.org/html"}),
            ("/pdf", {"url": "https://httpbin.org/html"}),
        ]
        for endpoint, payload in endpoints:
            print(f"  Testing {endpoint}...")
            try:
                # Every endpoint takes the same kind of JSON POST, so no
                # per-endpoint branching is needed.
                r = await client.post(f"{BASE_URL}{endpoint}", json=payload)
                print(f"    ✅ {r.status_code}")
            except Exception as e:
                print(f"    ❌ {e}")
            await asyncio.sleep(1)
        # Phase 5: Intentional error (to populate errors tab)
        print("\n🔷 Phase 5: Generating error examples")
        print("-" * 60)
        print("  Triggering invalid URL error...")
        try:
            r = await client.post(
                f"{BASE_URL}/crawl",
                json={"urls": ["invalid://bad-url"], "crawler_config": {}}
            )
            print(f"    Response: {r.status_code}")
        except Exception as e:
            print(f"    ✅ Error captured: {type(e).__name__}")
        # Phase 6: Wait for janitor activity
        print("\n🔶 Phase 6: Waiting for janitor cleanup...")
        print("-" * 60)
        print("  Idle for 40s to allow janitor to clean cold pool browsers...")
        for i in range(40, 0, -10):
            print(f"    {i}s remaining... (Check dashboard for cleanup events)")
            await asyncio.sleep(10)
        # Phase 7: Final stats check
        print("\n🔷 Phase 7: Final dashboard state")
        print("-" * 60)
        r = await client.get(f"{BASE_URL}/monitor/health")
        health = r.json()
        print(f"  Memory: {health['container']['memory_percent']:.1f}%")
        print(f"  Browsers: Perm={health['pool']['permanent']['active']}, "
              f"Hot={health['pool']['hot']['count']}, Cold={health['pool']['cold']['count']}")
        r = await client.get(f"{BASE_URL}/monitor/endpoints/stats")
        stats = r.json()
        print(f"\n  Endpoint Stats:")
        for endpoint, data in stats.items():
            print(f"    {endpoint}: {data['count']} req, "
                  f"{data['avg_latency_ms']:.0f}ms avg, "
                  f"{data['success_rate_percent']:.1f}% success")
        r = await client.get(f"{BASE_URL}/monitor/browsers")
        browsers = r.json()
        print(f"\n  Pool Efficiency:")
        print(f"    Total browsers: {browsers['summary']['total_count']}")
        print(f"    Memory usage: {browsers['summary']['total_memory_mb']} MB")
        print(f"    Reuse rate: {browsers['summary']['reuse_rate_percent']:.1f}%")
    print("\n" + "=" * 60)
    print("✅ Demo complete! Dashboard is now populated with rich data.")
    print(f"\n📹 Recording tip: Refresh {BASE_URL}/dashboard")
    print("   You should see:")
    print("   • Active & completed requests")
    print("   • Browser pool (permanent + hot/cold)")
    print("   • Janitor cleanup events")
    print("   • Endpoint analytics")
    print("   • Memory timeline")
 if __name__ == "__main__":
    # Script entry point. Failures are reported in one line and reflected in
    # the exit status, so shells and CI do not see a failed demo as success.
    try:
        asyncio.run(demo_dashboard())
    except KeyboardInterrupt:
        print("\n\n⚠️  Demo interrupted by user")
        # 130 = 128 + SIGINT, the conventional status for Ctrl-C.
        raise SystemExit(130) from None
    except Exception as e:
        print(f"\n\n❌ Demo failed: {e}")
        raise SystemExit(1) from None
--- a/deploy/docker/tests/monitor/test_monitor_demo.py
+++ b/deploy/docker/tests/monitor/test_monitor_demo.py
@@ -1,57 +0,0 @@
 #!/usr/bin/env python3
 """Quick test to generate monitor dashboard activity"""
 import httpx
 import asyncio
 async def test_dashboard(base_url: str = "http://localhost:11235"):
    """Generate a little traffic so the monitor dashboard has data to show.

    Sends two crawl requests, then reads back the monitor's health, request
    log and per-endpoint statistics, printing a short summary of each step.

    Args:
        base_url: Root URL of the server to exercise. Defaults to the local
            address this script was originally hard-wired to, so existing
            zero-argument calls behave exactly as before.
    """
    # Tolerate a trailing slash so the joined paths never contain "//".
    base_url = base_url.rstrip("/")
    crawl_url = f"{base_url}/crawl"

    async with httpx.AsyncClient(timeout=30.0) as client:
        print("📊 Generating dashboard activity...")

        # Test 1: Simple crawl
        print("\n1️⃣ Running simple crawl...")
        r1 = await client.post(
            crawl_url,
            json={"urls": ["https://httpbin.org/html"], "crawler_config": {}},
        )
        print(f"   Status: {r1.status_code}")

        # Test 2: Multiple URLs
        print("\n2️⃣ Running multi-URL crawl...")
        r2 = await client.post(
            crawl_url,
            json={
                "urls": [
                    "https://httpbin.org/html",
                    "https://httpbin.org/json",
                ],
                "crawler_config": {},
            },
        )
        print(f"   Status: {r2.status_code}")

        # Test 3: Check monitor health
        print("\n3️⃣ Checking monitor health...")
        r3 = await client.get(f"{base_url}/monitor/health")
        health = r3.json()
        print(f"   Memory: {health['container']['memory_percent']}%")
        print(f"   Browsers: {health['pool']['permanent']['active']}")

        # Test 4: Check requests
        print("\n4️⃣ Checking request log...")
        r4 = await client.get(f"{base_url}/monitor/requests")
        reqs = r4.json()
        print(f"   Active: {len(reqs['active'])}")
        print(f"   Completed: {len(reqs['completed'])}")

        # Test 5: Check endpoint stats
        print("\n5️⃣ Checking endpoint stats...")
        r5 = await client.get(f"{base_url}/monitor/endpoints/stats")
        stats = r5.json()
        for endpoint, data in stats.items():
            print(f"   {endpoint}: {data['count']} requests, {data['avg_latency_ms']}ms avg")

        print("\n✅ Dashboard should now show activity!")
        print(f"\n🌐 Open: {base_url}/dashboard")
 # Script entry point: when the file is executed directly, run the
 # test_dashboard() coroutine to completion on a fresh event loop.
 if __name__ == "__main__":
    asyncio.run(test_dashboard())
--- a/deploy/docker/tests/requirements.txt
+++ b/deploy/docker/tests/requirements.txt
@@ -1,2 +0,0 @@
 httpx>=0.25.0
 docker>=7.0.0
--- a/deploy/docker/utils.py
+++ b/deploy/docker/utils.py
@@ -179,75 +179,3 @@ def verify_email_domain(email: str) -> bool:
        return True if records else False
    except Exception as e:
        return False
 def get_container_memory_percent() -> float:
    """Return container memory usage as a percentage of its limit.

    Reads the cgroup v2 files first and falls back to the cgroup v1 files
    when the v2 usage file does not exist. When the cgroup reports no limit
    (the literal ``max`` on v2, or a value above 1e18 on v1) the host's total
    memory from ``psutil`` is used as the limit, so the result is still
    "this container's usage vs. what it could use".

    Returns:
        Usage percentage as a float. If the cgroup files cannot be read or
        parsed, the host-wide ``psutil.virtual_memory().percent`` is returned
        instead.
    """
    try:
        # Try cgroup v2 first
        usage_path = Path("/sys/fs/cgroup/memory.current")
        limit_path = Path("/sys/fs/cgroup/memory.max")
        if not usage_path.exists():
            # Fall back to cgroup v1
            usage_path = Path("/sys/fs/cgroup/memory/memory.usage_in_bytes")
            limit_path = Path("/sys/fs/cgroup/memory/memory.limit_in_bytes")

        usage = int(usage_path.read_text())

        # cgroup v2 writes the word "max" for an unlimited cgroup; int() would
        # reject it, so recognise it before parsing.
        raw_limit = limit_path.read_text().strip()
        limit = None if raw_limit == "max" else int(raw_limit)

        # Handle unlimited (v2: "max", v1: > 1e18)
        if limit is None or limit > 1e18:
            import psutil
            limit = psutil.virtual_memory().total

        return (usage / limit) * 100
    except (OSError, ValueError, ZeroDivisionError):
        # Non-container or unsupported layout (missing/unreadable files,
        # unparsable content, zero limit): fall back to host-wide usage.
        import psutil
        return psutil.virtual_memory().percent
 def get_container_id() -> str:
    """Return the identifier of the current container.

    The machine's hostname is used as the identifier (in Docker the hostname
    stands in for the container ID).
    """
    from socket import gethostname

    container_id = gethostname()
    return container_id
 def detect_deployment_mode(service_name: str = "crawl4ai") -> tuple[str, list[dict]]:
    """Detect if running in single/swarm/compose mode and get container list.

    Detection is heuristic:

    * compose -- ``service_name`` resolves to more than one distinct IP
      address (a Compose service with replicas). The returned entries are
      synthesized placeholders numbered from 1; they are not real container
      IDs and ``healthy`` is always reported as ``True``.
    * swarm -- the local hostname has at least three dot-separated parts
      (``service.slot.task_id``). TODO: needs swarm-specific detection.
    * single -- anything else.

    Args:
        service_name: DNS name of the service to probe for replicas.
            Defaults to ``"crawl4ai"``.

    Returns:
        (mode, containers) where mode is "single"|"swarm"|"compose"
        containers is list of {id, hostname, healthy}
    """
    import socket

    my_hostname = socket.gethostname()

    # Compose indicator: the service name resolves to multiple IPs.
    try:
        addrs = socket.getaddrinfo(service_name, None)
    except (OSError, UnicodeError):
        # Name does not resolve (socket.gaierror is an OSError) or cannot be
        # encoded as a hostname: simply not a Compose deployment.
        addrs = []

    # getaddrinfo() yields one entry per socket type, so de-duplicate by the
    # address part of each sockaddr tuple.
    unique_ips = {addr[4][0] for addr in addrs}
    if len(unique_ips) > 1:
        # Multiple IPs = Compose with replicas
        containers = [
            {"id": f"container-{n}", "hostname": f"{service_name}-{n}", "healthy": True}
            for n in range(1, len(unique_ips) + 1)
        ]
        return "compose", containers

    this_container = [{"id": my_hostname, "hostname": my_hostname, "healthy": True}]

    # Swarm hostname format: service.slot.task_id (two or more dots).
    if my_hostname.count(".") >= 2:
        return "swarm", this_container

    # Default: single container
    return "single", this_container
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
AHMET YILMAZ	89679cee67	#1489 refactor(normalize_url): enhance URL normalization logic and add comprehensive test suite	2025-09-18 18:31:07 +08:00
AHMET YILMAZ	84ba78c852	#1489 refactor(normalize_url): improve query parameter handling and sorting	2025-09-17 18:56:45 +08:00