Compare commits
1 Commits
bug/proxy_
...
deploy
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3ea3c0520d |
137
deploy/aws/Dockerfile
Normal file
137
deploy/aws/Dockerfile
Normal file
@@ -0,0 +1,137 @@
|
||||
# Image for the Crawl4AI service: Python app, Redis, and Playwright Chromium
# all in one container, supervised by supervisord.
FROM python:3.10-slim

# Set build arguments
ARG APP_HOME=/app
ARG GITHUB_REPO=https://github.com/unclecode/crawl4ai.git
ARG GITHUB_BRANCH=next
ARG USE_LOCAL=False
ARG CONFIG_PATH=""

# Python/pip hygiene defaults plus the in-container Redis connection settings.
ENV PYTHONFAULTHANDLER=1 \
    PYTHONHASHSEED=random \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_DEFAULT_TIMEOUT=100 \
    DEBIAN_FRONTEND=noninteractive \
    REDIS_HOST=localhost \
    REDIS_PORT=6379

# Build-time knobs: optional extras set, GPU toolchain, and buildx target arch.
ARG PYTHON_VERSION=3.10
ARG INSTALL_TYPE=default
ARG ENABLE_GPU=false
ARG TARGETARCH

LABEL maintainer="unclecode"
LABEL description="🔥🕷️ Crawl4AI: Open-source LLM Friendly Web Crawler & scraper"
LABEL version="1.0"

# Base build tooling plus redis-server and supervisor (both run in this image).
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    curl \
    wget \
    gnupg \
    git \
    cmake \
    pkg-config \
    python3-dev \
    libjpeg-dev \
    redis-server \
    supervisor \
    && rm -rf /var/lib/apt/lists/*

# Shared libraries needed by headless Chromium (Playwright runtime deps).
RUN apt-get update && apt-get install -y --no-install-recommends \
    libglib2.0-0 \
    libnss3 \
    libnspr4 \
    libatk1.0-0 \
    libatk-bridge2.0-0 \
    libcups2 \
    libdrm2 \
    libdbus-1-3 \
    libxcb1 \
    libxkbcommon0 \
    libx11-6 \
    libxcomposite1 \
    libxdamage1 \
    libxext6 \
    libxfixes3 \
    libxrandr2 \
    libgbm1 \
    libpango-1.0-0 \
    libcairo2 \
    libasound2 \
    libatspi2.0-0 \
    && rm -rf /var/lib/apt/lists/*

# Optional CUDA toolkit — only when GPU is requested AND the target is amd64.
RUN if [ "$ENABLE_GPU" = "true" ] && [ "$TARGETARCH" = "amd64" ] ; then \
    apt-get update && apt-get install -y --no-install-recommends \
    nvidia-cuda-toolkit \
    && rm -rf /var/lib/apt/lists/* ; \
    else \
    echo "Skipping NVIDIA CUDA Toolkit installation (unsupported platform or GPU disabled)"; \
    fi

# Per-architecture math libraries: OpenBLAS on arm64, OpenMP on amd64.
RUN if [ "$TARGETARCH" = "arm64" ]; then \
    echo "🦾 Installing ARM-specific optimizations"; \
    apt-get update && apt-get install -y --no-install-recommends \
    libopenblas-dev \
    && rm -rf /var/lib/apt/lists/*; \
    elif [ "$TARGETARCH" = "amd64" ]; then \
    echo "🖥️ Installing AMD64-specific optimizations"; \
    apt-get update && apt-get install -y --no-install-recommends \
    libomp-dev \
    && rm -rf /var/lib/apt/lists/*; \
    else \
    echo "Skipping platform-specific optimizations (unsupported platform)"; \
    fi

WORKDIR ${APP_HOME}

# Fetch application source at the requested branch into a temp location.
RUN git clone --branch ${GITHUB_BRANCH} ${GITHUB_REPO} /tmp/crawl4ai

COPY docker/supervisord.conf .
COPY docker/requirements.txt .

RUN pip install --no-cache-dir -r requirements.txt

# Install crawl4ai with the chosen extras; the heavier variants also
# pre-download NLTK data and model weights so first startup is fast.
RUN if [ "$INSTALL_TYPE" = "all" ] ; then \
    pip install "/tmp/crawl4ai/[all]" && \
    python -m nltk.downloader punkt stopwords && \
    python -m crawl4ai.model_loader ; \
    elif [ "$INSTALL_TYPE" = "torch" ] ; then \
    pip install "/tmp/crawl4ai/[torch]" ; \
    elif [ "$INSTALL_TYPE" = "transformer" ] ; then \
    pip install "/tmp/crawl4ai/[transformer]" && \
    python -m crawl4ai.model_loader ; \
    else \
    pip install "/tmp/crawl4ai" ; \
    fi

# Build-time sanity check: both crawl4ai and Playwright must import cleanly.
RUN pip install --no-cache-dir --upgrade pip && \
    python -c "import crawl4ai; print('✅ crawl4ai is ready to rock!')" && \
    python -c "from playwright.sync_api import sync_playwright; print('✅ Playwright is feeling dramatic!')"

RUN playwright install --with-deps chromium

COPY docker/* ${APP_HOME}/
# Overlay a custom config only when CONFIG_PATH names an existing file.
RUN if [ -n "$CONFIG_PATH" ] && [ -f "$CONFIG_PATH" ]; then \
    echo "Using custom config from $CONFIG_PATH" && \
    cp $CONFIG_PATH /app/config.yml; \
    fi

# Unhealthy when memory is under 2GB, Redis does not answer PING, or the
# API's /health endpoint fails.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD bash -c '\
    MEM=$(free -m | awk "/^Mem:/{print \$2}"); \
    if [ $MEM -lt 2048 ]; then \
    echo "⚠️ Warning: Less than 2GB RAM available! Your container might need a memory boost! 🚀"; \
    exit 1; \
    fi && \
    redis-cli ping > /dev/null && \
    curl -f http://localhost:8000/health || exit 1'

# EXPOSE 6379

# supervisord starts Redis and the API processes listed in supervisord.conf.
CMD ["supervisord", "-c", "supervisord.conf"]
|
||||
|
||||
3
deploy/aws/deploy-config.yml
Executable file
3
deploy/aws/deploy-config.yml
Executable file
@@ -0,0 +1,3 @@
|
||||
# Deployment configuration template consumed by deploy.py.
# The UPPERCASE placeholders are replaced with real values by the `init` command.
project_name: PROJECT_NAME
domain_name: DOMAIN_NAME
aws_region: AWS_REGION
|
||||
729
deploy/aws/deploy.py
Executable file
729
deploy/aws/deploy.py
Executable file
@@ -0,0 +1,729 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import json
|
||||
import yaml
|
||||
import requests
|
||||
import os
|
||||
|
||||
# Steps for deployment
|
||||
# Ordered deployment pipeline. Each entry names a function defined below;
# the index of the last completed step is persisted as state["last_step"]
# so an interrupted run resumes where it left off.
STEPS = [
    "refresh_aws_auth",
    "fetch_or_create_vpc_and_subnets",
    "create_ecr_repositories",
    "create_iam_role",
    "create_security_groups",
    "request_acm_certificate",
    "build_and_push_docker",
    "create_task_definition",
    "setup_alb",
    "deploy_ecs_service",
    "configure_custom_domain",
    "test_endpoints"
]
|
||||
|
||||
# Utility function to prompt user for confirmation
|
||||
def confirm_step(step_name):
    """Prompt until the user types 'yes' or 'no'; True means proceed."""
    while True:
        answer = input(f"Proceed with {step_name}? (yes/no): ").strip().lower()
        if answer == "yes":
            return True
        if answer == "no":
            return False
        print("Please enter 'yes' or 'no'.")
|
||||
|
||||
# Utility function to run AWS CLI or shell commands and handle errors
|
||||
def run_command(command, error_message, additional_diagnostics=None, cwd="."):
    """Run a CLI command, returning the CompletedProcess on success.

    On a non-zero exit, dumps the command, exit code, stdout/stderr — plus the
    output of any additional diagnostic commands — to error_context.md, then
    raises with the stderr attached.
    """
    try:
        return subprocess.run(command, capture_output=True, text=True, check=True, cwd=cwd)
    except subprocess.CalledProcessError as e:
        with open("error_context.md", "w") as f:
            f.write(f"{error_message}:\n")
            f.write(f"Command: {' '.join(command)}\n")
            f.write(f"Exit Code: {e.returncode}\n")
            f.write(f"Stdout: {e.stdout}\n")
            f.write(f"Stderr: {e.stderr}\n")
            # Diagnostics are best-effort: their own failures are just recorded.
            for diag_cmd in additional_diagnostics or []:
                diag = subprocess.run(diag_cmd, capture_output=True, text=True)
                f.write(f"\nDiagnostic command: {' '.join(diag_cmd)}\n")
                f.write(f"Stdout: {diag.stdout}\n")
                f.write(f"Stderr: {diag.stderr}\n")
        raise Exception(f"{error_message}: {e.stderr}")
|
||||
|
||||
# Utility function to load or initialize state
|
||||
def load_state(project_name):
    """Load the saved deployment state, or a fresh one if no state file exists."""
    state_file = f"{project_name}-state.json"
    if not os.path.exists(state_file):
        # -1 means "no step completed yet".
        return {"last_step": -1}
    with open(state_file, "r") as f:
        return json.load(f)
|
||||
|
||||
# Utility function to save state
|
||||
def save_state(project_name, state):
    """Persist deployment state as pretty-printed JSON in the working directory."""
    with open(f"{project_name}-state.json", "w") as f:
        json.dump(state, f, indent=4)
|
||||
|
||||
# DNS Check Function
|
||||
def check_dns_propagation(domain, alb_dns):
    """Return True once `dig` shows `domain` resolving to the ALB DNS name."""
    try:
        answer = subprocess.run(["dig", "+short", domain], capture_output=True, text=True)
        return alb_dns in answer.stdout
    except Exception as e:
        # dig missing or failing counts as "not propagated yet".
        print(f"Failed to check DNS: {e}")
        return False
|
||||
|
||||
# Step Functions
|
||||
def refresh_aws_auth(project_name, state, config):
    """Step 0: verify that the AWS CLI has working credentials."""
    if state["last_step"] >= 0:
        print("Skipping refresh_aws_auth (already completed)")
        return
    if not confirm_step("Refresh AWS authentication"):
        sys.exit("User aborted.")
    # STS get-caller-identity fails fast when credentials are absent/expired.
    run_command(["aws", "sts", "get-caller-identity"], "Failed to verify AWS credentials")
    print("AWS authentication verified.")
    state["last_step"] = 0
    save_state(project_name, state)
|
||||
|
||||
def fetch_or_create_vpc_and_subnets(project_name, state, config):
    """Step 1: locate the default VPC (creating one if absent) and two subnets.

    Also ensures an internet gateway is attached to the VPC. Records
    state["vpc_id"] and state["public_subnets"] and returns (vpc_id, subnet_ids).

    NOTE(review): when resources are created here, no route-table route to the
    internet gateway is added, so the new subnets may not actually have
    internet egress — verify against the intended network design.
    """
    if state["last_step"] >= 1:
        print("Skipping fetch_or_create_vpc_and_subnets (already completed)")
        return state["vpc_id"], state["public_subnets"]
    if not confirm_step("Fetch or Create VPC and Subnets"):
        sys.exit("User aborted.")

    # Fetch AWS account ID
    result = run_command(
        ["aws", "sts", "get-caller-identity"],
        "Failed to get AWS account ID"
    )
    account_id = json.loads(result.stdout)["Account"]  # NOTE(review): unused below

    # Fetch default VPC; create a 10.0.0.0/16 VPC only when none exists.
    result = run_command(
        ["aws", "ec2", "describe-vpcs", "--filters", "Name=isDefault,Values=true", "--region", config["aws_region"]],
        "Failed to describe VPCs"
    )
    vpcs = json.loads(result.stdout).get("Vpcs", [])
    if not vpcs:
        result = run_command(
            ["aws", "ec2", "create-vpc", "--cidr-block", "10.0.0.0/16", "--region", config["aws_region"]],
            "Failed to create VPC"
        )
        vpc_id = json.loads(result.stdout)["Vpc"]["VpcId"]
        # DNS hostnames are required for ALB/ECS name resolution in the VPC.
        run_command(
            ["aws", "ec2", "modify-vpc-attribute", "--vpc-id", vpc_id, "--enable-dns-hostnames", "--region", config["aws_region"]],
            "Failed to enable DNS hostnames"
        )
    else:
        vpc_id = vpcs[0]["VpcId"]

    # Fetch or create subnets: the ALB needs at least two (distinct AZs).
    result = run_command(
        ["aws", "ec2", "describe-subnets", "--filters", f"Name=vpc-id,Values={vpc_id}", "--region", config["aws_region"]],
        "Failed to describe subnets"
    )
    subnets = json.loads(result.stdout).get("Subnets", [])
    if len(subnets) < 2:
        # Create one /24 per AZ in the first two availability zones.
        azs = json.loads(run_command(
            ["aws", "ec2", "describe-availability-zones", "--region", config["aws_region"]],
            "Failed to describe availability zones"
        ).stdout)["AvailabilityZones"][:2]
        subnet_ids = []
        for i, az in enumerate(azs):
            az_name = az["ZoneName"]
            result = run_command(
                ["aws", "ec2", "create-subnet", "--vpc-id", vpc_id, "--cidr-block", f"10.0.{i}.0/24", "--availability-zone", az_name, "--region", config["aws_region"]],
                f"Failed to create subnet in {az_name}"
            )
            subnet_id = json.loads(result.stdout)["Subnet"]["SubnetId"]
            subnet_ids.append(subnet_id)
            # Auto-assign public IPs so Fargate tasks are reachable/egress-capable.
            run_command(
                ["aws", "ec2", "modify-subnet-attribute", "--subnet-id", subnet_id, "--map-public-ip-on-launch", "--region", config["aws_region"]],
                f"Failed to make subnet {subnet_id} public"
            )
    else:
        subnet_ids = [s["SubnetId"] for s in subnets[:2]]

    # Ensure an internet gateway is attached to the VPC.
    result = run_command(
        ["aws", "ec2", "describe-internet-gateways", "--filters", f"Name=attachment.vpc-id,Values={vpc_id}", "--region", config["aws_region"]],
        "Failed to describe internet gateways"
    )
    igws = json.loads(result.stdout).get("InternetGateways", [])
    if not igws:
        result = run_command(
            ["aws", "ec2", "create-internet-gateway", "--region", config["aws_region"]],
            "Failed to create internet gateway"
        )
        igw_id = json.loads(result.stdout)["InternetGateway"]["InternetGatewayId"]
        run_command(
            ["aws", "ec2", "attach-internet-gateway", "--vpc-id", vpc_id, "--internet-gateway-id", igw_id, "--region", config["aws_region"]],
            "Failed to attach internet gateway"
        )

    state["vpc_id"] = vpc_id
    state["public_subnets"] = subnet_ids
    state["last_step"] = 1
    save_state(project_name, state)
    print(f"VPC ID: {vpc_id}, Subnets: {subnet_ids}")
    return vpc_id, subnet_ids
|
||||
|
||||
def create_ecr_repositories(project_name, state, config):
    """Step 2: ensure ECR repositories exist for the app and nginx images."""
    if state["last_step"] >= 2:
        print("Skipping create_ecr_repositories (already completed)")
        return
    if not confirm_step("Create ECR Repositories"):
        sys.exit("User aborted.")

    account_id = json.loads(run_command(
        ["aws", "sts", "get-caller-identity"],
        "Failed to get AWS account ID"
    ).stdout)["Account"]
    # describe-repositories exits non-zero for a missing repo, so probe
    # without check and create only on failure.
    for repo in (project_name, f"{project_name}-nginx"):
        probe = subprocess.run(
            ["aws", "ecr", "describe-repositories", "--repository-names", repo, "--region", config["aws_region"]],
            capture_output=True, text=True
        )
        if probe.returncode != 0:
            run_command(
                ["aws", "ecr", "create-repository", "--repository-name", repo, "--region", config["aws_region"]],
                f"Failed to create ECR repository {repo}"
            )
        print(f"ECR repository {repo} is ready.")
    state["last_step"] = 2
    save_state(project_name, state)
|
||||
|
||||
def create_iam_role(project_name, state, config):
    """Step 3: ensure the shared ecsTaskExecutionRole exists.

    Creates the role with an ECS-tasks trust policy and attaches the
    AWS-managed task execution policy, then records the role ARN in
    state["execution_role_arn"].
    """
    if state["last_step"] >= 3:
        print("Skipping create_iam_role (already completed)")
        return
    if not confirm_step("Create IAM Role"):
        sys.exit("User aborted.")

    account_id = json.loads(run_command(
        ["aws", "sts", "get-caller-identity"],
        "Failed to get AWS account ID"
    ).stdout)["Account"]
    role_name = "ecsTaskExecutionRole"
    # Trust policy allowing the ECS tasks service to assume this role.
    trust_policy = {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Effect": "Allow",
                "Principal": {"Service": "ecs-tasks.amazonaws.com"},
                "Action": "sts:AssumeRole"
            }
        ]
    }
    # Written to a temp file because the CLI takes a file:// policy document.
    with open("trust_policy.json", "w") as f:
        json.dump(trust_policy, f)

    # get-role exits non-zero when the role is missing; probe without check.
    result = subprocess.run(
        ["aws", "iam", "get-role", "--role-name", role_name],
        capture_output=True, text=True
    )
    if result.returncode != 0:
        run_command(
            ["aws", "iam", "create-role", "--role-name", role_name, "--assume-role-policy-document", "file://trust_policy.json"],
            f"Failed to create IAM role {role_name}"
        )
        run_command(
            ["aws", "iam", "attach-role-policy", "--role-name", role_name, "--policy-arn", "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"],
            "Failed to attach ECS task execution policy"
        )
    os.remove("trust_policy.json")
    state["execution_role_arn"] = f"arn:aws:iam::{account_id}:role/{role_name}"
    state["last_step"] = 3
    save_state(project_name, state)
    print(f"IAM role {role_name} configured.")
|
||||
|
||||
def create_security_groups(project_name, state, config):
    """Step 4: ensure the ALB and ECS security groups exist.

    The ALB group accepts HTTP/HTTPS from anywhere; the ECS group accepts
    port-80 traffic only from the ALB group. Records both IDs in state and
    returns (alb_sg_id, ecs_sg_id).
    """
    if state["last_step"] >= 4:
        print("Skipping create_security_groups (already completed)")
        return state["alb_sg_id"], state["ecs_sg_id"]
    if not confirm_step("Create Security Groups"):
        sys.exit("User aborted.")

    vpc_id = state["vpc_id"]

    # ALB security group: world-facing HTTP (80) and HTTPS (443).
    alb_sg_name = f"{project_name}-alb-sg"
    result = run_command(
        ["aws", "ec2", "describe-security-groups", "--filters", f"Name=vpc-id,Values={vpc_id}", f"Name=group-name,Values={alb_sg_name}", "--region", config["aws_region"]],
        "Failed to describe ALB security group"
    )
    if not json.loads(result.stdout).get("SecurityGroups"):
        result = run_command(
            ["aws", "ec2", "create-security-group", "--group-name", alb_sg_name, "--description", "Security group for ALB", "--vpc-id", vpc_id, "--region", config["aws_region"]],
            "Failed to create ALB security group"
        )
        alb_sg_id = json.loads(result.stdout)["GroupId"]
        run_command(
            ["aws", "ec2", "authorize-security-group-ingress", "--group-id", alb_sg_id, "--protocol", "tcp", "--port", "80", "--cidr", "0.0.0.0/0", "--region", config["aws_region"]],
            "Failed to authorize HTTP ingress"
        )
        run_command(
            ["aws", "ec2", "authorize-security-group-ingress", "--group-id", alb_sg_id, "--protocol", "tcp", "--port", "443", "--cidr", "0.0.0.0/0", "--region", config["aws_region"]],
            "Failed to authorize HTTPS ingress"
        )
    else:
        alb_sg_id = json.loads(result.stdout)["SecurityGroups"][0]["GroupId"]

    # ECS security group: port 80 reachable only from the ALB group.
    ecs_sg_name = f"{project_name}-ecs-sg"
    result = run_command(
        ["aws", "ec2", "describe-security-groups", "--filters", f"Name=vpc-id,Values={vpc_id}", f"Name=group-name,Values={ecs_sg_name}", "--region", config["aws_region"]],
        "Failed to describe ECS security group"
    )
    if not json.loads(result.stdout).get("SecurityGroups"):
        result = run_command(
            ["aws", "ec2", "create-security-group", "--group-name", ecs_sg_name, "--description", "Security group for ECS tasks", "--vpc-id", vpc_id, "--region", config["aws_region"]],
            "Failed to create ECS security group"
        )
        ecs_sg_id = json.loads(result.stdout)["GroupId"]
        run_command(
            ["aws", "ec2", "authorize-security-group-ingress", "--group-id", ecs_sg_id, "--protocol", "tcp", "--port", "80", "--source-group", alb_sg_id, "--region", config["aws_region"]],
            "Failed to authorize ECS ingress"
        )
    else:
        ecs_sg_id = json.loads(result.stdout)["SecurityGroups"][0]["GroupId"]

    state["alb_sg_id"] = alb_sg_id
    state["ecs_sg_id"] = ecs_sg_id
    state["last_step"] = 4
    save_state(project_name, state)
    print("Security groups configured.")
    return alb_sg_id, ecs_sg_id
|
||||
|
||||
def request_acm_certificate(project_name, state, config):
    """Step 5: ensure an issued ACM certificate exists for the configured domain.

    Reuses an already-issued certificate matching the domain; otherwise
    requests a DNS-validated certificate, prints the validation record for
    the user to create, and polls until the certificate is ISSUED (exiting
    on a terminal FAILED/REVOKED/INACTIVE status).

    Returns the certificate ARN and records it in state["cert_arn"].
    """
    if state["last_step"] >= 5:
        print("Skipping request_acm_certificate (already completed)")
        return state["cert_arn"]
    if not confirm_step("Request ACM Certificate"):
        sys.exit("User aborted.")

    domain_name = config["domain_name"]
    # BUG FIX: ACM has no `describe-certificates` subcommand; listing existing
    # certificates is `list-certificates`, which returns the
    # "CertificateSummaryList" key already parsed below.
    result = run_command(
        ["aws", "acm", "list-certificates", "--certificate-statuses", "ISSUED", "--region", config["aws_region"]],
        "Failed to describe certificates"
    )
    certificates = json.loads(result.stdout).get("CertificateSummaryList", [])
    cert_arn = next((c["CertificateArn"] for c in certificates if c["DomainName"] == domain_name), None)

    if not cert_arn:
        result = run_command(
            ["aws", "acm", "request-certificate", "--domain-name", domain_name, "--validation-method", "DNS", "--region", config["aws_region"]],
            "Failed to request ACM certificate"
        )
        cert_arn = json.loads(result.stdout)["CertificateArn"]

        # Give ACM a moment to populate the DNS validation record.
        time.sleep(10)
        result = run_command(
            ["aws", "acm", "describe-certificate", "--certificate-arn", cert_arn, "--region", config["aws_region"]],
            "Failed to describe certificate"
        )
        cert_details = json.loads(result.stdout)["Certificate"]
        dns_validations = cert_details.get("DomainValidationOptions", [])
        for validation in dns_validations:
            if validation["ValidationMethod"] == "DNS" and "ResourceRecord" in validation:
                record = validation["ResourceRecord"]
                print(f"Please add this DNS record to validate the certificate for {domain_name}:")
                print(f"Name: {record['Name']}")
                print(f"Type: {record['Type']}")
                print(f"Value: {record['Value']}")
                print("Press Enter after adding the DNS record...")
                input()

        # Poll until ACM reports the certificate issued or terminally failed.
        while True:
            result = run_command(
                ["aws", "acm", "describe-certificate", "--certificate-arn", cert_arn, "--region", config["aws_region"]],
                "Failed to check certificate status"
            )
            status = json.loads(result.stdout)["Certificate"]["Status"]
            if status == "ISSUED":
                break
            elif status in ["FAILED", "REVOKED", "INACTIVE"]:
                print("Certificate issuance failed.")
                sys.exit(1)
            time.sleep(10)

    state["cert_arn"] = cert_arn
    state["last_step"] = 5
    save_state(project_name, state)
    print(f"Certificate ARN: {cert_arn}")
    return cert_arn
|
||||
|
||||
def build_and_push_docker(project_name, state, config):
    """Step 6: build the FastAPI and nginx images and push them to ECR.

    The image tag comes from ./version.txt. Records both image URIs in state
    and returns (fastapi_image, nginx_image).
    """
    if state["last_step"] >= 6:
        print("Skipping build_and_push_docker (already completed)")
        return state["fastapi_image"], state["nginx_image"]
    if not confirm_step("Build and Push Docker Images"):
        sys.exit("User aborted.")

    with open("./version.txt", "r") as f:
        version = f.read().strip()

    account_id = json.loads(run_command(
        ["aws", "sts", "get-caller-identity"],
        "Failed to get AWS account ID"
    ).stdout)["Account"]
    region = config["aws_region"]
    registry = f"{account_id}.dkr.ecr.{region}.amazonaws.com"

    # Authenticate the local Docker daemon against the account's ECR registry.
    login_password = run_command(
        ["aws", "ecr", "get-login-password", "--region", region],
        "Failed to get ECR login password"
    ).stdout.strip()
    run_command(
        ["docker", "login", "--username", "AWS", "--password", login_password, registry],
        "Failed to authenticate Docker to ECR"
    )

    # FastAPI app image, built from the repo root.
    fastapi_image = f"{registry}/{project_name}:{version}"
    run_command(
        ["docker", "build", "-f", "Dockerfile", "-t", fastapi_image, "."],
        "Failed to build FastAPI Docker image"
    )
    run_command(["docker", "push", fastapi_image], "Failed to push FastAPI image")

    # Nginx image, built from the ./nginx directory.
    nginx_image = f"{registry}/{project_name}-nginx:{version}"
    run_command(
        ["docker", "build", "-f", "Dockerfile", "-t", nginx_image, "."],
        "Failed to build Nginx Docker image",
        cwd="./nginx"
    )
    run_command(["docker", "push", nginx_image], "Failed to push Nginx image")

    state["fastapi_image"] = fastapi_image
    state["nginx_image"] = nginx_image
    state["last_step"] = 6
    save_state(project_name, state)
    print("Docker images built and pushed.")
    return fastapi_image, nginx_image
|
||||
|
||||
def create_task_definition(project_name, state, config):
    """Step 7: register the Fargate task definition (fastapi + nginx containers).

    Ensures the CloudWatch log group exists first, then registers the task
    definition, stores its ARN in state["task_def_arn"], and returns the ARN.
    """
    if state["last_step"] >= 7:
        print("Skipping create_task_definition (already completed)")
        return state["task_def_arn"]
    if not confirm_step("Create Task Definition"):
        sys.exit("User aborted.")

    # Create the log group unless an exact-name match already exists
    # (describe-log-groups matches by prefix, hence the equality re-check).
    log_group = f"/ecs/{project_name}-logs"
    result = run_command(
        ["aws", "logs", "describe-log-groups", "--log-group-name-prefix", log_group, "--region", config["aws_region"]],
        "Failed to describe log groups"
    )
    if not any(lg["logGroupName"] == log_group for lg in json.loads(result.stdout).get("logGroups", [])):
        run_command(
            ["aws", "logs", "create-log-group", "--log-group-name", log_group, "--region", config["aws_region"]],
            f"Failed to create log group {log_group}"
        )

    # One task, two containers: the FastAPI app on 8000 and nginx on 80,
    # both logging to the shared CloudWatch group with distinct prefixes.
    task_definition = {
        "family": f"{project_name}-taskdef",
        "networkMode": "awsvpc",
        "requiresCompatibilities": ["FARGATE"],
        "cpu": "512",
        "memory": "2048",
        "executionRoleArn": state["execution_role_arn"],
        "containerDefinitions": [
            {
                "name": "fastapi",
                "image": state["fastapi_image"],
                "portMappings": [{"containerPort": 8000, "hostPort": 8000, "protocol": "tcp"}],
                "logConfiguration": {
                    "logDriver": "awslogs",
                    "options": {
                        "awslogs-group": log_group,
                        "awslogs-region": config["aws_region"],
                        "awslogs-stream-prefix": "fastapi"
                    }
                }
            },
            {
                "name": "nginx",
                "image": state["nginx_image"],
                "portMappings": [{"containerPort": 80, "hostPort": 80, "protocol": "tcp"}],
                "logConfiguration": {
                    "logDriver": "awslogs",
                    "options": {
                        "awslogs-group": log_group,
                        "awslogs-region": config["aws_region"],
                        "awslogs-stream-prefix": "nginx"
                    }
                }
            }
        ]
    }

    # Registered via a temp JSON file (removed afterwards) to avoid CLI quoting issues.
    with open("task_def.json", "w") as f:
        json.dump(task_definition, f)
    result = run_command(
        ["aws", "ecs", "register-task-definition", "--cli-input-json", "file://task_def.json", "--region", config["aws_region"]],
        "Failed to register task definition"
    )
    task_def_arn = json.loads(result.stdout)["taskDefinition"]["taskDefinitionArn"]
    os.remove("task_def.json")

    state["task_def_arn"] = task_def_arn
    state["last_step"] = 7
    save_state(project_name, state)
    print("Task definition created.")
    return task_def_arn
|
||||
|
||||
def setup_alb(project_name, state, config):
    """Step 8: ensure the ALB, target group, and HTTP/HTTPS listeners exist.

    Port 80 redirects permanently to 443; port 443 terminates TLS with the
    ACM certificate from step 5 and forwards to the target group. Records
    and returns (alb_arn, tg_arn, alb_dns).
    """
    if state["last_step"] >= 8:
        print("Skipping setup_alb (already completed)")
        return state["alb_arn"], state["tg_arn"], state["alb_dns"]
    if not confirm_step("Set Up ALB"):
        sys.exit("User aborted.")

    vpc_id = state["vpc_id"]
    public_subnets = state["public_subnets"]
    alb_name = f"{project_name}-alb"

    # describe-load-balancers exits non-zero for a missing name, so probe
    # without check and create only on failure.
    result = subprocess.run(
        ["aws", "elbv2", "describe-load-balancers", "--names", alb_name, "--region", config["aws_region"]],
        capture_output=True, text=True
    )
    if result.returncode != 0:
        run_command(
            ["aws", "elbv2", "create-load-balancer", "--name", alb_name, "--subnets"] + public_subnets + ["--security-groups", state["alb_sg_id"], "--region", config["aws_region"]],
            "Failed to create ALB"
        )
    alb_arn = json.loads(run_command(
        ["aws", "elbv2", "describe-load-balancers", "--names", alb_name, "--region", config["aws_region"]],
        "Failed to describe ALB"
    ).stdout)["LoadBalancers"][0]["LoadBalancerArn"]
    alb_dns = json.loads(run_command(
        ["aws", "elbv2", "describe-load-balancers", "--names", alb_name, "--region", config["aws_region"]],
        "Failed to get ALB DNS name"
    ).stdout)["LoadBalancers"][0]["DNSName"]

    # Target group fronting the nginx container (HTTP on port 80).
    tg_name = f"{project_name}-tg"
    result = subprocess.run(
        ["aws", "elbv2", "describe-target-groups", "--names", tg_name, "--region", config["aws_region"]],
        capture_output=True, text=True
    )
    if result.returncode != 0:
        run_command(
            ["aws", "elbv2", "create-target-group", "--name", tg_name, "--protocol", "HTTP", "--port", "80", "--vpc-id", vpc_id, "--region", config["aws_region"]],
            "Failed to create target group"
        )
    tg_arn = json.loads(run_command(
        ["aws", "elbv2", "describe-target-groups", "--names", tg_name, "--region", config["aws_region"]],
        "Failed to describe target group"
    ).stdout)["TargetGroups"][0]["TargetGroupArn"]

    # Listeners: create each one only if a listener on that port is absent.
    result = run_command(
        ["aws", "elbv2", "describe-listeners", "--load-balancer-arn", alb_arn, "--region", config["aws_region"]],
        "Failed to describe listeners"
    )
    listeners = json.loads(result.stdout).get("Listeners", [])
    if not any(l["Port"] == 80 for l in listeners):
        run_command(
            ["aws", "elbv2", "create-listener", "--load-balancer-arn", alb_arn, "--protocol", "HTTP", "--port", "80", "--default-actions", "Type=redirect,RedirectConfig={Protocol=HTTPS,Port=443,StatusCode=HTTP_301}", "--region", config["aws_region"]],
            "Failed to create HTTP listener"
        )
    if not any(l["Port"] == 443 for l in listeners):
        run_command(
            ["aws", "elbv2", "create-listener", "--load-balancer-arn", alb_arn, "--protocol", "HTTPS", "--port", "443", "--certificates", f"CertificateArn={state['cert_arn']}", "--default-actions", f"Type=forward,TargetGroupArn={tg_arn}", "--region", config["aws_region"]],
            "Failed to create HTTPS listener"
        )

    state["alb_arn"] = alb_arn
    state["tg_arn"] = tg_arn
    state["alb_dns"] = alb_dns
    state["last_step"] = 8
    save_state(project_name, state)
    print("ALB configured.")
    return alb_arn, tg_arn, alb_dns
|
||||
|
||||
def deploy_ecs_service(project_name, state, config):
    """Step 9: ensure the ECS cluster exists and create or update the service."""
    if state["last_step"] >= 9:
        print("Skipping deploy_ecs_service (already completed)")
        return
    if not confirm_step("Deploy ECS Service"):
        sys.exit("User aborted.")

    cluster_name = f"{project_name}-cluster"
    result = run_command(
        ["aws", "ecs", "describe-clusters", "--clusters", cluster_name, "--region", config["aws_region"]],
        "Failed to describe clusters"
    )
    if not json.loads(result.stdout).get("clusters"):
        run_command(
            ["aws", "ecs", "create-cluster", "--cluster-name", cluster_name, "--region", config["aws_region"]],
            "Failed to create ECS cluster"
        )

    service_name = f"{project_name}-service"
    # On failure, run_command also captures list-tasks output as a diagnostic.
    result = run_command(
        ["aws", "ecs", "describe-services", "--cluster", cluster_name, "--services", service_name, "--region", config["aws_region"]],
        "Failed to describe services",
        additional_diagnostics=[["aws", "ecs", "list-tasks", "--cluster", cluster_name, "--service-name", service_name, "--region", config["aws_region"]]]
    )
    services = json.loads(result.stdout).get("services", [])
    if not services or services[0]["status"] == "INACTIVE":
        # NOTE(review): the network-configuration shorthand below embeds
        # json.dumps(...) output (quoted JSON list) inside CLI shorthand
        # syntax for `subnets` — verify the AWS CLI accepts this mixed form;
        # pure JSON via --cli-input-json would be unambiguous.
        run_command(
            ["aws", "ecs", "create-service", "--cluster", cluster_name, "--service-name", service_name, "--task-definition", state["task_def_arn"], "--desired-count", "1", "--launch-type", "FARGATE", "--network-configuration", f"awsvpcConfiguration={{subnets={json.dumps(state['public_subnets'])},securityGroups=[{state['ecs_sg_id']}],assignPublicIp=ENABLED}}", "--load-balancers", f"targetGroupArn={state['tg_arn']},containerName=nginx,containerPort=80", "--region", config["aws_region"]],
            "Failed to create ECS service"
        )
    else:
        # Service already exists: roll it onto the new task definition revision.
        run_command(
            ["aws", "ecs", "update-service", "--cluster", cluster_name, "--service", service_name, "--task-definition", state["task_def_arn"], "--region", config["aws_region"]],
            "Failed to update ECS service"
        )

    state["last_step"] = 9
    save_state(project_name, state)
    print("ECS service deployed.")
|
||||
|
||||
def configure_custom_domain(project_name, state, config):
    """Step 10: have the user point a CNAME at the ALB and wait for propagation.

    Blocks indefinitely (polling every 30 seconds via `dig`) until the domain
    resolves to the ALB DNS name.
    """
    if state["last_step"] >= 10:
        print("Skipping configure_custom_domain (already completed)")
        return
    if not confirm_step("Configure Custom Domain"):
        sys.exit("User aborted.")

    domain_name = config["domain_name"]
    alb_dns = state["alb_dns"]
    print(f"Please add a CNAME record for {domain_name} pointing to {alb_dns} in your DNS provider.")
    print("Press Enter after updating the DNS record...")
    input()

    # No upper bound on retries: the operator can Ctrl-C if DNS never updates.
    while not check_dns_propagation(domain_name, alb_dns):
        print("DNS propagation not complete. Waiting 30 seconds before retrying...")
        time.sleep(30)
    print("DNS propagation confirmed.")

    state["last_step"] = 10
    save_state(project_name, state)
    print("Custom domain configured.")
|
||||
|
||||
def test_endpoints(project_name, state, config):
|
||||
if state["last_step"] >= 11:
|
||||
print("Skipping test_endpoints (already completed)")
|
||||
return
|
||||
if not confirm_step("Test Endpoints"):
|
||||
sys.exit("User aborted.")
|
||||
|
||||
domain = config["domain_name"]
|
||||
time.sleep(30) # Wait for service to stabilize
|
||||
|
||||
response = requests.get(f"https://{domain}/health", verify=False)
|
||||
if response.status_code != 200:
|
||||
with open("error_context.md", "w") as f:
|
||||
f.write("Health endpoint test failed:\n")
|
||||
f.write(f"Status Code: {response.status_code}\n")
|
||||
f.write(f"Response: {response.text}\n")
|
||||
sys.exit(1)
|
||||
print("Health endpoint test passed.")
|
||||
|
||||
payload = {
|
||||
"urls": ["https://example.com"],
|
||||
"browser_config": {"headless": True},
|
||||
"crawler_config": {"stream": False}
|
||||
}
|
||||
response = requests.post(f"https://{domain}/crawl", json=payload, verify=False)
|
||||
if response.status_code != 200:
|
||||
with open("error_context.md", "w") as f:
|
||||
f.write("Crawl endpoint test failed:\n")
|
||||
f.write(f"Status Code: {response.status_code}\n")
|
||||
f.write(f"Response: {response.text}\n")
|
||||
sys.exit(1)
|
||||
print("Crawl endpoint test passed.")
|
||||
|
||||
state["last_step"] = 11
|
||||
save_state(project_name, state)
|
||||
print("Endpoints tested successfully.")
|
||||
|
||||
# Main Deployment Function
|
||||
def deploy(project_name, force=False):
    """Run all deployment steps for *project_name*, resuming from saved state.

    Loads the project's YAML config, then executes each step in STEPS in
    order. Steps whose index is already recorded in the state file are
    skipped unless *force* is True, which restarts from the beginning.

    Args:
        project_name: Project identifier; "<project_name>-config.yml" must exist.
        force: If True, ignore saved progress and redo every step.

    Raises:
        SystemExit: If the configuration file is missing.
    """
    config_file = f"{project_name}-config.yml"
    if not os.path.exists(config_file):
        print(f"Configuration file {config_file} not found. Run 'init' first.")
        sys.exit(1)

    with open(config_file, "r") as f:
        config = yaml.safe_load(f)

    state = load_state(project_name)
    if force:
        state = {"last_step": -1}

    last_step = state.get("last_step", -1)

    for step_idx, step_name in enumerate(STEPS):
        if step_idx <= last_step:
            print(f"Skipping {step_name} (already completed)")
            continue
        print(f"Executing step: {step_name}")
        # Every step function takes (project_name, state, config). Anything a
        # step needs to persist is written into the shared state dict, so the
        # individual return values can be safely discarded here.
        func = globals()[step_name]
        func(project_name, state, config)
|
||||
|
||||
# Init Command
|
||||
def init(project_name, domain_name, aws_region):
    """Create the YAML configuration file for a new deployment project.

    Writes "<project_name>-config.yml" in the current directory containing
    the project name, domain name, and AWS region.
    """
    settings = {
        "project_name": project_name,
        "domain_name": domain_name,
        "aws_region": aws_region,
    }
    path = f"{project_name}-config.yml"
    with open(path, "w") as fh:
        yaml.dump(settings, fh)
    print(f"Configuration file {path} created.")
|
||||
|
||||
# Argument Parser
|
||||
parser = argparse.ArgumentParser(description="Crawl4AI Deployment Script")
|
||||
subparsers = parser.add_subparsers(dest="command")
|
||||
|
||||
# Init Parser
|
||||
init_parser = subparsers.add_parser("init", help="Initialize configuration")
|
||||
init_parser.add_argument("--project", required=True, help="Project name")
|
||||
init_parser.add_argument("--domain", required=True, help="Domain name")
|
||||
init_parser.add_argument("--region", required=True, help="AWS region")
|
||||
|
||||
# Deploy Parser
|
||||
deploy_parser = subparsers.add_parser("deploy", help="Deploy the project")
|
||||
deploy_parser.add_argument("--project", required=True, help="Project name")
|
||||
deploy_parser.add_argument("--force", action="store_true", help="Force redeployment from start")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.command == "init":
|
||||
init(args.project, args.domain, args.region)
|
||||
elif args.command == "deploy":
|
||||
deploy(args.project, args.force)
|
||||
else:
|
||||
parser.print_help()
|
||||
31
deploy/aws/docker/.dockerignore
Normal file
31
deploy/aws/docker/.dockerignore
Normal file
@@ -0,0 +1,31 @@
|
||||
# .dockerignore
|
||||
*
|
||||
|
||||
# Allow specific files and directories when using local installation
|
||||
!crawl4ai/
|
||||
!docs/
|
||||
!deploy/docker/
|
||||
!setup.py
|
||||
!pyproject.toml
|
||||
!README.md
|
||||
!LICENSE
|
||||
!MANIFEST.in
|
||||
!setup.cfg
|
||||
!mkdocs.yml
|
||||
|
||||
.git/
|
||||
__pycache__/
|
||||
*.pyc
|
||||
*.pyo
|
||||
*.pyd
|
||||
.DS_Store
|
||||
.env
|
||||
.venv
|
||||
venv/
|
||||
tests/
|
||||
coverage.xml
|
||||
*.log
|
||||
*.swp
|
||||
*.egg-info/
|
||||
dist/
|
||||
build/
|
||||
8
deploy/aws/docker/.llm.env.example
Normal file
8
deploy/aws/docker/.llm.env.example
Normal file
@@ -0,0 +1,8 @@
|
||||
# LLM Provider Keys
|
||||
OPENAI_API_KEY=your_openai_key_here
|
||||
DEEPSEEK_API_KEY=your_deepseek_key_here
|
||||
ANTHROPIC_API_KEY=your_anthropic_key_here
|
||||
GROQ_API_KEY=your_groq_key_here
|
||||
TOGETHER_API_KEY=your_together_key_here
|
||||
MISTRAL_API_KEY=your_mistral_key_here
|
||||
GEMINI_API_TOKEN=your_gemini_key_here
|
||||
847
deploy/aws/docker/README.md
Normal file
847
deploy/aws/docker/README.md
Normal file
@@ -0,0 +1,847 @@
|
||||
# Crawl4AI Docker Guide 🐳
|
||||
|
||||
## Table of Contents
|
||||
- [Prerequisites](#prerequisites)
|
||||
- [Installation](#installation)
|
||||
- [Local Build](#local-build)
|
||||
- [Docker Hub](#docker-hub)
|
||||
- [Dockerfile Parameters](#dockerfile-parameters)
|
||||
- [Using the API](#using-the-api)
|
||||
- [Understanding Request Schema](#understanding-request-schema)
|
||||
- [REST API Examples](#rest-api-examples)
|
||||
- [Python SDK](#python-sdk)
|
||||
- [Metrics & Monitoring](#metrics--monitoring)
|
||||
- [Deployment Scenarios](#deployment-scenarios)
|
||||
- [Complete Examples](#complete-examples)
|
||||
- [Getting Help](#getting-help)
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Before we dive in, make sure you have:
|
||||
- Docker installed and running (version 20.10.0 or higher)
|
||||
- At least 4GB of RAM available for the container
|
||||
- Python 3.10+ (if using the Python SDK)
|
||||
- Node.js 16+ (if using the Node.js examples)
|
||||
|
||||
> 💡 **Pro tip**: Run `docker info` to check your Docker installation and available resources.
|
||||
|
||||
## Installation
|
||||
|
||||
### Local Build
|
||||
|
||||
Let's get your local environment set up step by step!
|
||||
|
||||
#### 1. Building the Image
|
||||
|
||||
First, clone the repository and build the Docker image:
|
||||
|
||||
```bash
|
||||
# Clone the repository
|
||||
git clone https://github.com/unclecode/crawl4ai.git
|
||||
cd crawl4ai/deploy
|
||||
|
||||
# Build the Docker image
|
||||
docker build --platform=linux/amd64 --no-cache -t crawl4ai .
|
||||
|
||||
# Or build for arm64
|
||||
docker build --platform=linux/arm64 --no-cache -t crawl4ai .
|
||||
```
|
||||
|
||||
#### 2. Environment Setup
|
||||
|
||||
If you plan to use LLMs (Language Models), you'll need to set up your API keys. Create a `.llm.env` file:
|
||||
|
||||
```env
|
||||
# OpenAI
|
||||
OPENAI_API_KEY=sk-your-key
|
||||
|
||||
# Anthropic
|
||||
ANTHROPIC_API_KEY=your-anthropic-key
|
||||
|
||||
# DeepSeek
|
||||
DEEPSEEK_API_KEY=your-deepseek-key
|
||||
|
||||
# Check out https://docs.litellm.ai/docs/providers for more providers!
|
||||
```
|
||||
|
||||
> 🔑 **Note**: Keep your API keys secure! Never commit them to version control.
|
||||
|
||||
#### 3. Running the Container
|
||||
|
||||
You have several options for running the container:
|
||||
|
||||
Basic run (no LLM support):
|
||||
```bash
|
||||
docker run -d -p 8000:8000 --name crawl4ai crawl4ai
|
||||
```
|
||||
|
||||
With LLM support:
|
||||
```bash
|
||||
docker run -d -p 8000:8000 \
|
||||
--env-file .llm.env \
|
||||
--name crawl4ai \
|
||||
crawl4ai
|
||||
```
|
||||
|
||||
Using host environment variables (Not a good practice, but works for local testing):
|
||||
```bash
|
||||
docker run -d -p 8000:8000 \
|
||||
--env-file .llm.env \
|
||||
--env "$(env)" \
|
||||
--name crawl4ai \
|
||||
crawl4ai
|
||||
```
|
||||
|
||||
#### Multi-Platform Build
|
||||
For distributing your image across different architectures, use `buildx`:
|
||||
|
||||
```bash
|
||||
# Set up buildx builder
|
||||
docker buildx create --use
|
||||
|
||||
# Build for multiple platforms
|
||||
docker buildx build \
|
||||
--platform linux/amd64,linux/arm64 \
|
||||
-t crawl4ai \
|
||||
--push \
|
||||
.
|
||||
```
|
||||
|
||||
> 💡 **Note**: Multi-platform builds require Docker Buildx and need to be pushed to a registry.
|
||||
|
||||
#### Development Build
|
||||
For development, you might want to enable all features:
|
||||
|
||||
```bash
|
||||
docker build -t crawl4ai \
|
||||
--build-arg INSTALL_TYPE=all \
|
||||
--build-arg PYTHON_VERSION=3.10 \
|
||||
--build-arg ENABLE_GPU=true \
|
||||
.
|
||||
```
|
||||
|
||||
#### GPU-Enabled Build
|
||||
If you plan to use GPU acceleration:
|
||||
|
||||
```bash
|
||||
docker build -t crawl4ai \
|
||||
--build-arg ENABLE_GPU=true \
|
||||
deploy/docker/
|
||||
```
|
||||
|
||||
### Build Arguments Explained
|
||||
|
||||
| Argument | Description | Default | Options |
|
||||
|----------|-------------|---------|----------|
|
||||
| PYTHON_VERSION | Python version | 3.10 | 3.8, 3.9, 3.10 |
|
||||
| INSTALL_TYPE | Feature set | default | default, all, torch, transformer |
|
||||
| ENABLE_GPU | GPU support | false | true, false |
|
||||
| APP_HOME | Install path | /app | any valid path |
|
||||
|
||||
### Build Best Practices
|
||||
|
||||
1. **Choose the Right Install Type**
|
||||
- `default`: Basic installation, smallest image, to be honest, I use this most of the time.
|
||||
- `all`: Full features, larger image (include transformer, and nltk, make sure you really need them)
|
||||
|
||||
2. **Platform Considerations**
|
||||
- Let Docker auto-detect platform unless you need cross-compilation
|
||||
- Use --platform for specific architecture requirements
|
||||
- Consider buildx for multi-architecture distribution
|
||||
|
||||
3. **Performance Optimization**
|
||||
- The image automatically includes platform-specific optimizations
|
||||
- AMD64 gets OpenMP optimizations
|
||||
- ARM64 gets OpenBLAS optimizations
|
||||
|
||||
### Docker Hub
|
||||
|
||||
> 🚧 Coming soon! The image will be available at `crawl4ai`. Stay tuned!
|
||||
|
||||
## Using the API
|
||||
|
||||
In the following sections, we discuss two ways to communicate with the Docker server. One option is to use the client SDK that I developed for Python, and I will soon develop one for Node.js. I highly recommend this approach to avoid mistakes. Alternatively, you can take a more technical route by using the JSON structure and passing it to all the URLs, which I will explain in detail.
|
||||
|
||||
### Python SDK
|
||||
|
||||
The SDK makes things easier! Here's how to use it:
|
||||
|
||||
```python
|
||||
from crawl4ai.docker_client import Crawl4aiDockerClient
|
||||
from crawl4ai import BrowserConfig, CrawlerRunConfig
|
||||
|
||||
async def main():
|
||||
async with Crawl4aiDockerClient(base_url="http://localhost:8000", verbose=True) as client:
|
||||
# If JWT is enabled, you can authenticate like this: (more on this later)
|
||||
# await client.authenticate("test@example.com")
|
||||
|
||||
# Non-streaming crawl
|
||||
results = await client.crawl(
|
||||
["https://example.com", "https://python.org"],
|
||||
browser_config=BrowserConfig(headless=True),
|
||||
crawler_config=CrawlerRunConfig()
|
||||
)
|
||||
print(f"Non-streaming results: {results}")
|
||||
|
||||
# Streaming crawl
|
||||
crawler_config = CrawlerRunConfig(stream=True)
|
||||
async for result in await client.crawl(
|
||||
["https://example.com", "https://python.org"],
|
||||
browser_config=BrowserConfig(headless=True),
|
||||
crawler_config=crawler_config
|
||||
):
|
||||
print(f"Streamed result: {result}")
|
||||
|
||||
# Get schema
|
||||
schema = await client.get_schema()
|
||||
print(f"Schema: {schema}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
```
|
||||
|
||||
`Crawl4aiDockerClient` is an async context manager that handles the connection for you. You can pass in optional parameters for more control:
|
||||
|
||||
- `base_url` (str): Base URL of the Crawl4AI Docker server
|
||||
- `timeout` (float): Default timeout for requests in seconds
|
||||
- `verify_ssl` (bool): Whether to verify SSL certificates
|
||||
- `verbose` (bool): Whether to show logging output
|
||||
- `log_file` (str, optional): Path to log file if file logging is desired
|
||||
|
||||
This client SDK generates a properly structured JSON request for the server's HTTP API.
|
||||
|
||||
## Second Approach: Direct API Calls
|
||||
|
||||
This is super important! The API expects a specific structure that matches our Python classes. Let me show you how it works.
|
||||
|
||||
### Understanding Configuration Structure
|
||||
|
||||
Let's dive deep into how configurations work in Crawl4AI. Every configuration object follows a consistent pattern of `type` and `params`. This structure enables complex, nested configurations while maintaining clarity.
|
||||
|
||||
#### The Basic Pattern
|
||||
|
||||
Try this in Python to understand the structure:
|
||||
```python
|
||||
from crawl4ai import BrowserConfig
|
||||
|
||||
# Create a config and see its structure
|
||||
config = BrowserConfig(headless=True)
|
||||
print(config.dump())
|
||||
```
|
||||
|
||||
This outputs:
|
||||
```json
|
||||
{
|
||||
"type": "BrowserConfig",
|
||||
"params": {
|
||||
"headless": true
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Simple vs Complex Values
|
||||
|
||||
The structure follows these rules:
|
||||
- Simple values (strings, numbers, booleans, lists) are passed directly
|
||||
- Complex values (classes, dictionaries) use the type-params pattern
|
||||
|
||||
For example, with dictionaries:
|
||||
```json
|
||||
{
|
||||
"browser_config": {
|
||||
"type": "BrowserConfig",
|
||||
"params": {
|
||||
"headless": true, // Simple boolean - direct value
|
||||
"viewport": { // Complex dictionary - needs type-params
|
||||
"type": "dict",
|
||||
"value": {
|
||||
"width": 1200,
|
||||
"height": 800
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Strategy Pattern and Nesting
|
||||
|
||||
Strategies (like chunking or content filtering) demonstrate why we need this structure. Consider this chunking configuration:
|
||||
|
||||
```json
|
||||
{
|
||||
"crawler_config": {
|
||||
"type": "CrawlerRunConfig",
|
||||
"params": {
|
||||
"chunking_strategy": {
|
||||
"type": "RegexChunking", // Strategy implementation
|
||||
"params": {
|
||||
"patterns": ["\n\n", "\\.\\s+"]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Here, `chunking_strategy` accepts any chunking implementation. The `type` field tells the system which strategy to use, and `params` configures that specific strategy.
|
||||
|
||||
#### Complex Nested Example
|
||||
|
||||
Let's look at a more complex example with content filtering:
|
||||
|
||||
```json
|
||||
{
|
||||
"crawler_config": {
|
||||
"type": "CrawlerRunConfig",
|
||||
"params": {
|
||||
"markdown_generator": {
|
||||
"type": "DefaultMarkdownGenerator",
|
||||
"params": {
|
||||
"content_filter": {
|
||||
"type": "PruningContentFilter",
|
||||
"params": {
|
||||
"threshold": 0.48,
|
||||
"threshold_type": "fixed"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
This shows how deeply configurations can nest while maintaining a consistent structure.
|
||||
|
||||
#### Quick Grammar Overview
|
||||
```
|
||||
config := {
|
||||
"type": string,
|
||||
"params": {
|
||||
key: simple_value | complex_value
|
||||
}
|
||||
}
|
||||
|
||||
simple_value := string | number | boolean | [simple_value]
|
||||
complex_value := config | dict_value
|
||||
|
||||
dict_value := {
|
||||
"type": "dict",
|
||||
"value": object
|
||||
}
|
||||
```
|
||||
|
||||
#### Important Rules 🚨
|
||||
|
||||
- Always use the type-params pattern for class instances
|
||||
- Use direct values for primitives (numbers, strings, booleans)
|
||||
- Wrap dictionaries with {"type": "dict", "value": {...}}
|
||||
- Arrays/lists are passed directly without type-params
|
||||
- All parameters are optional unless specifically required
|
||||
|
||||
#### Pro Tip 💡
|
||||
|
||||
The easiest way to get the correct structure is to:
|
||||
1. Create configuration objects in Python
|
||||
2. Use the `dump()` method to see their JSON representation
|
||||
3. Use that JSON in your API calls
|
||||
|
||||
Example:
|
||||
```python
|
||||
from crawl4ai import CrawlerRunConfig, PruningContentFilter
|
||||
|
||||
config = CrawlerRunConfig(
|
||||
content_filter=PruningContentFilter(threshold=0.48)
|
||||
)
|
||||
print(config.dump()) # Use this JSON in your API calls
|
||||
```
|
||||
|
||||
|
||||
#### More Examples
|
||||
|
||||
**Advanced Crawler Configuration**
|
||||
|
||||
```json
|
||||
{
|
||||
"urls": ["https://example.com"],
|
||||
"crawler_config": {
|
||||
"type": "CrawlerRunConfig",
|
||||
"params": {
|
||||
"cache_mode": "bypass",
|
||||
"markdown_generator": {
|
||||
"type": "DefaultMarkdownGenerator",
|
||||
"params": {
|
||||
"content_filter": {
|
||||
"type": "PruningContentFilter",
|
||||
"params": {
|
||||
"threshold": 0.48,
|
||||
"threshold_type": "fixed",
|
||||
"min_word_threshold": 0
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Extraction Strategy**:
|
||||
|
||||
```json
|
||||
{
|
||||
"crawler_config": {
|
||||
"type": "CrawlerRunConfig",
|
||||
"params": {
|
||||
"extraction_strategy": {
|
||||
"type": "JsonCssExtractionStrategy",
|
||||
"params": {
|
||||
"schema": {
|
||||
"baseSelector": "article.post",
|
||||
"fields": [
|
||||
{"name": "title", "selector": "h1", "type": "text"},
|
||||
{"name": "content", "selector": ".content", "type": "html"}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**LLM Extraction Strategy**
|
||||
|
||||
```json
|
||||
{
|
||||
"crawler_config": {
|
||||
"type": "CrawlerRunConfig",
|
||||
"params": {
|
||||
"extraction_strategy": {
|
||||
"type": "LLMExtractionStrategy",
|
||||
"params": {
|
||||
"instruction": "Extract article title, author, publication date and main content",
|
||||
"provider": "openai/gpt-4",
|
||||
"api_token": "your-api-token",
|
||||
"schema": {
|
||||
"type": "dict",
|
||||
"value": {
|
||||
"title": "Article Schema",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"title": {
|
||||
"type": "string",
|
||||
"description": "The article's headline"
|
||||
},
|
||||
"author": {
|
||||
"type": "string",
|
||||
"description": "The author's name"
|
||||
},
|
||||
"published_date": {
|
||||
"type": "string",
|
||||
"format": "date-time",
|
||||
"description": "Publication date and time"
|
||||
},
|
||||
"content": {
|
||||
"type": "string",
|
||||
"description": "The main article content"
|
||||
}
|
||||
},
|
||||
"required": ["title", "content"]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Deep Crawler Example**
|
||||
|
||||
```json
|
||||
{
|
||||
"crawler_config": {
|
||||
"type": "CrawlerRunConfig",
|
||||
"params": {
|
||||
"deep_crawl_strategy": {
|
||||
"type": "BFSDeepCrawlStrategy",
|
||||
"params": {
|
||||
"max_depth": 3,
|
||||
"max_pages": 100,
|
||||
"filter_chain": {
|
||||
"type": "FastFilterChain",
|
||||
"params": {
|
||||
"filters": [
|
||||
{
|
||||
"type": "FastContentTypeFilter",
|
||||
"params": {
|
||||
"allowed_types": ["text/html", "application/xhtml+xml"]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "FastDomainFilter",
|
||||
"params": {
|
||||
"allowed_domains": ["blog.*", "docs.*"],
|
||||
"blocked_domains": ["ads.*", "analytics.*"]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "FastURLPatternFilter",
|
||||
"params": {
|
||||
"allowed_patterns": ["^/blog/", "^/docs/"],
|
||||
"blocked_patterns": [".*/ads/", ".*/sponsored/"]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"url_scorer": {
|
||||
"type": "FastCompositeScorer",
|
||||
"params": {
|
||||
"scorers": [
|
||||
{
|
||||
"type": "FastKeywordRelevanceScorer",
|
||||
"params": {
|
||||
"keywords": ["tutorial", "guide", "documentation"],
|
||||
"weight": 1.0
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "FastPathDepthScorer",
|
||||
"params": {
|
||||
"weight": 0.5,
|
||||
"preferred_depth": 2
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "FastFreshnessScorer",
|
||||
"params": {
|
||||
"weight": 0.8,
|
||||
"max_age_days": 365
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### REST API Examples
|
||||
|
||||
Let's look at some practical examples:
|
||||
|
||||
#### Simple Crawl
|
||||
|
||||
```python
|
||||
import requests
|
||||
|
||||
crawl_payload = {
|
||||
"urls": ["https://example.com"],
|
||||
"browser_config": {"headless": True},
|
||||
"crawler_config": {"stream": False}
|
||||
}
|
||||
response = requests.post(
|
||||
"http://localhost:8000/crawl",
|
||||
# headers={"Authorization": f"Bearer {token}"}, # If JWT is enabled, more on this later
|
||||
json=crawl_payload
|
||||
)
|
||||
print(response.json()) # Print the response for debugging
|
||||
```
|
||||
|
||||
#### Streaming Results
|
||||
|
||||
```python
|
||||
async def test_stream_crawl(session, token: str):
|
||||
"""Test the /crawl/stream endpoint with multiple URLs."""
|
||||
url = "http://localhost:8000/crawl/stream"
|
||||
payload = {
|
||||
"urls": [
|
||||
"https://example.com",
|
||||
"https://example.com/page1",
|
||||
"https://example.com/page2",
|
||||
"https://example.com/page3",
|
||||
],
|
||||
"browser_config": {"headless": True, "viewport": {"width": 1200}},
|
||||
"crawler_config": {"stream": True, "cache_mode": "aggressive"}
|
||||
}
|
||||
|
||||
# headers = {"Authorization": f"Bearer {token}"} # If JWT is enabled, more on this later
|
||||
|
||||
try:
|
||||
    async with session.post(url, json=payload) as response:  # pass headers=headers if JWT is enabled
|
||||
status = response.status
|
||||
print(f"Status: {status} (Expected: 200)")
|
||||
assert status == 200, f"Expected 200, got {status}"
|
||||
|
||||
# Read streaming response line-by-line (NDJSON)
|
||||
async for line in response.content:
|
||||
if line:
|
||||
data = json.loads(line.decode('utf-8').strip())
|
||||
print(f"Streamed Result: {json.dumps(data, indent=2)}")
|
||||
except Exception as e:
|
||||
print(f"Error in streaming crawl test: {str(e)}")
|
||||
```
|
||||
|
||||
## Metrics & Monitoring
|
||||
|
||||
Keep an eye on your crawler with these endpoints:
|
||||
|
||||
- `/health` - Quick health check
|
||||
- `/metrics` - Detailed Prometheus metrics
|
||||
- `/schema` - Full API schema
|
||||
|
||||
Example health check:
|
||||
```bash
|
||||
curl http://localhost:8000/health
|
||||
```
|
||||
|
||||
## Deployment Scenarios
|
||||
|
||||
> 🚧 Coming soon! We'll cover:
|
||||
> - Kubernetes deployment
|
||||
> - Cloud provider setups (AWS, GCP, Azure)
|
||||
> - High-availability configurations
|
||||
> - Load balancing strategies
|
||||
|
||||
## Complete Examples
|
||||
|
||||
Check out the `examples` folder in our repository for full working examples! Here are two to get you started:
|
||||
[Using Client SDK](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/docker_python_sdk_example.py)
|
||||
[Using REST API](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/docker_python_rest_api_example.py)
|
||||
|
||||
## Server Configuration
|
||||
|
||||
The server's behavior can be customized through the `config.yml` file. Let's explore how to configure your Crawl4AI server for optimal performance and security.
|
||||
|
||||
### Understanding config.yml
|
||||
|
||||
The configuration file is located at `deploy/docker/config.yml`. You can either modify this file before building the image or mount a custom configuration when running the container.
|
||||
|
||||
Here's a detailed breakdown of the configuration options:
|
||||
|
||||
```yaml
|
||||
# Application Configuration
|
||||
app:
|
||||
title: "Crawl4AI API" # Server title in OpenAPI docs
|
||||
version: "1.0.0" # API version
|
||||
host: "0.0.0.0" # Listen on all interfaces
|
||||
port: 8000 # Server port
|
||||
reload: True # Enable hot reloading (development only)
|
||||
timeout_keep_alive: 300 # Keep-alive timeout in seconds
|
||||
|
||||
# Rate Limiting Configuration
|
||||
rate_limiting:
|
||||
enabled: True # Enable/disable rate limiting
|
||||
default_limit: "100/minute" # Rate limit format: "number/timeunit"
|
||||
trusted_proxies: [] # List of trusted proxy IPs
|
||||
storage_uri: "memory://" # Use "redis://localhost:6379" for production
|
||||
|
||||
# Security Configuration
|
||||
security:
|
||||
enabled: false # Master toggle for security features
|
||||
jwt_enabled: true # Enable JWT authentication
|
||||
https_redirect: True # Force HTTPS
|
||||
trusted_hosts: ["*"] # Allowed hosts (use specific domains in production)
|
||||
headers: # Security headers
|
||||
x_content_type_options: "nosniff"
|
||||
x_frame_options: "DENY"
|
||||
content_security_policy: "default-src 'self'"
|
||||
strict_transport_security: "max-age=63072000; includeSubDomains"
|
||||
|
||||
# Crawler Configuration
|
||||
crawler:
|
||||
memory_threshold_percent: 95.0 # Memory usage threshold
|
||||
rate_limiter:
|
||||
base_delay: [1.0, 2.0] # Min and max delay between requests
|
||||
timeouts:
|
||||
stream_init: 30.0 # Stream initialization timeout
|
||||
batch_process: 300.0 # Batch processing timeout
|
||||
|
||||
# Logging Configuration
|
||||
logging:
|
||||
level: "INFO" # Log level (DEBUG, INFO, WARNING, ERROR)
|
||||
format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
||||
|
||||
# Observability Configuration
|
||||
observability:
|
||||
prometheus:
|
||||
enabled: True # Enable Prometheus metrics
|
||||
endpoint: "/metrics" # Metrics endpoint
|
||||
health_check:
|
||||
endpoint: "/health" # Health check endpoint
|
||||
```
|
||||
|
||||
### JWT Authentication
|
||||
|
||||
When `security.jwt_enabled` is set to `true` in your config.yml, all endpoints require JWT authentication via bearer tokens. Here's how it works:
|
||||
|
||||
#### Getting a Token
|
||||
```python
|
||||
POST /token
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"email": "user@example.com"
|
||||
}
|
||||
```
|
||||
|
||||
The endpoint returns:
|
||||
```json
|
||||
{
|
||||
"email": "user@example.com",
|
||||
"access_token": "eyJ0eXAiOiJKV1QiLCJhbGciOi...",
|
||||
"token_type": "bearer"
|
||||
}
|
||||
```
|
||||
|
||||
#### Using the Token
|
||||
Add the token to your requests:
|
||||
```bash
|
||||
curl -H "Authorization: Bearer eyJ0eXAiOiJKV1QiLCJhbGci..." http://localhost:8000/crawl
|
||||
```
|
||||
|
||||
Using the Python SDK:
|
||||
```python
|
||||
from crawl4ai.docker_client import Crawl4aiDockerClient
|
||||
|
||||
async with Crawl4aiDockerClient() as client:
|
||||
# Authenticate first
|
||||
await client.authenticate("user@example.com")
|
||||
|
||||
# Now all requests will include the token automatically
|
||||
result = await client.crawl(urls=["https://example.com"])
|
||||
```
|
||||
|
||||
#### Production Considerations 💡
|
||||
The default implementation uses a simple email verification. For production use, consider:
|
||||
- Email verification via OTP/magic links
|
||||
- OAuth2 integration
|
||||
- Rate limiting token generation
|
||||
- Token expiration and refresh mechanisms
|
||||
- IP-based restrictions
|
||||
|
||||
### Configuration Tips and Best Practices
|
||||
|
||||
1. **Production Settings** 🏭
|
||||
|
||||
```yaml
|
||||
app:
|
||||
reload: False # Disable reload in production
|
||||
timeout_keep_alive: 120 # Lower timeout for better resource management
|
||||
|
||||
rate_limiting:
|
||||
storage_uri: "redis://redis:6379" # Use Redis for distributed rate limiting
|
||||
default_limit: "50/minute" # More conservative rate limit
|
||||
|
||||
security:
|
||||
enabled: true # Enable all security features
|
||||
trusted_hosts: ["your-domain.com"] # Restrict to your domain
|
||||
```
|
||||
|
||||
2. **Development Settings** 🛠️
|
||||
|
||||
```yaml
|
||||
app:
|
||||
reload: True # Enable hot reloading
|
||||
timeout_keep_alive: 300 # Longer timeout for debugging
|
||||
|
||||
logging:
|
||||
level: "DEBUG" # More verbose logging
|
||||
```
|
||||
|
||||
3. **High-Traffic Settings** 🚦
|
||||
|
||||
```yaml
|
||||
crawler:
|
||||
memory_threshold_percent: 85.0 # More conservative memory limit
|
||||
rate_limiter:
|
||||
base_delay: [2.0, 4.0] # More aggressive rate limiting
|
||||
```
|
||||
|
||||
### Customizing Your Configuration
|
||||
|
||||
#### Method 1: Pre-build Configuration
|
||||
|
||||
```bash
|
||||
# Copy and modify config before building
|
||||
cd crawl4ai/deploy
|
||||
vim custom-config.yml # Or use any editor
|
||||
|
||||
# Build with custom config
|
||||
docker build --platform=linux/amd64 --no-cache -t crawl4ai:latest .
|
||||
```
|
||||
|
||||
#### Method 2: Build-time Configuration
|
||||
|
||||
Use a custom config during build:
|
||||
|
||||
```bash
|
||||
# Build with custom config
|
||||
docker build --platform=linux/amd64 --no-cache \
|
||||
--build-arg CONFIG_PATH=/path/to/custom-config.yml \
|
||||
-t crawl4ai:latest .
|
||||
```
|
||||
|
||||
#### Method 3: Runtime Configuration
|
||||
```bash
|
||||
# Mount custom config at runtime
|
||||
docker run -d -p 8000:8000 \
|
||||
-v $(pwd)/custom-config.yml:/app/config.yml \
|
||||
crawl4ai-server:prod
|
||||
```
|
||||
|
||||
> 💡 Note: When using Method 2, `/path/to/custom-config.yml` is relative to deploy directory.
|
||||
> 💡 Note: When using Method 3, ensure your custom config file has all required fields as the container will use this instead of the built-in config.
|
||||
|
||||
### Configuration Recommendations
|
||||
|
||||
1. **Security First** 🔒
|
||||
- Always enable security in production
|
||||
- Use specific trusted_hosts instead of wildcards
|
||||
- Set up proper rate limiting to protect your server
|
||||
- Consider your environment before enabling HTTPS redirect
|
||||
|
||||
2. **Resource Management** 💻
|
||||
- Adjust memory_threshold_percent based on available RAM
|
||||
- Set timeouts according to your content size and network conditions
|
||||
- Use Redis for rate limiting in multi-container setups
|
||||
|
||||
3. **Monitoring** 📊
|
||||
- Enable Prometheus if you need metrics
|
||||
- Set DEBUG logging in development, INFO in production
|
||||
- Regular health check monitoring is crucial
|
||||
|
||||
4. **Performance Tuning** ⚡
|
||||
- Start with conservative rate limiter delays
|
||||
- Increase batch_process timeout for large content
|
||||
- Adjust stream_init timeout based on initial response times
|
||||
|
||||
## Getting Help
|
||||
|
||||
We're here to help you succeed with Crawl4AI! Here's how to get support:
|
||||
|
||||
- 📖 Check our [full documentation](https://docs.crawl4ai.com)
|
||||
- 🐛 Found a bug? [Open an issue](https://github.com/unclecode/crawl4ai/issues)
|
||||
- 💬 Join our [Discord community](https://discord.gg/crawl4ai)
|
||||
- ⭐ Star us on GitHub to show support!
|
||||
|
||||
## Summary
|
||||
|
||||
In this guide, we've covered everything you need to get started with Crawl4AI's Docker deployment:
|
||||
- Building and running the Docker container
|
||||
- Configuring the environment
|
||||
- Making API requests with proper typing
|
||||
- Using the Python SDK
|
||||
- Monitoring your deployment
|
||||
|
||||
Remember, the examples in the `examples` folder are your friends - they show real-world usage patterns that you can adapt for your needs.
|
||||
|
||||
Keep exploring, and don't hesitate to reach out if you need help! We're building something amazing together. 🚀
|
||||
|
||||
Happy crawling! 🕷️
|
||||
442
deploy/aws/docker/api.py
Normal file
442
deploy/aws/docker/api.py
Normal file
@@ -0,0 +1,442 @@
|
||||
import os
|
||||
import json
|
||||
import asyncio
|
||||
from typing import List, Tuple
|
||||
|
||||
import logging
|
||||
from typing import Optional, AsyncGenerator
|
||||
from urllib.parse import unquote
|
||||
from fastapi import HTTPException, Request, status
|
||||
from fastapi.background import BackgroundTasks
|
||||
from fastapi.responses import JSONResponse
|
||||
from redis import asyncio as aioredis
|
||||
|
||||
from crawl4ai import (
|
||||
AsyncWebCrawler,
|
||||
CrawlerRunConfig,
|
||||
LLMExtractionStrategy,
|
||||
CacheMode,
|
||||
BrowserConfig,
|
||||
MemoryAdaptiveDispatcher,
|
||||
RateLimiter
|
||||
)
|
||||
from crawl4ai.utils import perform_completion_with_backoff
|
||||
from crawl4ai.content_filter_strategy import (
|
||||
PruningContentFilter,
|
||||
BM25ContentFilter,
|
||||
LLMContentFilter
|
||||
)
|
||||
from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator
|
||||
from crawl4ai.content_scraping_strategy import LXMLWebScrapingStrategy
|
||||
|
||||
from utils import (
|
||||
TaskStatus,
|
||||
FilterType,
|
||||
get_base_url,
|
||||
is_task_id,
|
||||
should_cleanup_task,
|
||||
decode_redis_hash
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
async def handle_llm_qa(
    url: str,
    query: str,
    config: dict
) -> str:
    """Answer *query* with an LLM, using the crawled page at *url* as context.

    Raises HTTPException(500) if the crawl fails or the completion errors.
    """
    try:
        # Drop a trailing '?q=...' segment so only the page URL is crawled.
        q_marker = url.rfind('?q=')
        if q_marker >= 0:
            url = url[:q_marker]

        # Crawl the page; its filtered markdown becomes the LLM context.
        async with AsyncWebCrawler() as crawler:
            result = await crawler.arun(url)
            if not result.success:
                raise HTTPException(
                    status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                    detail=result.error_message
                )
            content = result.markdown_v2.fit_markdown

        prompt = f"""Use the following content as context to answer the question.
Content:
{content}

Question: {query}

Answer:"""

        response = perform_completion_with_backoff(
            provider=config["llm"]["provider"],
            prompt_with_variables=prompt,
            api_token=os.environ.get(config["llm"].get("api_key_env", ""))
        )

        return response.choices[0].message.content
    except Exception as e:
        logger.error(f"QA processing error: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e)
        )
|
||||
|
||||
async def process_llm_extraction(
    redis: aioredis.Redis,
    config: dict,
    task_id: str,
    url: str,
    instruction: str,
    schema: Optional[str] = None,
    cache: str = "0"
) -> None:
    """Process LLM extraction in the background.

    Crawls *url* with an LLM extraction strategy and records the outcome on
    the Redis task hash: COMPLETED with a JSON result, or FAILED with an
    error message. Never raises — all failures are written to the task.
    """
    try:
        # A directly-configured api_key takes precedence over api_key_env.
        if "api_key" in config["llm"]:
            api_key = config["llm"]["api_key"]
        else:
            env_name = config["llm"].get("api_key_env")
            # Guard: os.environ.get(None) raises TypeError, so only consult
            # the environment when an env-var name is actually configured.
            api_key = os.environ.get(env_name, "") if env_name else ""

        llm_strategy = LLMExtractionStrategy(
            provider=config["llm"]["provider"],
            api_token=api_key,
            instruction=instruction,
            schema=json.loads(schema) if schema else None,
        )

        # cache == "1" enables read+write caching; otherwise write-only.
        cache_mode = CacheMode.ENABLED if cache == "1" else CacheMode.WRITE_ONLY

        async with AsyncWebCrawler() as crawler:
            result = await crawler.arun(
                url=url,
                config=CrawlerRunConfig(
                    extraction_strategy=llm_strategy,
                    scraping_strategy=LXMLWebScrapingStrategy(),
                    cache_mode=cache_mode
                )
            )

            if not result.success:
                await redis.hset(f"task:{task_id}", mapping={
                    "status": TaskStatus.FAILED,
                    "error": result.error_message
                })
                return

            try:
                content = json.loads(result.extracted_content)
            except json.JSONDecodeError:
                # Keep the raw payload when the strategy returns non-JSON text.
                content = result.extracted_content
            await redis.hset(f"task:{task_id}", mapping={
                "status": TaskStatus.COMPLETED,
                "result": json.dumps(content)
            })

    except Exception as e:
        logger.error(f"LLM extraction error: {str(e)}", exc_info=True)
        await redis.hset(f"task:{task_id}", mapping={
            "status": TaskStatus.FAILED,
            "error": str(e)
        })
|
||||
|
||||
async def handle_markdown_request(
    url: str,
    filter_type: FilterType,
    query: Optional[str] = None,
    cache: str = "0",
    config: Optional[dict] = None
) -> str:
    """Handle markdown generation requests.

    Crawls *url* and returns its markdown, post-processed by the content
    filter selected via *filter_type* (RAW returns the unfiltered markdown).

    Raises HTTPException(500) on crawl or processing failure.
    """
    try:
        decoded_url = unquote(url)
        if not decoded_url.startswith(('http://', 'https://')):
            decoded_url = 'https://' + decoded_url

        # Build only the filter that was requested. The previous dict-based
        # dispatch instantiated every filter eagerly, which both wasted work
        # and dereferenced config["llm"] (crashing on config=None) even for
        # non-LLM filter types.
        if filter_type == FilterType.RAW:
            md_generator = DefaultMarkdownGenerator()
        else:
            if filter_type == FilterType.FIT:
                content_filter = PruningContentFilter()
            elif filter_type == FilterType.BM25:
                content_filter = BM25ContentFilter(user_query=query or "")
            else:  # FilterType.LLM
                env_name = config["llm"].get("api_key_env")
                content_filter = LLMContentFilter(
                    provider=config["llm"]["provider"],
                    # os.environ.get(None) raises TypeError; only consult the
                    # environment when an env-var name is configured.
                    api_token=os.environ.get(env_name, "") if env_name else "",
                    instruction=query or "Extract main content"
                )
            md_generator = DefaultMarkdownGenerator(content_filter=content_filter)

        cache_mode = CacheMode.ENABLED if cache == "1" else CacheMode.WRITE_ONLY

        async with AsyncWebCrawler() as crawler:
            result = await crawler.arun(
                url=decoded_url,
                config=CrawlerRunConfig(
                    markdown_generator=md_generator,
                    scraping_strategy=LXMLWebScrapingStrategy(),
                    cache_mode=cache_mode
                )
            )

            if not result.success:
                raise HTTPException(
                    status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                    detail=result.error_message
                )

            return (result.markdown_v2.raw_markdown
                    if filter_type == FilterType.RAW
                    else result.markdown_v2.fit_markdown)

    except Exception as e:
        logger.error(f"Markdown error: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e)
        )
|
||||
|
||||
async def handle_llm_request(
    redis: aioredis.Redis,
    background_tasks: BackgroundTasks,
    request: Request,
    input_path: str,
    query: Optional[str] = None,
    schema: Optional[str] = None,
    cache: str = "0",
    config: Optional[dict] = None
) -> JSONResponse:
    """Handle LLM extraction requests.

    Dispatches to a status lookup when *input_path* is a task id; otherwise
    creates a new background extraction task for the given URL.
    """
    base_url = get_base_url(request)

    try:
        # A task id means the caller is polling an existing job.
        if is_task_id(input_path):
            return await handle_task_status(redis, input_path, base_url)

        # Without an instruction there is nothing to extract; offer an example.
        if not query:
            return JSONResponse({
                "message": "Please provide an instruction",
                "_links": {
                    "example": {
                        "href": f"{base_url}/llm/{input_path}?q=Extract+main+content",
                        "title": "Try this example"
                    }
                }
            })

        return await create_new_task(
            redis,
            background_tasks,
            input_path,
            query,
            schema,
            cache,
            base_url,
            config
        )

    except Exception as e:
        logger.error(f"LLM endpoint error: {str(e)}", exc_info=True)
        return JSONResponse({
            "error": str(e),
            "_links": {
                "retry": {"href": str(request.url)}
            }
        }, status_code=status.HTTP_500_INTERNAL_SERVER_ERROR)
|
||||
|
||||
async def handle_task_status(
    redis: aioredis.Redis,
    task_id: str,
    base_url: str
) -> JSONResponse:
    """Return the current state of a background task (404 if unknown).

    Terminal tasks older than the cleanup TTL are deleted after the
    response payload has been built.
    """
    raw = await redis.hgetall(f"task:{task_id}")
    if not raw:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Task not found"
        )

    task = decode_redis_hash(raw)
    response = create_task_response(task, task_id, base_url)

    is_terminal = task["status"] in (TaskStatus.COMPLETED, TaskStatus.FAILED)
    if is_terminal and should_cleanup_task(task["created_at"]):
        await redis.delete(f"task:{task_id}")

    return JSONResponse(response)
|
||||
|
||||
async def create_new_task(
    redis: aioredis.Redis,
    background_tasks: BackgroundTasks,
    input_path: str,
    query: str,
    schema: Optional[str],
    cache: str,
    base_url: str,
    config: dict
) -> JSONResponse:
    """Create and initialize a new LLM-extraction task.

    Normalizes *input_path* to a full URL, records the task in Redis, and
    schedules the extraction to run in the background. Returns the task id
    and links for polling its status.
    """
    from datetime import datetime
    from uuid import uuid4

    decoded_url = unquote(input_path)
    if not decoded_url.startswith(('http://', 'https://')):
        decoded_url = 'https://' + decoded_url

    # uuid4 suffix avoids collisions between tasks created in the same second
    # (the previous id(background_tasks)-based suffix could repeat across
    # requests). Keeps the "llm_..." prefix that is_task_id() relies on.
    task_id = f"llm_{int(datetime.now().timestamp())}_{uuid4().hex[:8]}"

    await redis.hset(f"task:{task_id}", mapping={
        "status": TaskStatus.PROCESSING,
        "created_at": datetime.now().isoformat(),
        "url": decoded_url
    })

    background_tasks.add_task(
        process_llm_extraction,
        redis,
        config,
        task_id,
        decoded_url,
        query,
        schema,
        cache
    )

    return JSONResponse({
        "task_id": task_id,
        "status": TaskStatus.PROCESSING,
        "url": decoded_url,
        "_links": {
            "self": {"href": f"{base_url}/llm/{task_id}"},
            "status": {"href": f"{base_url}/llm/{task_id}"}
        }
    })
|
||||
|
||||
def create_task_response(task: dict, task_id: str, base_url: str) -> dict:
    """Build the JSON body for a task status check.

    Adds "result" (decoded JSON) for completed tasks and "error" for
    failed ones.
    """
    poll_href = f"{base_url}/llm/{task_id}"
    response = {
        "task_id": task_id,
        "status": task["status"],
        "created_at": task["created_at"],
        "url": task["url"],
        "_links": {
            "self": {"href": poll_href},
            "refresh": {"href": poll_href}
        }
    }

    task_status = task["status"]
    if task_status == TaskStatus.COMPLETED:
        # The result was stored as a JSON string; return structured data.
        response["result"] = json.loads(task["result"])
    elif task_status == TaskStatus.FAILED:
        response["error"] = task["error"]

    return response
|
||||
|
||||
async def stream_results(crawler: AsyncWebCrawler, results_gen: AsyncGenerator) -> AsyncGenerator[bytes, None]:
    """Stream crawl results as newline-delimited JSON (NDJSON).

    Serialization failures for a single result are reported inline instead of
    aborting the stream; a {"status": "completed"} marker terminates it. The
    crawler is always closed when streaming ends, even on client disconnect.
    """
    import json
    from utils import datetime_handler

    try:
        async for result in results_gen:
            try:
                result_dict = result.model_dump()
                logger.info(f"Streaming result for {result_dict.get('url', 'unknown')}")
                data = json.dumps(result_dict, default=datetime_handler) + "\n"
                yield data.encode('utf-8')
            except Exception as e:
                logger.error(f"Serialization error: {e}")
                error_response = {"error": str(e), "url": getattr(result, 'url', 'unknown')}
                yield (json.dumps(error_response) + "\n").encode('utf-8')

        # Fix: the completion marker previously lacked a trailing newline,
        # which broke NDJSON framing for the final line.
        yield (json.dumps({"status": "completed"}) + "\n").encode('utf-8')

    except asyncio.CancelledError:
        logger.warning("Client disconnected during streaming")
    finally:
        try:
            await crawler.close()
        except Exception as e:
            logger.error(f"Crawler cleanup error: {e}")
|
||||
|
||||
async def handle_crawl_request(
    urls: List[str],
    browser_config: dict,
    crawler_config: dict,
    config: dict
) -> dict:
    """Crawl *urls* in one batch and return all results.

    Raises HTTPException(500) on any failure.
    """
    try:
        browser_cfg = BrowserConfig.load(browser_config)
        crawler_cfg = CrawlerRunConfig.load(crawler_config)

        # Memory-aware dispatch with a rate limiter configured from app config.
        rate_limiter = RateLimiter(
            base_delay=tuple(config["crawler"]["rate_limiter"]["base_delay"])
        )
        dispatcher = MemoryAdaptiveDispatcher(
            memory_threshold_percent=config["crawler"]["memory_threshold_percent"],
            rate_limiter=rate_limiter
        )

        async with AsyncWebCrawler(config=browser_cfg) as crawler:
            results = await crawler.arun_many(
                urls=urls,
                config=crawler_cfg,
                dispatcher=dispatcher
            )

            return {
                "success": True,
                "results": [result.model_dump() for result in results]
            }

    except Exception as e:
        logger.error(f"Crawl error: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e)
        )
|
||||
|
||||
async def handle_stream_crawl_request(
    urls: List[str],
    browser_config: dict,
    crawler_config: dict,
    config: dict
) -> Tuple[AsyncWebCrawler, AsyncGenerator]:
    """Start a streaming crawl of *urls*.

    Returns the started crawler together with the async result generator;
    the caller owns the crawler and must close it when streaming finishes.
    """
    crawler = None
    try:
        browser_cfg = BrowserConfig.load(browser_config)
        browser_cfg.verbose = True
        crawler_cfg = CrawlerRunConfig.load(crawler_config)
        crawler_cfg.scraping_strategy = LXMLWebScrapingStrategy()

        dispatcher = MemoryAdaptiveDispatcher(
            memory_threshold_percent=config["crawler"]["memory_threshold_percent"],
            rate_limiter=RateLimiter(
                base_delay=tuple(config["crawler"]["rate_limiter"]["base_delay"])
            )
        )

        crawler = AsyncWebCrawler(config=browser_cfg)
        await crawler.start()

        results_gen = await crawler.arun_many(
            urls=urls,
            config=crawler_cfg,
            dispatcher=dispatcher
        )

        return crawler, results_gen

    except Exception as e:
        # Don't leak a started browser when setup fails after crawler.start().
        if crawler is not None:
            await crawler.close()
        logger.error(f"Stream crawl error: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e)
        )
|
||||
46
deploy/aws/docker/auth.py
Normal file
46
deploy/aws/docker/auth.py
Normal file
@@ -0,0 +1,46 @@
|
||||
import os
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Dict, Optional
|
||||
from jwt import JWT, jwk_from_dict
|
||||
from jwt.utils import get_int_from_datetime
|
||||
from fastapi import Depends, HTTPException
|
||||
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
|
||||
from pydantic import EmailStr
|
||||
from pydantic.main import BaseModel
|
||||
import base64
|
||||
|
||||
instance = JWT()
|
||||
security = HTTPBearer()
|
||||
SECRET_KEY = os.environ.get("SECRET_KEY", "mysecret")
|
||||
ACCESS_TOKEN_EXPIRE_MINUTES = 60
|
||||
|
||||
def get_jwk_from_secret(secret: str):
    """Convert a secret string into a symmetric ("oct") JWK object."""
    # JWK "oct" keys carry the secret as unpadded base64url text.
    encoded = base64.urlsafe_b64encode(secret.encode('utf-8'))
    b64_secret = encoded.rstrip(b'=').decode('utf-8')
    return jwk_from_dict({"kty": "oct", "k": b64_secret})
|
||||
|
||||
def create_access_token(data: dict, expires_delta: Optional[timedelta] = None) -> str:
    """Create an HS256 JWT carrying *data* plus an "exp" claim.

    Expiry defaults to ACCESS_TOKEN_EXPIRE_MINUTES when *expires_delta*
    is not supplied.
    """
    lifetime = expires_delta or timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
    expire = datetime.now(timezone.utc) + lifetime
    payload = data.copy()
    payload.update({"exp": get_int_from_datetime(expire)})
    signing_key = get_jwk_from_secret(SECRET_KEY)
    return instance.encode(payload, signing_key, alg='HS256')
|
||||
|
||||
def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)) -> Dict:
    """Verify the bearer JWT from the Authorization header.

    Returns the decoded payload, or raises HTTPException(401) when the
    token is invalid or expired.
    """
    verifying_key = get_jwk_from_secret(SECRET_KEY)
    try:
        # do_time_check enforces the "exp" claim set at token creation.
        return instance.decode(
            credentials.credentials, verifying_key, do_time_check=True, algorithms='HS256'
        )
    except Exception:
        raise HTTPException(status_code=401, detail="Invalid or expired token")
|
||||
|
||||
def get_token_dependency(config: Dict):
    """Return the JWT verification dependency when enabled in *config*, else None."""
    security_cfg = config.get("security", {})
    if security_cfg.get("jwt_enabled", False):
        return verify_token
    return None
|
||||
|
||||
class TokenRequest(BaseModel):
    """Request body for the /token endpoint."""
    # Email address whose domain is MX-checked before a token is issued.
    email: EmailStr
|
||||
71
deploy/aws/docker/config.yml
Normal file
71
deploy/aws/docker/config.yml
Normal file
@@ -0,0 +1,71 @@
|
||||
# Application Configuration
|
||||
app:
|
||||
title: "Crawl4AI API"
|
||||
version: "1.0.0"
|
||||
host: "0.0.0.0"
|
||||
port: 8000
|
||||
reload: True
|
||||
timeout_keep_alive: 300
|
||||
|
||||
# Default LLM Configuration
|
||||
llm:
|
||||
provider: "openai/gpt-4o-mini"
|
||||
api_key_env: "OPENAI_API_KEY"
|
||||
# api_key: sk-... # If you pass the API key directly then api_key_env will be ignored
|
||||
|
||||
# Redis Configuration
|
||||
redis:
|
||||
host: "localhost"
|
||||
port: 6379
|
||||
db: 0
|
||||
password: ""
|
||||
ssl: False
|
||||
ssl_cert_reqs: None
|
||||
ssl_ca_certs: None
|
||||
ssl_certfile: None
|
||||
ssl_keyfile: None
|
||||
|
||||
# Rate Limiting Configuration
|
||||
rate_limiting:
|
||||
enabled: True
|
||||
default_limit: "1000/minute"
|
||||
trusted_proxies: []
|
||||
storage_uri: "memory://" # Use "redis://localhost:6379" for production
|
||||
|
||||
# Security Configuration
|
||||
security:
|
||||
enabled: true
|
||||
jwt_enabled: true
|
||||
https_redirect: false
|
||||
trusted_hosts: ["*"]
|
||||
headers:
|
||||
x_content_type_options: "nosniff"
|
||||
x_frame_options: "DENY"
|
||||
content_security_policy: "default-src 'self'"
|
||||
strict_transport_security: "max-age=63072000; includeSubDomains"
|
||||
|
||||
# Crawler Configuration
|
||||
crawler:
|
||||
memory_threshold_percent: 95.0
|
||||
rate_limiter:
|
||||
base_delay: [1.0, 2.0]
|
||||
timeouts:
|
||||
stream_init: 30.0 # Timeout for stream initialization
|
||||
batch_process: 300.0 # Timeout for batch processing
|
||||
|
||||
# Logging Configuration
|
||||
logging:
|
||||
level: "INFO"
|
||||
format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
||||
|
||||
# Observability Configuration
|
||||
observability:
|
||||
prometheus:
|
||||
enabled: True
|
||||
endpoint: "/metrics"
|
||||
health_check:
|
||||
endpoint: "/health"
|
||||
10
deploy/aws/docker/requirements.txt
Normal file
10
deploy/aws/docker/requirements.txt
Normal file
@@ -0,0 +1,10 @@
|
||||
crawl4ai
|
||||
fastapi
|
||||
uvicorn
|
||||
gunicorn>=23.0.0
|
||||
slowapi>=0.1.9
|
||||
prometheus-fastapi-instrumentator>=7.0.2
|
||||
redis>=5.2.1
|
||||
jwt>=1.3.1
|
||||
dnspython>=2.7.0
|
||||
email-validator>=2.2.0
|
||||
181
deploy/aws/docker/server.py
Normal file
181
deploy/aws/docker/server.py
Normal file
@@ -0,0 +1,181 @@
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from typing import List, Optional, Dict
|
||||
from fastapi import FastAPI, HTTPException, Request, Query, Path, Depends
|
||||
from fastapi.responses import StreamingResponse, RedirectResponse, PlainTextResponse, JSONResponse
|
||||
from fastapi.middleware.httpsredirect import HTTPSRedirectMiddleware
|
||||
from fastapi.middleware.trustedhost import TrustedHostMiddleware
|
||||
from pydantic import BaseModel, Field
|
||||
from slowapi import Limiter
|
||||
from slowapi.util import get_remote_address
|
||||
from prometheus_fastapi_instrumentator import Instrumentator
|
||||
from redis import asyncio as aioredis
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.realpath(__file__)))
|
||||
from utils import FilterType, load_config, setup_logging, verify_email_domain
|
||||
from api import (
|
||||
handle_markdown_request,
|
||||
handle_llm_qa,
|
||||
handle_stream_crawl_request,
|
||||
handle_crawl_request,
|
||||
stream_results
|
||||
)
|
||||
from auth import create_access_token, get_token_dependency, TokenRequest # Import from auth.py
|
||||
|
||||
__version__ = "0.2.6"
|
||||
|
||||
class CrawlRequest(BaseModel):
    """Request body for the /crawl and /crawl/stream endpoints."""
    # 1..100 URLs to crawl in one request.
    urls: List[str] = Field(min_length=1, max_length=100)
    # Optional overrides for BrowserConfig / CrawlerRunConfig (dict form).
    browser_config: Optional[Dict] = Field(default_factory=dict)
    crawler_config: Optional[Dict] = Field(default_factory=dict)
|
||||
|
||||
# Load configuration and setup
|
||||
config = load_config()
|
||||
setup_logging(config)
|
||||
|
||||
# Initialize Redis
|
||||
redis = aioredis.from_url(config["redis"].get("uri", "redis://localhost"))
|
||||
|
||||
# Initialize rate limiter
|
||||
limiter = Limiter(
|
||||
key_func=get_remote_address,
|
||||
default_limits=[config["rate_limiting"]["default_limit"]],
|
||||
storage_uri=config["rate_limiting"]["storage_uri"]
|
||||
)
|
||||
|
||||
app = FastAPI(
|
||||
title=config["app"]["title"],
|
||||
version=config["app"]["version"]
|
||||
)
|
||||
|
||||
# Configure middleware
|
||||
def setup_security_middleware(app, config):
    """Attach HTTPS-redirect and trusted-host middleware per the security config."""
    sec_config = config.get("security", {})
    if not sec_config.get("enabled", False):
        return
    if sec_config.get("https_redirect", False):
        app.add_middleware(HTTPSRedirectMiddleware)
    # A bare wildcard host list means "accept anything" — skip the middleware.
    if sec_config.get("trusted_hosts", []) != ["*"]:
        app.add_middleware(TrustedHostMiddleware, allowed_hosts=sec_config["trusted_hosts"])
|
||||
|
||||
setup_security_middleware(app, config)
|
||||
|
||||
# Prometheus instrumentation
|
||||
if config["observability"]["prometheus"]["enabled"]:
|
||||
Instrumentator().instrument(app).expose(app)
|
||||
|
||||
# Get token dependency based on config
|
||||
token_dependency = get_token_dependency(config)
|
||||
|
||||
# Middleware for security headers
|
||||
@app.middleware("http")
|
||||
async def add_security_headers(request: Request, call_next):
|
||||
response = await call_next(request)
|
||||
if config["security"]["enabled"]:
|
||||
response.headers.update(config["security"]["headers"])
|
||||
return response
|
||||
|
||||
# Token endpoint (always available, but usage depends on config)
|
||||
@app.post("/token")
|
||||
async def get_token(request_data: TokenRequest):
|
||||
if not verify_email_domain(request_data.email):
|
||||
raise HTTPException(status_code=400, detail="Invalid email domain")
|
||||
token = create_access_token({"sub": request_data.email})
|
||||
return {"email": request_data.email, "access_token": token, "token_type": "bearer"}
|
||||
|
||||
# Endpoints with conditional auth
|
||||
@app.get("/md/{url:path}")
|
||||
@limiter.limit(config["rate_limiting"]["default_limit"])
|
||||
async def get_markdown(
|
||||
request: Request,
|
||||
url: str,
|
||||
f: FilterType = FilterType.FIT,
|
||||
q: Optional[str] = None,
|
||||
c: Optional[str] = "0",
|
||||
token_data: Optional[Dict] = Depends(token_dependency)
|
||||
):
|
||||
result = await handle_markdown_request(url, f, q, c, config)
|
||||
return PlainTextResponse(result)
|
||||
|
||||
@app.get("/llm/{url:path}", description="URL should be without http/https prefix")
|
||||
async def llm_endpoint(
|
||||
request: Request,
|
||||
url: str = Path(...),
|
||||
q: Optional[str] = Query(None),
|
||||
token_data: Optional[Dict] = Depends(token_dependency)
|
||||
):
|
||||
if not q:
|
||||
raise HTTPException(status_code=400, detail="Query parameter 'q' is required")
|
||||
if not url.startswith(('http://', 'https://')):
|
||||
url = 'https://' + url
|
||||
try:
|
||||
answer = await handle_llm_qa(url, q, config)
|
||||
return JSONResponse({"answer": answer})
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.get("/schema")
|
||||
async def get_schema():
|
||||
from crawl4ai import BrowserConfig, CrawlerRunConfig
|
||||
return {"browser": BrowserConfig().dump(), "crawler": CrawlerRunConfig().dump()}
|
||||
|
||||
@app.get(config["observability"]["health_check"]["endpoint"])
|
||||
async def health():
|
||||
return {"status": "ok", "timestamp": time.time(), "version": __version__}
|
||||
|
||||
@app.get(config["observability"]["prometheus"]["endpoint"])
|
||||
async def metrics():
|
||||
return RedirectResponse(url=config["observability"]["prometheus"]["endpoint"])
|
||||
|
||||
@app.post("/crawl")
|
||||
@limiter.limit(config["rate_limiting"]["default_limit"])
|
||||
async def crawl(
|
||||
request: Request,
|
||||
crawl_request: CrawlRequest,
|
||||
token_data: Optional[Dict] = Depends(token_dependency)
|
||||
):
|
||||
if not crawl_request.urls:
|
||||
raise HTTPException(status_code=400, detail="At least one URL required")
|
||||
|
||||
results = await handle_crawl_request(
|
||||
urls=crawl_request.urls,
|
||||
browser_config=crawl_request.browser_config,
|
||||
crawler_config=crawl_request.crawler_config,
|
||||
config=config
|
||||
)
|
||||
|
||||
return JSONResponse(results)
|
||||
|
||||
|
||||
@app.post("/crawl/stream")
|
||||
@limiter.limit(config["rate_limiting"]["default_limit"])
|
||||
async def crawl_stream(
|
||||
request: Request,
|
||||
crawl_request: CrawlRequest,
|
||||
token_data: Optional[Dict] = Depends(token_dependency)
|
||||
):
|
||||
if not crawl_request.urls:
|
||||
raise HTTPException(status_code=400, detail="At least one URL required")
|
||||
|
||||
crawler, results_gen = await handle_stream_crawl_request(
|
||||
urls=crawl_request.urls,
|
||||
browser_config=crawl_request.browser_config,
|
||||
crawler_config=crawl_request.crawler_config,
|
||||
config=config
|
||||
)
|
||||
|
||||
return StreamingResponse(
|
||||
stream_results(crawler, results_gen),
|
||||
media_type='application/x-ndjson',
|
||||
headers={'Cache-Control': 'no-cache', 'Connection': 'keep-alive', 'X-Stream-Status': 'active'}
|
||||
)
|
||||
|
||||
if __name__ == "__main__":
|
||||
import uvicorn
|
||||
uvicorn.run(
|
||||
"server:app",
|
||||
host=config["app"]["host"],
|
||||
port=config["app"]["port"],
|
||||
reload=config["app"]["reload"],
|
||||
timeout_keep_alive=config["app"]["timeout_keep_alive"]
|
||||
)
|
||||
12
deploy/aws/docker/supervisord.conf
Normal file
12
deploy/aws/docker/supervisord.conf
Normal file
@@ -0,0 +1,12 @@
|
||||
[supervisord]
|
||||
nodaemon=true
|
||||
|
||||
[program:redis]
|
||||
command=redis-server
|
||||
autorestart=true
|
||||
priority=10
|
||||
|
||||
[program:gunicorn]
|
||||
command=gunicorn --bind 0.0.0.0:8000 --workers 4 --threads 2 --timeout 300 --graceful-timeout 60 --keep-alive 65 --log-level debug --worker-class uvicorn.workers.UvicornWorker --max-requests 1000 --max-requests-jitter 50 server:app
|
||||
autorestart=true
|
||||
priority=20
|
||||
66
deploy/aws/docker/utils.py
Normal file
66
deploy/aws/docker/utils.py
Normal file
@@ -0,0 +1,66 @@
|
||||
import dns.resolver
|
||||
import logging
|
||||
import yaml
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from fastapi import Request
|
||||
from typing import Dict, Optional
|
||||
|
||||
class TaskStatus(str, Enum):
    """Lifecycle states for a background LLM-extraction task."""
    PROCESSING = "processing"
    FAILED = "failed"
    COMPLETED = "completed"
|
||||
|
||||
class FilterType(str, Enum):
    """Markdown content-filter selector used by the /md endpoint."""
    RAW = "raw"
    FIT = "fit"
    BM25 = "bm25"
    LLM = "llm"
|
||||
|
||||
def load_config() -> Dict:
    """Load config.yml (next to this module) and return it as a dict."""
    config_path = Path(__file__).parent / "config.yml"
    with config_path.open("r") as config_file:
        return yaml.safe_load(config_file)
|
||||
|
||||
def setup_logging(config: Dict) -> None:
    """Configure root logging from the "logging" section of *config*."""
    log_cfg = config["logging"]
    logging.basicConfig(level=log_cfg["level"], format=log_cfg["format"])
|
||||
|
||||
def get_base_url(request: Request) -> str:
    """Return '<scheme>://<host[:port]>' for *request* (no path or query)."""
    url = request.url
    return f"{url.scheme}://{url.netloc}"
|
||||
|
||||
def is_task_id(value: str) -> bool:
    """Check if the value matches the task ID pattern.

    Task ids are generated as "llm_<timestamp>_<suffix>".
    """
    has_prefix = value.startswith("llm_")
    return has_prefix and "_" in value
|
||||
|
||||
def datetime_handler(obj: any) -> Optional[str]:
    """JSON `default=` hook: serialize datetime-like objects via isoformat().

    Raises TypeError for anything without an isoformat() attribute, matching
    json.dumps' contract for unserializable objects.
    """
    iso = getattr(obj, 'isoformat', None)
    if iso is not None:
        return iso()
    raise TypeError(f"Object of type {type(obj)} is not JSON serializable")
|
||||
|
||||
def should_cleanup_task(created_at: str, ttl_seconds: float = 3600) -> bool:
    """Check if a task is older than *ttl_seconds* (default one hour).

    *created_at* is an ISO-8601 timestamp as stored on the task hash.
    The TTL was previously a hard-coded 3600; it is now a parameter with
    the same default, so existing callers are unaffected.
    """
    created = datetime.fromisoformat(created_at)
    age_seconds = (datetime.now() - created).total_seconds()
    return age_seconds > ttl_seconds
|
||||
|
||||
def decode_redis_hash(hash_data: Dict[bytes, bytes]) -> Dict[str, str]:
    """Decode Redis hash data from bytes to strings.

    Keys/values that are already str (e.g. from a client created with
    decode_responses=True) pass through unchanged instead of raising
    AttributeError.
    """
    def _to_str(item):
        return item.decode('utf-8') if isinstance(item, bytes) else item

    return {_to_str(k): _to_str(v) for k, v in hash_data.items()}
|
||||
|
||||
|
||||
|
||||
def verify_email_domain(email: str) -> bool:
    """Best-effort check that the email's domain publishes at least one MX record.

    Any failure (malformed address, NXDOMAIN, DNS timeout) counts as invalid.
    """
    try:
        domain = email.split('@')[1]
        # A resolvable MX record is taken as proof the domain accepts mail.
        records = dns.resolver.resolve(domain, 'MX')
        return True if records else False
    except Exception:
        return False
|
||||
77
deploy/aws/howto.md
Normal file
77
deploy/aws/howto.md
Normal file
@@ -0,0 +1,77 @@
|
||||
# Crawl4AI API Quickstart
|
||||
|
||||
This document shows how to generate an API token and use it to call the `/crawl` and `/md` endpoints.
|
||||
|
||||
---
|
||||
|
||||
## 1. Crawl Example
|
||||
|
||||
Send a POST request to `/crawl` with the following JSON payload:
|
||||
|
||||
```json
|
||||
{
|
||||
"urls": ["https://example.com"],
|
||||
"browser_config": { "headless": true, "verbose": true },
|
||||
"crawler_config": { "stream": false, "cache_mode": "enabled" }
|
||||
}
|
||||
```
|
||||
|
||||
**cURL Command:**
|
||||
|
||||
```bash
|
||||
curl -X POST "https://api.crawl4ai.com/crawl" \
|
||||
-H "Authorization: Bearer YOUR_API_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"urls": ["https://example.com"],
|
||||
"browser_config": {"headless": true, "verbose": true},
|
||||
"crawler_config": {"stream": false, "cache_mode": "enabled"}
|
||||
}'
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 2. Markdown Retrieval Example
|
||||
|
||||
To retrieve markdown from a given URL (e.g., `https://example.com`), use:
|
||||
|
||||
```bash
|
||||
curl -X GET "https://api.crawl4ai.com/md/example.com" \
|
||||
-H "Authorization: Bearer YOUR_API_TOKEN"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 3. Python Code Example (Using `requests`)
|
||||
|
||||
Below is a sample Python script that demonstrates using the `requests` library to call the API endpoints:
|
||||
|
||||
```python
|
||||
import requests
|
||||
|
||||
BASE_URL = "https://api.crawl4ai.com"
|
||||
TOKEN = "YOUR_API_TOKEN" # Replace with your actual token
|
||||
|
||||
headers = {
|
||||
"Authorization": f"Bearer {TOKEN}",
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
|
||||
# Crawl endpoint example
|
||||
crawl_payload = {
|
||||
"urls": ["https://example.com"],
|
||||
"browser_config": {"headless": True, "verbose": True},
|
||||
"crawler_config": {"stream": False, "cache_mode": "enabled"}
|
||||
}
|
||||
|
||||
crawl_response = requests.post(f"{BASE_URL}/crawl", json=crawl_payload, headers=headers)
|
||||
print("Crawl Response:", crawl_response.json())
|
||||
|
||||
# /md endpoint example
|
||||
md_response = requests.get(f"{BASE_URL}/md/example.com", headers=headers)
|
||||
print("Markdown Content:", md_response.text)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
Happy crawling!
|
||||
2
deploy/aws/nginx/Dockerfile
Normal file
2
deploy/aws/nginx/Dockerfile
Normal file
@@ -0,0 +1,2 @@
|
||||
FROM nginx:alpine
|
||||
COPY nginx.conf /etc/nginx/conf.d/default.conf
|
||||
55
deploy/aws/nginx/nginx.conf
Normal file
55
deploy/aws/nginx/nginx.conf
Normal file
@@ -0,0 +1,55 @@
|
||||
server {
|
||||
listen 80;
|
||||
server_name api.crawl4ai.com;
|
||||
|
||||
# Main logging settings
|
||||
error_log /var/log/nginx/error.log debug;
|
||||
access_log /var/log/nginx/access.log combined buffer=512k flush=1m;
|
||||
|
||||
# Timeout and buffering settings
|
||||
proxy_connect_timeout 300;
|
||||
proxy_send_timeout 300;
|
||||
proxy_read_timeout 300;
|
||||
send_timeout 300;
|
||||
proxy_buffer_size 128k;
|
||||
proxy_buffers 4 256k;
|
||||
proxy_busy_buffers_size 256k;
|
||||
|
||||
# Health check location
|
||||
location /health {
|
||||
proxy_pass http://127.0.0.1:8000/health;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
|
||||
# Main proxy for application endpoints
|
||||
location / {
|
||||
proxy_pass http://127.0.0.1:8000;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
add_header X-Debug-Info $request_uri;
|
||||
proxy_request_buffering off;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Connection "";
|
||||
proxy_buffering off;
|
||||
}
|
||||
|
||||
# New endpoint: serve Nginx error log
|
||||
location /nginx/error {
|
||||
# Using "alias" to serve the error log file
|
||||
alias /var/log/nginx/error.log;
|
||||
# Optionally, you might restrict access with "allow" and "deny" directives.
|
||||
}
|
||||
|
||||
# New endpoint: serve Nginx access log
|
||||
location /nginx/access {
|
||||
alias /var/log/nginx/access.log;
|
||||
}
|
||||
|
||||
client_max_body_size 10M;
|
||||
client_body_buffer_size 128k;
|
||||
}
|
||||
1
deploy/aws/version.txt
Normal file
1
deploy/aws/version.txt
Normal file
@@ -0,0 +1 @@
|
||||
v0.1.0
|
||||
63
deploy/gcloud-function/Dockerfile
Normal file
63
deploy/gcloud-function/Dockerfile
Normal file
@@ -0,0 +1,63 @@
|
||||
FROM --platform=linux/amd64 python:3.10-slim
|
||||
|
||||
# Install system dependencies required for Chromium and Git
|
||||
RUN apt-get update && apt-get install -y \
|
||||
python3-dev \
|
||||
pkg-config \
|
||||
libjpeg-dev \
|
||||
gcc \
|
||||
build-essential \
|
||||
libnss3 \
|
||||
libnspr4 \
|
||||
libatk1.0-0 \
|
||||
libatk-bridge2.0-0 \
|
||||
libcups2 \
|
||||
libdrm2 \
|
||||
libxkbcommon0 \
|
||||
libxcomposite1 \
|
||||
libxdamage1 \
|
||||
libxfixes3 \
|
||||
libxrandr2 \
|
||||
libgbm1 \
|
||||
libasound2 \
|
||||
libpango-1.0-0 \
|
||||
libcairo2 \
|
||||
procps \
|
||||
git \
|
||||
socat \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Make a directory for crawl4ai call it crawl4ai_repo
|
||||
# RUN mkdir crawl4ai_repo
|
||||
|
||||
# # Clone Crawl4ai from the next branch and install it
|
||||
# RUN git clone --branch next https://github.com/unclecode/crawl4ai.git ./crawl4ai_repo \
|
||||
# && cd crawl4ai_repo \
|
||||
# && pip install . \
|
||||
# && cd .. \
|
||||
# && rm -rf crawl4ai_repo
|
||||
|
||||
RUN python3 -m venv /app/venv
|
||||
ENV PATH="/app/venv/bin:$PATH"
|
||||
# RUN pip install git+https://github.com/unclecode/crawl4ai.git@next
|
||||
|
||||
# Copy requirements and install remaining dependencies
|
||||
COPY requirements.txt .
|
||||
RUN pip install -r requirements.txt
|
||||
|
||||
# Copy application files
|
||||
COPY resources /app/resources
|
||||
COPY main.py .
|
||||
COPY start.sh .
|
||||
|
||||
# Set permissions for Chrome binary and start script
|
||||
RUN chmod +x /app/resources/chrome/headless_shell && \
|
||||
chmod -R 755 /app/resources/chrome && \
|
||||
chmod +x start.sh
|
||||
|
||||
ENV FUNCTION_TARGET=crawl
|
||||
EXPOSE 8080 9223
|
||||
|
||||
CMD ["/app/start.sh"]
|
||||
8
deploy/gcloud-function/config.yml
Normal file
8
deploy/gcloud-function/config.yml
Normal file
@@ -0,0 +1,8 @@
|
||||
project_id: PROJECT_ID
|
||||
region: REGION_NAME
|
||||
artifact_repo: ARTIFACT_REPO_NAME
|
||||
function_name: FUNCTION_NAME
|
||||
memory: "2048MB"
|
||||
timeout: "540s"
|
||||
local_image: "gcr.io/ARTIFACT_REPO_NAME/crawl4ai:latest"
|
||||
test_query_url: "https://example.com"
|
||||
187
deploy/gcloud-function/deploy.py
Normal file
187
deploy/gcloud-function/deploy.py
Normal file
@@ -0,0 +1,187 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import subprocess
|
||||
import sys
|
||||
import yaml
|
||||
import requests
|
||||
|
||||
def run_command(cmd, explanation, require_confirm=True, allow_already_exists=False):
    """Execute a shell command, narrating progress to stdout.

    Prints a banner with *explanation*, optionally waits for the user to
    confirm, then runs *cmd* through the shell. On failure the script
    exits — unless allow_already_exists is True and the stderr contains
    ALREADY_EXISTS, which is treated as a benign no-op (returns "").

    Returns the command's stripped stdout.
    """
    print(f"\n=== {explanation} ===")
    if require_confirm:
        input(f"Press Enter to run: [{cmd}]\n")
    print(f"Running: {cmd}")
    completed = subprocess.run(cmd, shell=True, capture_output=True, text=True)
    if completed.returncode != 0:
        # Idempotent setup steps may legitimately hit an existing resource.
        if allow_already_exists and "ALREADY_EXISTS" in completed.stderr:
            print("Repository already exists, skipping creation.")
            return ""
        print(f"Error:\n{completed.stderr}")
        sys.exit(1)
    output = completed.stdout.strip()
    if output:
        print(f"Output:\n{output}")
    return output
|
||||
|
||||
def load_config():
    """Read config.yml from the working directory and validate it.

    Exits the process if the file cannot be read/parsed or if any of the
    mandatory deployment keys is absent or empty. Returns the parsed dict.
    """
    try:
        with open("config.yml", "r") as fh:
            cfg = yaml.safe_load(fh)
    except Exception as exc:
        print(f"Failed to load config.yml: {exc}")
        sys.exit(1)
    # These keys have no sensible defaults — refuse to continue without them.
    for key in ("project_id", "region", "artifact_repo", "function_name", "local_image"):
        if not cfg.get(key):
            print(f"Missing required config parameter: {key}")
            sys.exit(1)
    return cfg
|
||||
|
||||
def deploy_function(config):
    """Build, push, and deploy the Cloud Function, then smoke-test it.

    Steps: create the Artifact Registry repository (idempotent), tag and
    push the local Docker image, deploy the Gen2 function from that image,
    open IAM to allUsers, retrieve the deployed URL, and issue one test
    request. Exits via run_command() on any command failure.
    """
    project_id = config["project_id"]
    region = config["region"]
    artifact_repo = config["artifact_repo"]
    function_name = config["function_name"]
    memory = config.get("memory", "2048MB")
    timeout = config.get("timeout", "540s")
    local_image = config["local_image"]
    test_query_url = config.get("test_query_url", "https://example.com")

    # Repository image format: "<region>-docker.pkg.dev/<project_id>/<artifact_repo>/<function_name>:latest"
    repo_image = f"{region}-docker.pkg.dev/{project_id}/{artifact_repo}/{function_name}:latest"

    # 1. Create Artifact Registry repository (skip if exists)
    cmd = f"gcloud artifacts repositories create {artifact_repo} --repository-format=docker --location={region} --project={project_id}"
    run_command(cmd, "Creating Artifact Registry repository (if it doesn't exist)", allow_already_exists=True)

    # 2. Tag the local Docker image with the repository image name
    cmd = f"docker tag {local_image} {repo_image}"
    run_command(cmd, "Tagging Docker image for Artifact Registry")

    # 3. Authenticate Docker to Artifact Registry
    cmd = f"gcloud auth configure-docker {region}-docker.pkg.dev"
    run_command(cmd, "Authenticating Docker to Artifact Registry")

    # 4. Push the tagged Docker image to Artifact Registry
    cmd = f"docker push {repo_image}"
    run_command(cmd, "Pushing Docker image to Artifact Registry")

    # 5. Deploy the Cloud Function using the custom container
    cmd = (
        f"gcloud beta functions deploy {function_name} "
        f"--gen2 "
        f"--runtime=python310 "
        f"--entry-point=crawl "
        f"--region={region} "
        f"--docker-repository={region}-docker.pkg.dev/{project_id}/{artifact_repo} "
        f"--trigger-http "
        f"--memory={memory} "
        f"--timeout={timeout} "
        f"--project={project_id}"
    )
    run_command(cmd, "Deploying Cloud Function using custom container")

    # 6. Set the Cloud Function to allow public (unauthenticated) invocations
    cmd = (
        f"gcloud functions add-iam-policy-binding {function_name} "
        f"--region={region} "
        f"--member='allUsers' "
        f"--role='roles/cloudfunctions.invoker' "
        # BUG FIX: trailing space added — adjacent f-strings previously
        # concatenated into "--project=<id>--quiet", which gcloud rejects.
        f"--project={project_id} "
        f"--quiet"
    )
    run_command(cmd, "Setting Cloud Function IAM to allow public invocations")

    # 7. Retrieve the deployed Cloud Function URL
    cmd = (
        f"gcloud functions describe {function_name} "
        f"--region={region} "
        f"--project={project_id} "
        f"--format='value(serviceConfig.uri)'"
    )
    deployed_url = run_command(cmd, "Extracting deployed Cloud Function URL", require_confirm=False)
    print("\nDeployed URL: {}\n".format(deployed_url))

    # 8. Test the deployed function with a single GET request
    test_url = f"{deployed_url}?url={test_query_url}"
    print("Testing function with: {}".format(test_url))
    try:
        response = requests.get(test_url)
        print("Response status: {}".format(response.status_code))
        print("Response body:\n{}".format(response.text))
        if response.status_code == 200:
            print("Test successful!")
        else:
            print("Non-200 response; check function logs.")
    except Exception as e:
        print("Test request error: {}".format(e))
        sys.exit(1)

    # 9. Final usage help
    print("\nDeployment complete!")
    print("Invoke your function with:")
    print(f"curl '{deployed_url}?url={test_query_url}'")
    print("For further instructions, refer to your documentation.")
|
||||
|
||||
def delete_function(config):
    """Remove the deployed Cloud Function (non-interactive via --quiet)."""
    cmd = (
        f"gcloud functions delete {config['function_name']} "
        f"--region={config['region']} "
        f"--project={config['project_id']} --quiet"
    )
    run_command(cmd, "Deleting Cloud Function")
|
||||
|
||||
def describe_function(config):
    """Look up and print the URL of the deployed Cloud Function."""
    cmd = (
        f"gcloud functions describe {config['function_name']} "
        f"--region={config['region']} "
        f"--project={config['project_id']} "
        f"--format='value(serviceConfig.uri)'"
    )
    url = run_command(cmd, "Describing Cloud Function to extract URL", require_confirm=False)
    print(f"\nCloud Function URL: {url}\n")
|
||||
|
||||
def clear_all(config):
    """Tear down both the Cloud Function and its Artifact Registry repo.

    Asks for explicit confirmation first; anything other than 'y' aborts.
    """
    print("\n=== CLEAR ALL RESOURCES ===")
    answer = input("WARNING: This will DELETE the Cloud Function and the Artifact Registry repository. Are you sure? (y/N): ")
    if answer.lower() != "y":
        print("Aborting clear operation.")
        sys.exit(0)

    # Remove the function first, then its container repository.
    delete_function(config)
    cmd = (
        f"gcloud artifacts repositories delete {config['artifact_repo']} "
        f"--location={config['region']} --project={config['project_id']} --quiet"
    )
    run_command(cmd, "Deleting Artifact Registry repository", require_confirm=False)
    print("All resources cleared.")
|
||||
|
||||
def main():
    """CLI entry point: dispatch deploy/delete/describe/clear subcommands."""
    parser = argparse.ArgumentParser(
        description="Deploy, delete, describe, or clear Cloud Function resources using config.yml"
    )
    sub = parser.add_subparsers(dest="command", required=True)
    sub.add_parser("deploy", help="Deploy the Cloud Function")
    sub.add_parser("delete", help="Delete the deployed Cloud Function")
    sub.add_parser("describe", help="Describe the Cloud Function and return its URL")
    sub.add_parser("clear", help="Delete the Cloud Function and Artifact Registry repository")

    args = parser.parse_args()
    config = load_config()

    # Table-driven dispatch; unknown commands fall back to the help text.
    handlers = {
        "deploy": deploy_function,
        "delete": delete_function,
        "describe": describe_function,
        "clear": clear_all,
    }
    handler = handlers.get(args.command)
    if handler is None:
        parser.print_help()
    else:
        handler(config)


if __name__ == "__main__":
    main()
|
||||
204
deploy/gcloud-function/guide.md
Normal file
204
deploy/gcloud-function/guide.md
Normal file
@@ -0,0 +1,204 @@
|
||||
# Deploying Crawl4ai on Google Cloud Functions
|
||||
|
||||
This guide explains how to deploy **Crawl4ai**—an open‑source web crawler library—on Google Cloud Functions Gen2 using a custom container. We assume your project folder already includes:
|
||||
|
||||
- **Dockerfile:** Builds your container image (which installs Crawl4ai from its Git repository).
|
||||
- **start.sh:** Activates your virtual environment and starts the function (using the Functions Framework).
|
||||
- **main.py:** Contains your function logic with the entry point `crawl` (and imports Crawl4ai).
|
||||
|
||||
The guide is divided into two parts:
|
||||
1. Manual deployment steps (using CLI commands)
|
||||
2. Automated deployment using a Python script (`deploy.py`)
|
||||
|
||||
---
|
||||
|
||||
## Part 1: Manual Deployment Process
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- **Google Cloud Project:** Ensure your project is active and billing is enabled.
|
||||
- **Google Cloud CLI & Docker:** Installed and configured on your local machine.
|
||||
- **Permissions:** You must have rights to create Cloud Functions and Artifact Registry repositories.
|
||||
- **Files:** Your Dockerfile, start.sh, and main.py should be in the same directory.
|
||||
|
||||
### Step 1: Build Your Docker Image
|
||||
|
||||
Your Dockerfile packages Crawl4ai along with all its dependencies. Build your image with:
|
||||
|
||||
```bash
|
||||
docker build -t gcr.io/<PROJECT_ID>/<FUNCTION_NAME>:latest .
|
||||
```
|
||||
|
||||
Replace `<PROJECT_ID>` with your Google Cloud project ID and `<FUNCTION_NAME>` with your chosen function name (for example, `crawl4ai-t1`).
|
||||
|
||||
### Step 2: Create an Artifact Registry Repository
|
||||
|
||||
Cloud Functions Gen2 requires your custom container image to reside in an Artifact Registry repository. Create one by running:
|
||||
|
||||
```bash
|
||||
gcloud artifacts repositories create <ARTIFACT_REPO> \
|
||||
--repository-format=docker \
|
||||
--location=<REGION> \
|
||||
--project=<PROJECT_ID>
|
||||
```
|
||||
|
||||
Replace `<ARTIFACT_REPO>` (for example, `crawl4ai`) and `<REGION>` (for example, `asia-east1`).
|
||||
> **Note:** If you receive an `ALREADY_EXISTS` error, the repository is already created; simply proceed to the next step.
|
||||
|
||||
### Step 3: Tag Your Docker Image
|
||||
|
||||
Tag your locally built Docker image so it matches the Artifact Registry format:
|
||||
|
||||
```bash
|
||||
docker tag gcr.io/<PROJECT_ID>/<FUNCTION_NAME>:latest <REGION>-docker.pkg.dev/<PROJECT_ID>/<ARTIFACT_REPO>/<FUNCTION_NAME>:latest
|
||||
```
|
||||
|
||||
This step “renames” the image so you can push it to your repository.
|
||||
|
||||
### Step 4: Authenticate Docker to Artifact Registry
|
||||
|
||||
Configure Docker authentication to the Artifact Registry:
|
||||
|
||||
```bash
|
||||
gcloud auth configure-docker <REGION>-docker.pkg.dev
|
||||
```
|
||||
|
||||
This ensures Docker can securely push images to your registry using your Cloud credentials.
|
||||
|
||||
### Step 5: Push the Docker Image
|
||||
|
||||
Push the tagged image to Artifact Registry:
|
||||
|
||||
```bash
|
||||
docker push <REGION>-docker.pkg.dev/<PROJECT_ID>/<ARTIFACT_REPO>/<FUNCTION_NAME>:latest
|
||||
```
|
||||
|
||||
Once complete, your container image (with Crawl4ai installed) is hosted in Artifact Registry.
|
||||
|
||||
### Step 6: Deploy the Cloud Function
|
||||
|
||||
Deploy your function using the custom container image. Run:
|
||||
|
||||
```bash
|
||||
gcloud beta functions deploy <FUNCTION_NAME> \
|
||||
--gen2 \
|
||||
--region=<REGION> \
|
||||
--docker-repository=<REGION>-docker.pkg.dev/<PROJECT_ID>/<ARTIFACT_REPO> \
|
||||
--trigger-http \
|
||||
--memory=2048MB \
|
||||
--timeout=540s \
|
||||
--project=<PROJECT_ID>
|
||||
```
|
||||
|
||||
This command tells Cloud Functions Gen2 to pull your container image from Artifact Registry and deploy it. Make sure your main.py defines the `crawl` entry point.
|
||||
|
||||
### Step 7: Make the Function Public
|
||||
|
||||
To allow external (unauthenticated) access, update the function’s IAM policy:
|
||||
|
||||
```bash
|
||||
gcloud functions add-iam-policy-binding <FUNCTION_NAME> \
|
||||
--region=<REGION> \
|
||||
--member="allUsers" \
|
||||
--role="roles/cloudfunctions.invoker" \
|
||||
--project=<PROJECT_ID> \
|
||||
--quiet
|
||||
```
|
||||
|
||||
Using the `--quiet` flag ensures the command runs non‑interactively so the policy is applied immediately.
|
||||
|
||||
### Step 8: Retrieve and Test Your Function URL
|
||||
|
||||
Get the URL for your deployed function:
|
||||
|
||||
```bash
|
||||
gcloud functions describe <FUNCTION_NAME> \
|
||||
--region=<REGION> \
|
||||
--project=<PROJECT_ID> \
|
||||
--format='value(serviceConfig.uri)'
|
||||
```
|
||||
|
||||
Test your deployment with a sample GET request (using curl or your browser):
|
||||
|
||||
```bash
|
||||
curl "<FUNCTION_URL>?url=https://example.com"
|
||||
```
|
||||
|
||||
Replace `<FUNCTION_URL>` with the output URL from the previous command. A successful test (HTTP status 200) means Crawl4ai is running on Cloud Functions.
|
||||
|
||||
---
|
||||
|
||||
## Part 2: Automated Deployment with deploy.py
|
||||
|
||||
For a more streamlined process, use the provided `deploy.py` script. This Python script automates the manual steps, prompting you to confirm key actions and providing detailed logs throughout the process.
|
||||
|
||||
### What deploy.py Does:
|
||||
|
||||
- **Reads Parameters:** It loads a `config.yml` file containing all necessary parameters such as `project_id`, `region`, `artifact_repo`, `function_name`, `local_image`, etc.
|
||||
- **Creates/Skips Repository:** It creates the Artifact Registry repository (or skips if it already exists).
|
||||
- **Tags & Pushes:** It tags your local Docker image and pushes it to the Artifact Registry.
|
||||
- **Deploys the Function:** It deploys the Cloud Function with your custom container.
|
||||
- **Updates IAM:** It sets the IAM policy to allow public access (using the `--quiet` flag).
|
||||
- **Tests the Deployment:** It extracts the deployed URL and performs a test request.
|
||||
- **Additional Commands:** You can also use subcommands in the script to delete or describe the deployed function, or even clear all resources.
|
||||
|
||||
### Example config.yml
|
||||
|
||||
Create a `config.yml` file in the same folder as your Dockerfile. An example configuration:
|
||||
|
||||
```yaml
|
||||
project_id: your-project-id
|
||||
region: asia-east1
|
||||
artifact_repo: crawl4ai
|
||||
function_name: crawl4ai-t1
|
||||
memory: "2048MB"
|
||||
timeout: "540s"
|
||||
local_image: "gcr.io/your-project-id/crawl4ai-t1:latest"
|
||||
test_query_url: "https://example.com"
|
||||
```
|
||||
|
||||
### How to Use deploy.py
|
||||
|
||||
- **Deploy the Function:**
|
||||
|
||||
```bash
|
||||
python deploy.py deploy
|
||||
```
|
||||
|
||||
The script will guide you through each step, display the output, and ask for confirmation before executing critical commands.
|
||||
|
||||
- **Describe the Function:**
|
||||
|
||||
If you forget the function URL and want to retrieve it later:
|
||||
|
||||
```bash
|
||||
python deploy.py describe
|
||||
```
|
||||
|
||||
- **Delete the Function:**
|
||||
|
||||
To remove just the Cloud Function:
|
||||
|
||||
```bash
|
||||
python deploy.py delete
|
||||
```
|
||||
|
||||
- **Clear All Resources:**
|
||||
|
||||
To delete both the Cloud Function and the Artifact Registry repository:
|
||||
|
||||
```bash
|
||||
python deploy.py clear
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Conclusion
|
||||
|
||||
This guide has walked you through two deployment methods for Crawl4ai on Google Cloud Functions Gen2:
|
||||
|
||||
1. **Manual Deployment:** Building your Docker image, pushing it to Artifact Registry, deploying the Cloud Function, and setting up IAM.
|
||||
2. **Automated Deployment:** Using `deploy.py` with a configuration file to handle the entire process interactively.
|
||||
|
||||
By following these instructions, you can deploy, test, and manage your Crawl4ai-based Cloud Function with ease. Enjoy using Crawl4ai in your cloud environment!
|
||||
|
||||
158
deploy/gcloud-function/main.py
Normal file
158
deploy/gcloud-function/main.py
Normal file
@@ -0,0 +1,158 @@
|
||||
# Cleanup Chrome process on module unload
|
||||
import atexit
|
||||
import asyncio
|
||||
import logging
|
||||
import functions_framework
|
||||
from flask import jsonify, Request
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import subprocess
|
||||
import signal
|
||||
import requests
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
logger.info(f"Python version: {sys.version}")
|
||||
logger.info(f"Python path: {sys.path}")
|
||||
|
||||
# Try to find where crawl4ai is coming from
|
||||
try:
|
||||
import crawl4ai
|
||||
logger.info(f"Crawl4AI module location: {crawl4ai.__file__}")
|
||||
logger.info(f"Contents of crawl4ai: {dir(crawl4ai)}")
|
||||
except ImportError:
|
||||
logger.error("Crawl4AI module not found")
|
||||
|
||||
# Now attempt the import
|
||||
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode, CrawlResult
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Paths and constants
|
||||
FUNCTION_DIR = os.path.dirname(os.path.realpath(__file__))
|
||||
CHROME_BINARY = os.path.join(FUNCTION_DIR, "resources/chrome/headless_shell")
|
||||
CDP_PORT = 9222
|
||||
|
||||
def start_chrome():
    """Start the bundled headless Chrome and wait for its CDP endpoint.

    Launches headless_shell in its own process group (so cleanup() can
    kill the whole group), then polls the DevTools ``/json/version``
    endpoint with exponential backoff until it answers.

    Returns the ``subprocess.Popen`` handle on success; raises Exception
    if the CDP endpoint never becomes reachable.
    """
    logger.debug("Starting Chrome process...")
    chrome_args = [
        CHROME_BINARY,
        f"--remote-debugging-port={CDP_PORT}",
        "--remote-debugging-address=0.0.0.0",
        "--no-sandbox",
        "--disable-setuid-sandbox",
        "--headless=new",
        "--disable-gpu",
        "--disable-dev-shm-usage",
        "--no-zygote",
        "--single-process",
        "--disable-features=site-per-process",
        "--no-first-run",
        "--disable-extensions"
    ]

    # New session/process group so the whole browser tree can be SIGTERMed.
    process = subprocess.Popen(
        chrome_args,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        preexec_fn=os.setsid
    )

    logger.debug(f"Chrome process started with PID: {process.pid}")

    # Wait for CDP endpoint with exponential backoff.
    wait_time = 1        # Start with 1 second
    max_wait_time = 16   # Cap at 16 seconds per retry
    max_attempts = 10    # Total attempts
    for attempt in range(max_attempts):
        try:
            response = requests.get(f"http://127.0.0.1:{CDP_PORT}/json/version", timeout=2)
            if response.status_code == 200:
                # Get ws URL from response
                ws_url = response.json()['webSocketDebuggerUrl']
                logger.debug("Chrome CDP is ready")
                logger.debug(f"CDP URL: {ws_url}")
                return process
        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
            # BUG FIX: the original caught only ConnectionError, so a
            # ConnectTimeout/ReadTimeout from the 2s request timeout escaped
            # the retry loop and aborted startup. Both now fall through to
            # the backoff below.
            pass
        logger.debug(f"Waiting for CDP endpoint (attempt {attempt + 1}/{max_attempts}), retrying in {wait_time} seconds")
        time.sleep(wait_time)
        wait_time = min(wait_time * 2, max_wait_time)  # Double wait time, up to max

    # If we get here, all retries failed — surface Chrome's own output.
    stdout, stderr = process.communicate()
    logger.error(f"Chrome stdout: {stdout.decode()}")
    logger.error(f"Chrome stderr: {stderr.decode()}")
    raise Exception("Chrome CDP endpoint failed to start after retries")
|
||||
|
||||
async def fetch_with_crawl4ai(url: str) -> dict:
    """Crawl *url* through the already-running Chrome and return the result.

    Discovers the CDP websocket URL from the local DevTools endpoint,
    attaches Crawl4ai to that externally managed browser, runs a single
    cache-bypassing crawl, and returns the result as a plain dict.
    """
    # Ask the local DevTools endpoint for its websocket debugger URL.
    version_info = requests.get(f'http://localhost:{CDP_PORT}/json/version').json()
    cdp_url = version_info['webSocketDebuggerUrl']

    # Attach to the managed browser instead of launching a new one.
    browser_config = BrowserConfig(cdp_url=cdp_url, use_managed_browser=True)
    async with AsyncWebCrawler(config=browser_config) as crawler:
        run_config = CrawlerRunConfig(
            cache_mode=CacheMode.BYPASS,
        )
        result: CrawlResult = await crawler.arun(url=url, config=run_config)
    return result.model_dump()  # Pydantic model -> JSON-serializable dict
|
||||
|
||||
# Start Chrome when the module loads
|
||||
logger.info("Starting Chrome process on module load")
|
||||
chrome_process = start_chrome()
|
||||
|
||||
@functions_framework.http
def crawl(request: Request):
    """HTTP Cloud Function to fetch web content using Crawl4ai.

    Expects a ``url`` query parameter; returns the crawl result as JSON.
    Responds 400 when the parameter is missing and 500 (with diagnostic
    details) on any unexpected failure.
    """
    try:
        target = request.args.get('url')
        if not target:
            return jsonify({'error': 'URL parameter is required', 'status': 400}), 400

        # Functions Framework calls us synchronously, so drive the async
        # crawl on a fresh event loop and always close it afterwards.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            data = loop.run_until_complete(
                asyncio.wait_for(fetch_with_crawl4ai(target), timeout=10.0)
            )
        finally:
            loop.close()
        return jsonify({
            'status': 200,
            'data': data
        })

    except Exception as e:
        error_msg = f"Unexpected error: {str(e)}"
        logger.error(error_msg, exc_info=True)
        return jsonify({
            'error': error_msg,
            'status': 500,
            'details': {
                'error_type': type(e).__name__,
                'stack_trace': str(e),
                'chrome_running': chrome_process.poll() is None if chrome_process else False
            }
        }), 500
|
||||
|
||||
|
||||
@atexit.register
def cleanup():
    """Cleanup Chrome process on shutdown."""
    still_running = chrome_process and chrome_process.poll() is None
    if not still_running:
        return
    try:
        # SIGTERM the whole process group created via os.setsid in start_chrome.
        os.killpg(os.getpgid(chrome_process.pid), signal.SIGTERM)
        logger.info("Chrome process terminated")
    except Exception as e:
        logger.error(f"Failed to terminate Chrome process: {e}")
|
||||
5
deploy/gcloud-function/requirements.txt
Normal file
5
deploy/gcloud-function/requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
functions-framework==3.*
|
||||
flask==2.3.3
|
||||
requests==2.31.0
|
||||
websockets==12.0
|
||||
git+https://github.com/unclecode/crawl4ai.git@next
|
||||
10
deploy/gcloud-function/resources/chrome/fonts.conf
Executable file
10
deploy/gcloud-function/resources/chrome/fonts.conf
Executable file
@@ -0,0 +1,10 @@
|
||||
<?xml version="1.0" ?>
|
||||
<!DOCTYPE fontconfig SYSTEM "fonts.dtd">
|
||||
<fontconfig>
|
||||
<dir>/var/task/.fonts</dir>
|
||||
<dir>/var/task/fonts</dir>
|
||||
<dir>/opt/fonts</dir>
|
||||
<dir>/tmp/fonts</dir>
|
||||
<cachedir>/tmp/fonts-cache/</cachedir>
|
||||
<config></config>
|
||||
</fontconfig>
|
||||
BIN
deploy/gcloud-function/resources/chrome/fonts/Open_Sans/OpenSans-Bold.ttf
Executable file
BIN
deploy/gcloud-function/resources/chrome/fonts/Open_Sans/OpenSans-Bold.ttf
Executable file
Binary file not shown.
BIN
deploy/gcloud-function/resources/chrome/fonts/Open_Sans/OpenSans-Italic.ttf
Executable file
BIN
deploy/gcloud-function/resources/chrome/fonts/Open_Sans/OpenSans-Italic.ttf
Executable file
Binary file not shown.
BIN
deploy/gcloud-function/resources/chrome/fonts/Open_Sans/OpenSans-Light.ttf
Executable file
BIN
deploy/gcloud-function/resources/chrome/fonts/Open_Sans/OpenSans-Light.ttf
Executable file
Binary file not shown.
BIN
deploy/gcloud-function/resources/chrome/fonts/Open_Sans/OpenSans-Regular.ttf
Executable file
BIN
deploy/gcloud-function/resources/chrome/fonts/Open_Sans/OpenSans-Regular.ttf
Executable file
Binary file not shown.
BIN
deploy/gcloud-function/resources/chrome/libvulkan.so.1
Executable file
BIN
deploy/gcloud-function/resources/chrome/libvulkan.so.1
Executable file
Binary file not shown.
@@ -0,0 +1 @@
|
||||
{"file_format_version": "1.0.0", "ICD": {"library_path": "./libvk_swiftshader.so", "api_version": "1.0.5"}}
|
||||
104
deploy/lambda/Dockerfile
Normal file
104
deploy/lambda/Dockerfile
Normal file
@@ -0,0 +1,104 @@
|
||||
FROM python:3.12-bookworm AS python-builder
|
||||
|
||||
RUN pip install poetry
|
||||
|
||||
ENV POETRY_NO_INTERACTION=1 \
|
||||
POETRY_CACHE_DIR=/tmp/poetry_cache
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY pyproject.toml poetry.lock ./
|
||||
RUN --mount=type=cache,target=$POETRY_CACHE_DIR poetry export -f requirements.txt -o requirements.txt
|
||||
|
||||
# Install build dependencies
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
build-essential \
|
||||
python3-dev \
|
||||
python3-setuptools \
|
||||
python3-wheel \
|
||||
python3-pip \
|
||||
gcc \
|
||||
g++ \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install specific dependencies that have build issues
|
||||
RUN pip install --no-cache-dir cchardet
|
||||
|
||||
FROM python:3.12-bookworm
|
||||
|
||||
# Install AWS Lambda Runtime Interface Client
|
||||
RUN python3 -m pip install --no-cache-dir awslambdaric
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
build-essential \
|
||||
curl \
|
||||
wget \
|
||||
gnupg \
|
||||
git \
|
||||
cmake \
|
||||
pkg-config \
|
||||
python3-dev \
|
||||
libjpeg-dev \
|
||||
redis-server \
|
||||
supervisor \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libglib2.0-0 \
|
||||
libnss3 \
|
||||
libnspr4 \
|
||||
libatk1.0-0 \
|
||||
libatk-bridge2.0-0 \
|
||||
libcups2 \
|
||||
libdrm2 \
|
||||
libdbus-1-3 \
|
||||
libxcb1 \
|
||||
libxkbcommon0 \
|
||||
libx11-6 \
|
||||
libxcomposite1 \
|
||||
libxdamage1 \
|
||||
libxext6 \
|
||||
libxfixes3 \
|
||||
libxrandr2 \
|
||||
libgbm1 \
|
||||
libpango-1.0-0 \
|
||||
libcairo2 \
|
||||
libasound2 \
|
||||
libatspi2.0-0 \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install build essentials for any compilations needed
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
build-essential \
|
||||
python3-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Set up function directory and browser path
|
||||
ARG FUNCTION_DIR="/function"
|
||||
RUN mkdir -p "${FUNCTION_DIR}/pw-browsers"
|
||||
RUN mkdir -p "/tmp/.crawl4ai"
|
||||
|
||||
# Set critical environment variables
|
||||
ENV PLAYWRIGHT_BROWSERS_PATH="${FUNCTION_DIR}/pw-browsers" \
|
||||
HOME="/tmp" \
|
||||
CRAWL4_AI_BASE_DIRECTORY="/tmp/.crawl4ai"
|
||||
|
||||
# Create Crawl4ai base directory
|
||||
RUN mkdir -p ${CRAWL4_AI_BASE_DIRECTORY}
|
||||
|
||||
RUN pip install --no-cache-dir faust-cchardet
|
||||
|
||||
# Install Crawl4ai and dependencies
|
||||
RUN pip install --no-cache-dir git+https://github.com/unclecode/crawl4ai.git@next
|
||||
|
||||
# Install Chromium only (no deps flag)
|
||||
RUN playwright install chromium
|
||||
|
||||
# Copy function code
|
||||
COPY lambda_function.py ${FUNCTION_DIR}/
|
||||
|
||||
# Set working directory
|
||||
WORKDIR ${FUNCTION_DIR}
|
||||
|
||||
ENTRYPOINT [ "/usr/local/bin/python", "-m", "awslambdaric" ]
|
||||
CMD [ "lambda_function.handler" ]
|
||||
1081
deploy/lambda/deploy.py
Normal file
1081
deploy/lambda/deploy.py
Normal file
File diff suppressed because it is too large
Load Diff
345
deploy/lambda/guide.md
Normal file
345
deploy/lambda/guide.md
Normal file
@@ -0,0 +1,345 @@
|
||||
# Deploying Crawl4ai on AWS Lambda
|
||||
|
||||
This guide walks you through deploying Crawl4ai as an AWS Lambda function with API Gateway integration. You'll learn how to set up, test, and clean up your deployment.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Before you begin, ensure you have:
|
||||
|
||||
- AWS CLI installed and configured (`aws configure`)
|
||||
- Docker installed and running
|
||||
- Python 3.8+ installed
|
||||
- Basic familiarity with AWS services
|
||||
|
||||
## Project Files
|
||||
|
||||
Your project directory should contain:
|
||||
|
||||
- `Dockerfile`: Container configuration for Lambda
|
||||
- `lambda_function.py`: Lambda handler code
|
||||
- `deploy.py`: Our deployment script
|
||||
|
||||
## Step 1: Install Required Python Packages
|
||||
|
||||
Install the Python packages needed for our deployment script:
|
||||
|
||||
```bash
|
||||
pip install typer rich
|
||||
```
|
||||
|
||||
## Step 2: Run the Deployment Script
|
||||
|
||||
Our Python script automates the entire deployment process:
|
||||
|
||||
```bash
|
||||
python deploy.py
|
||||
```
|
||||
|
||||
The script will guide you through:
|
||||
|
||||
1. Configuration setup (AWS region, function name, memory allocation)
|
||||
2. Docker image building
|
||||
3. ECR repository creation
|
||||
4. Lambda function deployment
|
||||
5. API Gateway setup
|
||||
6. Provisioned concurrency configuration (optional)
|
||||
|
||||
Follow the prompts and confirm each step by pressing Enter.
|
||||
|
||||
## Step 3: Manual Deployment (Alternative to the Script)
|
||||
|
||||
If you prefer to deploy manually or understand what the script does, follow these steps:
|
||||
|
||||
### Building and Pushing the Docker Image
|
||||
|
||||
```bash
|
||||
# Build the Docker image
|
||||
docker build -t crawl4ai-lambda .
|
||||
|
||||
# Create an ECR repository (if it doesn't exist)
|
||||
aws ecr create-repository --repository-name crawl4ai-lambda
|
||||
|
||||
# Get ECR login password and login
|
||||
aws ecr get-login-password | docker login --username AWS --password-stdin $(aws sts get-caller-identity --query Account --output text).dkr.ecr.us-east-1.amazonaws.com
|
||||
|
||||
# Tag the image
|
||||
ECR_URI=$(aws ecr describe-repositories --repository-names crawl4ai-lambda --query 'repositories[0].repositoryUri' --output text)
|
||||
docker tag crawl4ai-lambda:latest $ECR_URI:latest
|
||||
|
||||
# Push the image to ECR
|
||||
docker push $ECR_URI:latest
|
||||
```
|
||||
|
||||
### Creating the Lambda Function
|
||||
|
||||
```bash
|
||||
# Get IAM role ARN (create it if needed)
|
||||
ROLE_ARN=$(aws iam get-role --role-name lambda-execution-role --query 'Role.Arn' --output text)
|
||||
|
||||
# Create Lambda function
|
||||
aws lambda create-function \
|
||||
--function-name crawl4ai-function \
|
||||
--package-type Image \
|
||||
--code ImageUri=$ECR_URI:latest \
|
||||
--role $ROLE_ARN \
|
||||
--timeout 300 \
|
||||
--memory-size 4096 \
|
||||
--ephemeral-storage Size=10240 \
|
||||
--environment "Variables={CRAWL4_AI_BASE_DIRECTORY=/tmp/.crawl4ai,HOME=/tmp,PLAYWRIGHT_BROWSERS_PATH=/function/pw-browsers}"
|
||||
```
|
||||
|
||||
If you're updating an existing function:
|
||||
|
||||
```bash
|
||||
# Update function code
|
||||
aws lambda update-function-code \
|
||||
--function-name crawl4ai-function \
|
||||
--image-uri $ECR_URI:latest
|
||||
|
||||
# Update function configuration
|
||||
aws lambda update-function-configuration \
|
||||
--function-name crawl4ai-function \
|
||||
--timeout 300 \
|
||||
--memory-size 4096 \
|
||||
--ephemeral-storage Size=10240 \
|
||||
--environment "Variables={CRAWL4_AI_BASE_DIRECTORY=/tmp/.crawl4ai,HOME=/tmp,PLAYWRIGHT_BROWSERS_PATH=/function/pw-browsers}"
|
||||
```
|
||||
|
||||
### Setting Up API Gateway
|
||||
|
||||
```bash
|
||||
# Create API Gateway
|
||||
API_ID=$(aws apigateway create-rest-api --name crawl4ai-api --query 'id' --output text)
|
||||
|
||||
# Get root resource ID
|
||||
PARENT_ID=$(aws apigateway get-resources --rest-api-id $API_ID --query 'items[?path==`/`].id' --output text)
|
||||
|
||||
# Create resource
|
||||
RESOURCE_ID=$(aws apigateway create-resource --rest-api-id $API_ID --parent-id $PARENT_ID --path-part "crawl" --query 'id' --output text)
|
||||
|
||||
# Create POST method
|
||||
aws apigateway put-method --rest-api-id $API_ID --resource-id $RESOURCE_ID --http-method POST --authorization-type NONE
|
||||
|
||||
# Get Lambda function ARN
|
||||
LAMBDA_ARN=$(aws lambda get-function --function-name crawl4ai-function --query 'Configuration.FunctionArn' --output text)
|
||||
|
||||
# Set Lambda integration
|
||||
aws apigateway put-integration \
|
||||
--rest-api-id $API_ID \
|
||||
--resource-id $RESOURCE_ID \
|
||||
--http-method POST \
|
||||
--type AWS_PROXY \
|
||||
--integration-http-method POST \
|
||||
--uri arn:aws:apigateway:us-east-1:lambda:path/2015-03-31/functions/$LAMBDA_ARN/invocations
|
||||
|
||||
# Deploy API
|
||||
aws apigateway create-deployment --rest-api-id $API_ID --stage-name prod
|
||||
|
||||
# Set Lambda permission
|
||||
ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
|
||||
aws lambda add-permission \
|
||||
--function-name crawl4ai-function \
|
||||
--statement-id apigateway \
|
||||
--action lambda:InvokeFunction \
|
||||
--principal apigateway.amazonaws.com \
|
||||
--source-arn "arn:aws:execute-api:us-east-1:$ACCOUNT_ID:$API_ID/*/POST/crawl"
|
||||
```
|
||||
|
||||
### Setting Up Provisioned Concurrency (Optional)
|
||||
|
||||
This reduces cold starts:
|
||||
|
||||
```bash
|
||||
# Publish a version
|
||||
VERSION=$(aws lambda publish-version --function-name crawl4ai-function --query 'Version' --output text)
|
||||
|
||||
# Create alias
|
||||
aws lambda create-alias \
|
||||
--function-name crawl4ai-function \
|
||||
--name prod \
|
||||
--function-version $VERSION
|
||||
|
||||
# Configure provisioned concurrency
|
||||
aws lambda put-provisioned-concurrency-config \
|
||||
--function-name crawl4ai-function \
|
||||
--qualifier prod \
|
||||
--provisioned-concurrent-executions 2
|
||||
|
||||
# Update API Gateway to use alias
|
||||
LAMBDA_ALIAS_ARN="arn:aws:lambda:us-east-1:$ACCOUNT_ID:function:crawl4ai-function:prod"
|
||||
aws apigateway put-integration \
|
||||
--rest-api-id $API_ID \
|
||||
--resource-id $RESOURCE_ID \
|
||||
--http-method POST \
|
||||
--type AWS_PROXY \
|
||||
--integration-http-method POST \
|
||||
--uri arn:aws:apigateway:us-east-1:lambda:path/2015-03-31/functions/$LAMBDA_ALIAS_ARN/invocations
|
||||
|
||||
# Redeploy API Gateway
|
||||
aws apigateway create-deployment --rest-api-id $API_ID --stage-name prod
|
||||
```
|
||||
|
||||
## Step 4: Testing the Deployment
|
||||
|
||||
Once deployed, test your function with:
|
||||
|
||||
```bash
|
||||
ENDPOINT_URL="https://$API_ID.execute-api.us-east-1.amazonaws.com/prod/crawl"
|
||||
|
||||
# Test with curl
|
||||
curl -X POST $ENDPOINT_URL \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"url":"https://example.com"}'
|
||||
```
|
||||
|
||||
Or using Python:
|
||||
|
||||
```python
|
||||
import requests
|
||||
import json
|
||||
|
||||
url = "https://your-api-id.execute-api.us-east-1.amazonaws.com/prod/crawl"
|
||||
payload = {
|
||||
"url": "https://example.com",
|
||||
"browser_config": {
|
||||
"headless": True,
|
||||
"verbose": False
|
||||
},
|
||||
"crawler_config": {
|
||||
"crawler_config": {
|
||||
"type": "CrawlerRunConfig",
|
||||
"params": {
|
||||
"markdown_generator": {
|
||||
"type": "DefaultMarkdownGenerator",
|
||||
"params": {
|
||||
"content_filter": {
|
||||
"type": "PruningContentFilter",
|
||||
"params": {
|
||||
"threshold": 0.48,
|
||||
"threshold_type": "fixed"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
response = requests.post(url, json=payload)
|
||||
result = response.json()
|
||||
print(json.dumps(result, indent=2))
|
||||
```
|
||||
|
||||
## Step 5: Cleaning Up Resources
|
||||
|
||||
To remove all AWS resources created for this deployment:
|
||||
|
||||
```bash
|
||||
python deploy.py cleanup
|
||||
```
|
||||
|
||||
Or manually:
|
||||
|
||||
```bash
|
||||
# Delete API Gateway
|
||||
aws apigateway delete-rest-api --rest-api-id $API_ID
|
||||
|
||||
# Remove provisioned concurrency (if configured)
|
||||
aws lambda delete-provisioned-concurrency-config \
|
||||
--function-name crawl4ai-function \
|
||||
--qualifier prod
|
||||
|
||||
# Delete alias (if created)
|
||||
aws lambda delete-alias \
|
||||
--function-name crawl4ai-function \
|
||||
--name prod
|
||||
|
||||
# Delete Lambda function
|
||||
aws lambda delete-function --function-name crawl4ai-function
|
||||
|
||||
# Delete ECR repository
|
||||
aws ecr delete-repository --repository-name crawl4ai-lambda --force
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Cold Start Issues
|
||||
|
||||
If experiencing long cold starts:
|
||||
- Enable provisioned concurrency
|
||||
- Increase memory allocation (4096 MB recommended)
|
||||
- Ensure the Lambda function has enough ephemeral storage
|
||||
|
||||
### Permission Errors
|
||||
|
||||
If you encounter permission errors:
|
||||
- Check the IAM role has the necessary permissions
|
||||
- Ensure API Gateway has permission to invoke the Lambda function
|
||||
|
||||
### Container Size Issues
|
||||
|
||||
If your container is too large:
|
||||
- Optimize the Dockerfile
|
||||
- Use multi-stage builds
|
||||
- Consider removing unnecessary dependencies
|
||||
|
||||
## Performance Considerations
|
||||
|
||||
- Lambda memory affects CPU allocation - higher memory means faster execution
|
||||
- Provisioned concurrency eliminates cold starts but costs more
|
||||
- Optimize the Playwright setup for faster browser initialization
|
||||
|
||||
## Security Best Practices
|
||||
|
||||
- Use the principle of least privilege for IAM roles
|
||||
- Implement API Gateway authentication for production deployments
|
||||
- Consider using AWS KMS for storing sensitive configuration
|
||||
|
||||
## Useful AWS Console Links
|
||||
|
||||
Here are quick links to access important AWS console pages for monitoring and managing your deployment:
|
||||
|
||||
| Resource | Console Link |
|
||||
|----------|-------------|
|
||||
| Lambda Functions | [AWS Lambda Console](https://console.aws.amazon.com/lambda/home#/functions) |
|
||||
| Lambda Function Logs | [CloudWatch Logs](https://console.aws.amazon.com/cloudwatch/home#logsV2:log-groups) |
|
||||
| API Gateway | [API Gateway Console](https://console.aws.amazon.com/apigateway/home) |
|
||||
| ECR Repositories | [ECR Console](https://console.aws.amazon.com/ecr/repositories) |
|
||||
| IAM Roles | [IAM Console](https://console.aws.amazon.com/iamv2/home#/roles) |
|
||||
| CloudWatch Metrics | [CloudWatch Metrics](https://console.aws.amazon.com/cloudwatch/home#metricsV2) |
|
||||
|
||||
### Monitoring Lambda Execution
|
||||
|
||||
To monitor your Lambda function:
|
||||
|
||||
1. Go to the [Lambda function console](https://console.aws.amazon.com/lambda/home#/functions)
|
||||
2. Select your function (`crawl4ai-function`)
|
||||
3. Click the "Monitor" tab to see:
|
||||
- Invocation metrics
|
||||
- Success/failure rates
|
||||
- Duration statistics
|
||||
|
||||
### Viewing Lambda Logs
|
||||
|
||||
To see detailed execution logs:
|
||||
|
||||
1. Go to [CloudWatch Logs](https://console.aws.amazon.com/cloudwatch/home#logsV2:log-groups)
|
||||
2. Find the log group named `/aws/lambda/crawl4ai-function`
|
||||
3. Click to see the latest log streams
|
||||
4. Each stream contains logs from a function execution
|
||||
|
||||
### Checking API Gateway Traffic
|
||||
|
||||
To monitor API requests:
|
||||
|
||||
1. Go to the [API Gateway console](https://console.aws.amazon.com/apigateway/home)
|
||||
2. Select your API (`crawl4ai-api`)
|
||||
3. Click "Dashboard" to see:
|
||||
- API calls
|
||||
- Latency
|
||||
- Error rates
|
||||
|
||||
## Conclusion
|
||||
|
||||
You now have Crawl4ai running as a serverless function on AWS Lambda! This setup allows you to crawl websites on-demand without maintaining infrastructure, while paying only for the compute time you use.
|
||||
107
deploy/lambda/lambda_function.py
Normal file
107
deploy/lambda/lambda_function.py
Normal file
@@ -0,0 +1,107 @@
|
||||
import json
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
# Ensure environment variables and directories are set.
# On AWS Lambda only /tmp is writable; Crawl4ai resolves its cache/profile
# locations from HOME and CRAWL4_AI_BASE_DIRECTORY, so both must be pointed
# at /tmp BEFORE the crawl4ai import below runs.
os.environ['CRAWL4_AI_BASE_DIRECTORY'] = '/tmp/.crawl4ai'
os.environ['HOME'] = '/tmp'

# Create directory if it doesn't exist (exist_ok makes warm-start invocations safe)
os.makedirs('/tmp/.crawl4ai', exist_ok=True)
|
||||
|
||||
from crawl4ai import (
|
||||
AsyncWebCrawler,
|
||||
BrowserConfig,
|
||||
CrawlerRunConfig,
|
||||
CacheMode
|
||||
)
|
||||
|
||||
|
||||
def handler(event, context):
    """AWS Lambda entry point.

    Parses an API Gateway proxy event (JSON string under ``body``) or a
    direct-invocation event (``body`` may be a dict, ``None``, or absent),
    runs the crawler on the requested URL, and returns an API Gateway
    proxy response dict.

    Returns:
        dict with ``statusCode`` and JSON ``body``:
        200 with the serialized crawl result, 400 when no URL is supplied,
        500 (with traceback) on any unexpected failure.
    """
    try:
        # API Gateway proxy integration delivers the payload as a JSON
        # string under 'body'; direct invocations may pass a dict or set
        # 'body' to None.  `or '{}'` covers both a missing key and an
        # explicit None (the old `event.get('body', '{}')` crashed on None,
        # turning a missing-URL 400 into a 500).
        raw_body = event.get('body') or '{}'
        body = raw_body if isinstance(raw_body, dict) else json.loads(raw_body)

        url = body.get('url')
        if not url:
            return {
                'statusCode': 400,
                'body': json.dumps({'error': 'URL is required'})
            }

        # Get optional configurations or use defaults
        browser_config_dict = body.get('browser_config', {})
        crawler_config_dict = body.get('crawler_config', {})

        # Lambda handlers are synchronous; drive the async crawler to
        # completion on a fresh event loop.
        result = asyncio.run(crawl(url, browser_config_dict, crawler_config_dict))

        return {
            'statusCode': 200,
            'headers': {
                'Content-Type': 'application/json'
            },
            'body': json.dumps(result)
        }

    except Exception as e:
        # Surface the traceback in the response body to ease debugging;
        # acceptable here because the API is not public-facing by default.
        import traceback
        return {
            'statusCode': 500,
            'body': json.dumps({
                'error': str(e),
                'traceback': traceback.format_exc()
            })
        }
|
||||
|
||||
async def crawl(url, browser_config_dict, crawler_config_dict):
    """
    Run the crawler with the provided configurations, with Lambda-specific settings.

    Args:
        url: Target URL to crawl.
        browser_config_dict: Serialized BrowserConfig dict from the request body.
        crawler_config_dict: Serialized CrawlerRunConfig dict from the request body.

    Returns:
        The CrawlResult serialized via model_dump() (a JSON-friendly dict).
    """
    # Start with user-provided config but override with Lambda-required settings.
    # NOTE(review): base_browser_config is currently unused — the carry-over of
    # user-supplied browser settings below is commented out, so the caller's
    # browser_config is effectively ignored.  Confirm whether that is intended.
    base_browser_config = BrowserConfig.load(browser_config_dict) if browser_config_dict else BrowserConfig()

    # Apply Lambda-specific browser configurations
    browser_config = BrowserConfig(
        verbose=True,
        browser_type="chromium",
        headless=True,
        user_agent_mode="random",
        light_mode=True,
        use_managed_browser=False,
        # Flags needed for Chromium inside the Lambda sandbox (no SUID
        # sandbox, tiny /dev/shm, single process to fit the runtime model).
        extra_args=[
            "--headless=new",
            "--no-sandbox",
            "--disable-dev-shm-usage",
            "--disable-setuid-sandbox",
            "--remote-allow-origins=*",
            "--autoplay-policy=user-gesture-required",
            "--single-process",
        ],
        # # Carry over any other settings from user config that aren't overridden
        # **{k: v for k, v in base_browser_config.model_dump().items()
        #    if k not in ['verbose', 'browser_type', 'headless', 'user_agent_mode',
        #                 'light_mode', 'use_managed_browser', 'extra_args']}
    )

    # Start with user-provided crawler config but ensure cache is bypassed
    base_crawler_config = CrawlerRunConfig.load(crawler_config_dict) if crawler_config_dict else CrawlerRunConfig()

    # Apply Lambda-specific crawler configurations
    crawler_config = CrawlerRunConfig(
        exclude_external_links=base_crawler_config.exclude_external_links,
        remove_overlay_elements=True,
        magic=True,
        # Always re-fetch: /tmp is ephemeral, so a cache hit across cold
        # starts is impossible anyway.
        cache_mode=CacheMode.BYPASS,
        # Carry over markdown generator and other settings
        markdown_generator=base_crawler_config.markdown_generator
    )

    # Perform the crawl with Lambda-optimized settings
    async with AsyncWebCrawler(config=browser_config) as crawler:
        result = await crawler.arun(url=url, config=crawler_config)

        # Return serializable results
        return result.model_dump()
|
||||
543
deploy/modal/crawl4ai_api_service.py
Normal file
543
deploy/modal/crawl4ai_api_service.py
Normal file
@@ -0,0 +1,543 @@
|
||||
import os
|
||||
import time
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import Dict, Any, Optional, List
|
||||
|
||||
import modal
|
||||
from modal import Image, App, Volume, Secret, web_endpoint, function
|
||||
|
||||
# Configuration
|
||||
APP_NAME = "crawl4ai-api"
|
||||
CRAWL4AI_VERSION = "next" # Using the 'next' branch
|
||||
PYTHON_VERSION = "3.10" # Compatible with playwright
|
||||
DEFAULT_CREDITS = 1000
|
||||
|
||||
# Create a custom image with Crawl4ai and its dependencies
|
||||
image = Image.debian_slim(python_version=PYTHON_VERSION).pip_install(
|
||||
["fastapi[standard]", "pymongo", "pydantic"]
|
||||
).run_commands(
|
||||
"apt-get update",
|
||||
"apt-get install -y software-properties-common",
|
||||
"apt-get install -y git",
|
||||
"apt-add-repository non-free",
|
||||
"apt-add-repository contrib",
|
||||
# Install crawl4ai from the next branch
|
||||
f"pip install -U git+https://github.com/unclecode/crawl4ai.git@{CRAWL4AI_VERSION}",
|
||||
"pip install -U fastapi[standard]",
|
||||
"pip install -U pydantic",
|
||||
# Install playwright and browsers
|
||||
"crawl4ai-setup",
|
||||
)
|
||||
|
||||
# Create persistent volume for user database
|
||||
user_db = Volume.from_name("crawl4ai-users", create_if_missing=True)
|
||||
|
||||
# Create admin secret for secure operations
|
||||
admin_secret = Secret.from_name("admin-secret", create_if_missing=True)
|
||||
|
||||
# Define the app
|
||||
app = App(APP_NAME, image=image)
|
||||
|
||||
# Default configurations
|
||||
DEFAULT_BROWSER_CONFIG = {
|
||||
"headless": True,
|
||||
"verbose": False,
|
||||
}
|
||||
|
||||
DEFAULT_CRAWLER_CONFIG = {
|
||||
"crawler_config": {
|
||||
"type": "CrawlerRunConfig",
|
||||
"params": {
|
||||
"markdown_generator": {
|
||||
"type": "DefaultMarkdownGenerator",
|
||||
"params": {
|
||||
"content_filter": {
|
||||
"type": "PruningContentFilter",
|
||||
"params": {
|
||||
"threshold": 0.48,
|
||||
"threshold_type": "fixed"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Database operations
|
||||
@app.function(volumes={"/data": user_db})
def init_db() -> None:
    """Create the MongoDB indexes the service depends on (idempotent)."""
    from pymongo import MongoClient, ASCENDING

    db = MongoClient("mongodb://localhost:27017").crawl4ai_db

    # Unique single-field indexes backing token auth and email registration.
    db.users.create_index([("api_token", ASCENDING)], unique=True)
    db.users.create_index([("email", ASCENDING)], unique=True)

    # Compound index for per-user, time-ordered usage queries.
    db.usage_stats.create_index([("user_id", ASCENDING), ("timestamp", ASCENDING)])

    print("Database initialized with required indexes")
|
||||
|
||||
@app.function(volumes={"/data": user_db})
def get_user_by_token(api_token: str) -> Optional[Dict[str, Any]]:
    """Fetch the user document matching *api_token*, or None if absent."""
    from pymongo import MongoClient
    from bson.objectid import ObjectId

    db = MongoClient("mongodb://localhost:27017").crawl4ai_db

    record = db.users.find_one({"api_token": api_token})
    if record is None:
        return None

    # ObjectId is not JSON-serializable; expose it as a plain string.
    record["_id"] = str(record["_id"])
    return record
|
||||
|
||||
@app.function(volumes={"/data": user_db})
def create_user(email: str, name: str) -> Dict[str, Any]:
    """Register a new user with a fresh API token and DEFAULT_CREDITS.

    Returns the created user document (with string ``_id``), or
    ``{"error": ...}`` when the email is already registered.
    """
    from pymongo import MongoClient
    from bson.objectid import ObjectId

    db = MongoClient("mongodb://localhost:27017").crawl4ai_db

    doc = {
        "email": email,
        "name": name,
        "api_token": str(uuid.uuid4()),  # opaque bearer credential
        "credits": DEFAULT_CREDITS,
        "created_at": datetime.utcnow(),
        "updated_at": datetime.utcnow(),
        "is_active": True,
    }

    try:
        inserted = db.users.insert_one(doc)
        doc["_id"] = str(inserted.inserted_id)
        return doc
    except Exception as exc:
        # The unique index on email surfaces duplicates as a write error.
        if "duplicate key error" in str(exc):
            return {"error": "User with this email already exists"}
        raise
|
||||
|
||||
@app.function(volumes={"/data": user_db})
def update_user_credits(api_token: str, amount: int) -> Dict[str, Any]:
    """Atomically add (positive) or deduct (negative) credits for a user.

    Args:
        api_token: The user's API token.
        amount: Credit delta; negative values deduct.

    Returns:
        ``{"success": True, "credits": <new balance>}`` on success, or
        ``{"success": False, "error": ...}`` when the user does not exist
        or has insufficient credits for a deduction.
    """
    from pymongo import MongoClient

    client = MongoClient("mongodb://localhost:27017")
    db = client.crawl4ai_db

    # Fold the balance check into the update filter so the check and the
    # $inc happen in ONE document operation.  The previous read-then-update
    # had a TOCTOU race: two concurrent deductions could both pass the
    # check and drive credits negative.
    query: Dict[str, Any] = {"api_token": api_token}
    if amount < 0:
        query["credits"] = {"$gte": -amount}

    result = db.users.update_one(
        query,
        {
            "$inc": {"credits": amount},
            "$set": {"updated_at": datetime.utcnow()}
        }
    )

    if result.modified_count == 1:
        # Re-read to report the post-update balance.
        updated_user = db.users.find_one({"api_token": api_token})
        return {
            "success": True,
            "credits": updated_user["credits"]
        }

    # Nothing matched: distinguish "no such user" from "not enough credits".
    if db.users.find_one({"api_token": api_token}) is None:
        return {"success": False, "error": "User not found"}
    if amount < 0:
        return {"success": False, "error": "Insufficient credits"}
    return {"success": False, "error": "Failed to update credits"}
|
||||
|
||||
@app.function(volumes={"/data": user_db})
def log_usage(user_id: str, url: str, success: bool, error: Optional[str] = None) -> None:
    """Append a single crawl-attempt record to the usage_stats collection."""
    from pymongo import MongoClient
    from bson.objectid import ObjectId

    db = MongoClient("mongodb://localhost:27017").crawl4ai_db

    db.usage_stats.insert_one({
        "user_id": user_id,
        "url": url,
        "timestamp": datetime.utcnow(),
        "success": success,
        "error": error,
    })
|
||||
|
||||
# Main crawling function
|
||||
@app.function(timeout=300)  # 5 minute timeout
async def crawl(
    url: str,
    browser_config: Optional[Dict[str, Any]] = None,
    crawler_config: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Crawl a given URL using Crawl4ai.

    Args:
        url: The URL to crawl
        browser_config: Optional browser configuration to override defaults
        crawler_config: Optional crawler configuration to override defaults

    Returns:
        A dictionary containing the crawl results
    """
    # Imported inside the function: crawl4ai is only installed in the
    # remote Modal image, not in the local driver environment.
    from crawl4ai import (
        AsyncWebCrawler,
        BrowserConfig,
        CrawlerRunConfig,
        CrawlResult
    )

    # Prepare browser config using the loader method
    if browser_config is None:
        browser_config = DEFAULT_BROWSER_CONFIG
    browser_config_obj = BrowserConfig.load(browser_config)

    # Prepare crawler config using the loader method
    if crawler_config is None:
        crawler_config = DEFAULT_CRAWLER_CONFIG
    crawler_config_obj = CrawlerRunConfig.load(crawler_config)

    # Perform the crawl
    async with AsyncWebCrawler(config=browser_config_obj) as crawler:
        result: CrawlResult = await crawler.arun(url=url, config=crawler_config_obj)

        # Return serializable results — try Pydantic v2 first, then v1,
        # then a hand-rolled subset of fields.
        try:
            # Try newer Pydantic v2 method
            return result.model_dump()
        except AttributeError:
            try:
                # Try older Pydantic v1 method
                return result.dict()
            except AttributeError:
                # Fallback to manual conversion.
                # NOTE(review): assumes CrawlResult exposes .title/.status/
                # .url without guards — verify these attribute names against
                # the installed crawl4ai version; the other fields are
                # hasattr-guarded but these three are not.
                return {
                    "url": result.url,
                    "title": result.title,
                    "status": result.status,
                    "content": str(result.content) if hasattr(result, "content") else None,
                    "links": [{"url": link.url, "text": link.text} for link in result.links] if hasattr(result, "links") else [],
                    "markdown_v2": {
                        "raw_markdown": result.markdown_v2.raw_markdown if hasattr(result, "markdown_v2") else None
                    }
                }
|
||||
|
||||
# API endpoints
|
||||
@app.function()
@web_endpoint(method="POST")
def crawl_endpoint(data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Web endpoint that accepts POST requests with JSON data containing:
    - api_token: User's API token
    - url: The URL to crawl
    - browser_config: Optional browser configuration
    - crawler_config: Optional crawler configuration

    Returns the crawl results and remaining credits.

    Status codes are carried inside the JSON body (``status_code`` key);
    the HTTP response itself is whatever the web_endpoint wrapper emits.
    """
    # Extract and validate API token
    api_token = data.get("api_token")
    if not api_token:
        return {
            "success": False,
            "error": "API token is required",
            "status_code": 401
        }

    # Verify user (remote call to the DB-mounted function)
    user = get_user_by_token.remote(api_token)
    if not user:
        return {
            "success": False,
            "error": "Invalid API token",
            "status_code": 401
        }

    if not user.get("is_active", False):
        return {
            "success": False,
            "error": "Account is inactive",
            "status_code": 403
        }

    # Validate URL
    url = data.get("url")
    if not url:
        return {
            "success": False,
            "error": "URL is required",
            "status_code": 400
        }

    # Check credits
    if user.get("credits", 0) <= 0:
        return {
            "success": False,
            "error": "Insufficient credits",
            "status_code": 403
        }

    # Deduct credit first (1 credit per call).
    # NOTE(review): the credit is NOT refunded when the crawl below fails —
    # failed attempts are billed.  Confirm this is the intended policy.
    credit_result = update_user_credits.remote(api_token, -1)
    if not credit_result.get("success", False):
        return {
            "success": False,
            "error": credit_result.get("error", "Failed to process credits"),
            "status_code": 500
        }

    # Extract configs (None lets crawl() fall back to its defaults)
    browser_config = data.get("browser_config")
    crawler_config = data.get("crawler_config")

    # Perform crawl
    try:
        start_time = time.time()
        result = crawl.remote(url, browser_config, crawler_config)
        execution_time = time.time() - start_time

        # Log successful usage (fire-and-forget via .spawn)
        log_usage.spawn(user["_id"], url, True)

        return {
            "success": True,
            "data": result,
            "credits_remaining": credit_result.get("credits"),
            "execution_time_seconds": round(execution_time, 2),
            "status_code": 200
        }
    except Exception as e:
        # Log failed usage (fire-and-forget)
        log_usage.spawn(user["_id"], url, False, str(e))

        # Return error
        return {
            "success": False,
            "error": f"Crawling error: {str(e)}",
            "credits_remaining": credit_result.get("credits"),
            "status_code": 500
        }
|
||||
|
||||
# Admin endpoints
|
||||
@app.function(secrets=[admin_secret])
@web_endpoint(method="POST")
def admin_create_user(data: Dict[str, Any]) -> Dict[str, Any]:
    """Admin-only: register a new user and return their credentials."""
    # Reject anything without the shared admin secret.
    if data.get("admin_token") != os.environ.get("ADMIN_TOKEN"):
        return {"success": False, "error": "Invalid admin token", "status_code": 401}

    email = data.get("email")
    name = data.get("name")
    if not email or not name:
        return {"success": False, "error": "Email and name are required", "status_code": 400}

    # Delegate the actual insert to the DB-mounted function.
    user = create_user.remote(email, name)
    if "error" in user:
        return {"success": False, "error": user["error"], "status_code": 400}

    created_at = user["created_at"]
    if isinstance(created_at, datetime):
        created_at = created_at.isoformat()

    return {
        "success": True,
        "data": {
            "user_id": user["_id"],
            "email": user["email"],
            "name": user["name"],
            "api_token": user["api_token"],
            "credits": user["credits"],
            "created_at": created_at,
        },
        "status_code": 201,
    }
|
||||
|
||||
@app.function(secrets=[admin_secret])
@web_endpoint(method="POST")
def admin_update_credits(data: Dict[str, Any]) -> Dict[str, Any]:
    """Admin-only: add or subtract credits on a user's account."""
    # Reject anything without the shared admin secret.
    if data.get("admin_token") != os.environ.get("ADMIN_TOKEN"):
        return {"success": False, "error": "Invalid admin token", "status_code": 401}

    api_token = data.get("api_token")
    if not api_token:
        return {"success": False, "error": "API token is required", "status_code": 400}

    amount = data.get("amount")
    if not isinstance(amount, int):
        return {"success": False, "error": "Amount must be an integer", "status_code": 400}

    # Delegate the balance change to the DB-mounted function.
    outcome = update_user_credits.remote(api_token, amount)
    if not outcome.get("success", False):
        return {
            "success": False,
            "error": outcome.get("error", "Failed to update credits"),
            "status_code": 400,
        }

    return {"success": True, "data": {"credits": outcome["credits"]}, "status_code": 200}
|
||||
|
||||
@app.function(secrets=[admin_secret])
@web_endpoint(method="GET")
def admin_get_users(admin_token: str) -> Dict[str, Any]:
    """Admin-only: return the full list of registered users."""
    if admin_token != os.environ.get("ADMIN_TOKEN"):
        return {"success": False, "error": "Invalid admin token", "status_code": 401}

    return {
        "success": True,
        "data": get_all_users.remote(),
        "status_code": 200,
    }
|
||||
|
||||
@app.function(volumes={"/data": user_db})
def get_all_users() -> List[Dict[str, Any]]:
    """Return every user document in JSON-serializable form (admin use)."""
    from pymongo import MongoClient

    db = MongoClient("mongodb://localhost:27017").crawl4ai_db

    serialized = []
    for doc in db.users.find():
        doc["_id"] = str(doc["_id"])  # ObjectId -> string
        # datetime -> ISO-8601 string for the serializable payload.
        for key in ("created_at", "updated_at"):
            if isinstance(doc.get(key), datetime):
                doc[key] = doc[key].isoformat()
        serialized.append(doc)

    return serialized
|
||||
|
||||
# Public endpoints
|
||||
@app.function()
@web_endpoint(method="GET")
def health_check() -> Dict[str, Any]:
    """Liveness probe: report service identity and current UTC time."""
    return {
        "status": "online",
        "service": APP_NAME,
        "version": CRAWL4AI_VERSION,
        "timestamp": datetime.utcnow().isoformat(),
    }
|
||||
|
||||
@app.function()
@web_endpoint(method="GET")
def check_credits(api_token: str) -> Dict[str, Any]:
    """Return the credit balance and identity for a valid API token."""
    if not api_token:
        return {"success": False, "error": "API token is required", "status_code": 401}

    account = get_user_by_token.remote(api_token)
    if not account:
        return {"success": False, "error": "Invalid API token", "status_code": 401}

    return {
        "success": True,
        "data": {
            "credits": account["credits"],
            "email": account["email"],
            "name": account["name"],
        },
        "status_code": 200,
    }
|
||||
|
||||
# Local entrypoint for testing
|
||||
@app.local_entrypoint()
def main(url: str = "https://www.modal.com"):
    """Command line entrypoint for local testing.

    Initializes the database indexes, crawls *url* remotely, and prints a
    short summary of the returned result dict.
    """
    print("Initializing database...")
    init_db.remote()

    print(f"Testing crawl on URL: {url}")
    result = crawl.remote(url)

    # Print sample of result — keys are optional because crawl() may fall
    # back to different serialization paths (see its fallback chain).
    print("\nCrawl Result Sample:")
    if "title" in result:
        print(f"Title: {result['title']}")
    if "status" in result:
        print(f"Status: {result['status']}")
    if "links" in result:
        print(f"Links found: {len(result['links'])}")
    if "markdown_v2" in result and result["markdown_v2"] and "raw_markdown" in result["markdown_v2"]:
        print("\nMarkdown Preview (first 300 chars):")
        print(result["markdown_v2"]["raw_markdown"][:300] + "...")
|
||||
127
deploy/modal/entry.py
Normal file
127
deploy/modal/entry.py
Normal file
@@ -0,0 +1,127 @@
|
||||
import modal
|
||||
from typing import Optional, Dict, Any
|
||||
|
||||
# Build a custom Modal image with Crawl4ai and its dependencies:
# Debian slim + Python 3.10, system packages, then Crawl4ai from the
# GitHub `next` branch (crawl4ai-setup pulls Playwright + Chromium).
image = modal.Image.debian_slim(python_version="3.10").pip_install(["fastapi[standard]"]).run_commands(
    "apt-get update",
    "apt-get install -y software-properties-common",
    "apt-get install -y git",
    "apt-add-repository non-free",
    "apt-add-repository contrib",
    "pip install -U git+https://github.com/unclecode/crawl4ai.git@next",
    "pip install -U fastapi[standard]",
    "pip install -U pydantic",
    "crawl4ai-setup",  # This installs playwright and downloads chromium
    # Print fastapi version (build-time sanity check)
    "python -m fastapi --version",
)

# Define the app
app = modal.App("crawl4ai", image=image)
||||
|
||||
# Define default configurations used when the caller supplies none.
DEFAULT_BROWSER_CONFIG = {
    "headless": True,   # no visible window — required inside a container
    "verbose": False,   # keep browser logging quiet
}

# Serialized CrawlerRunConfig consumed by CrawlerRunConfig.load() in crawl().
# The nested {"type": ..., "params": ...} shape mirrors crawl4ai's loader format.
DEFAULT_CRAWLER_CONFIG = {
    "crawler_config": {
        "type": "CrawlerRunConfig",
        "params": {
            "markdown_generator": {
                "type": "DefaultMarkdownGenerator",
                "params": {
                    "content_filter": {
                        "type": "PruningContentFilter",
                        "params": {
                            # Content scoring below this fixed threshold is pruned.
                            "threshold": 0.48,
                            "threshold_type": "fixed"
                        }
                    }
                }
            }
        }
    }
}
||||
|
||||
@app.function(timeout=300)  # 5 minute timeout
async def crawl(
    url: str,
    browser_config: Optional[Dict[str, Any]] = None,
    crawler_config: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Crawl a given URL using Crawl4ai.

    Args:
        url: The URL to crawl
        browser_config: Optional browser configuration to override defaults
        crawler_config: Optional crawler configuration to override defaults

    Returns:
        A dictionary containing the crawl results
    """
    from crawl4ai import (
        AsyncWebCrawler,
        BrowserConfig,
        CrawlerRunConfig,
        CrawlResult
    )

    # Prepare browser config using the loader method
    if browser_config is None:
        browser_config = DEFAULT_BROWSER_CONFIG
    browser_config_obj = BrowserConfig.load(browser_config)

    # Prepare crawler config using the loader method
    if crawler_config is None:
        crawler_config = DEFAULT_CRAWLER_CONFIG
    crawler_config_obj = CrawlerRunConfig.load(crawler_config)

    # Perform the crawl
    async with AsyncWebCrawler(config=browser_config_obj) as crawler:
        result: CrawlResult = await crawler.arun(url=url, config=crawler_config_obj)

    # Return serializable results
    try:
        # Pydantic v2 serialization
        return result.model_dump()
    except AttributeError:
        try:
            # Pydantic v1 serialization. Fixed from `result.__dict__`, which
            # bypasses field export and can leak non-serializable internals.
            return result.dict()
        except AttributeError:
            # Fallback to returning the raw result
            return result
|
||||
@app.function()
@modal.web_endpoint(method="POST")
def crawl_endpoint(data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Web endpoint that accepts POST requests with JSON data containing:
    - url: The URL to crawl
    - browser_config: Optional browser configuration
    - crawler_config: Optional crawler configuration

    Returns the crawl results.
    """
    target_url = data.get("url")
    if not target_url:
        return {"error": "URL is required"}

    return crawl.remote(
        target_url,
        data.get("browser_config"),
        data.get("crawler_config"),
    )
|
||||
|
||||
@app.local_entrypoint()
def main(url: str = "https://www.modal.com"):
    """
    Command line entrypoint for local testing.
    """
    crawl_result = crawl.remote(url)
    print(crawl_result)
|
||||
453
deploy/modal/guide.md
Normal file
453
deploy/modal/guide.md
Normal file
@@ -0,0 +1,453 @@
|
||||
# Deploying Crawl4ai with Modal: A Comprehensive Tutorial
|
||||
|
||||
Hey there! UncleCode here. I'm excited to show you how to deploy Crawl4ai using Modal - a fantastic serverless platform that makes deployment super simple and scalable.
|
||||
|
||||
In this tutorial, I'll walk you through deploying your own Crawl4ai instance on Modal's infrastructure. This will give you a powerful, scalable web crawling solution without having to worry about infrastructure management.
|
||||
|
||||
## What is Modal?
|
||||
|
||||
Modal is a serverless platform that allows you to run Python functions in the cloud without managing servers. It's perfect for deploying Crawl4ai because:
|
||||
|
||||
1. It handles all the infrastructure for you
|
||||
2. It scales automatically based on demand
|
||||
3. It makes deployment incredibly simple
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Before we get started, you'll need:
|
||||
|
||||
- A Modal account (sign up at [modal.com](https://modal.com))
|
||||
- Python 3.10 or later installed on your local machine
|
||||
- Basic familiarity with Python and command-line operations
|
||||
|
||||
## Step 1: Setting Up Your Modal Account
|
||||
|
||||
First, sign up for a Modal account at [modal.com](https://modal.com) if you haven't already. Modal offers a generous free tier that's perfect for getting started.
|
||||
|
||||
After signing up, install the Modal CLI and authenticate:
|
||||
|
||||
```bash
|
||||
pip install modal
|
||||
modal token new
|
||||
```
|
||||
|
||||
This will open a browser window where you can authenticate and generate a token for the CLI.
|
||||
|
||||
## Step 2: Creating Your Crawl4ai Deployment
|
||||
|
||||
Now, let's create a Python file called `crawl4ai_modal.py` with our deployment code:
|
||||
|
||||
```python
|
||||
import modal
|
||||
from typing import Optional, Dict, Any
|
||||
|
||||
# Create a custom image with Crawl4ai and its dependencies
|
||||
image = modal.Image.debian_slim(python_version="3.10").pip_install(
|
||||
["fastapi[standard]"]
|
||||
).run_commands(
|
||||
"apt-get update",
|
||||
"apt-get install -y software-properties-common",
|
||||
"apt-get install -y git",
|
||||
"apt-add-repository non-free",
|
||||
"apt-add-repository contrib",
|
||||
"pip install -U crawl4ai",
|
||||
"pip install -U fastapi[standard]",
|
||||
"pip install -U pydantic",
|
||||
"crawl4ai-setup", # This installs playwright and downloads chromium
|
||||
)
|
||||
|
||||
# Define the app
|
||||
app = modal.App("crawl4ai", image=image)
|
||||
|
||||
# Define default configurations
|
||||
DEFAULT_BROWSER_CONFIG = {
|
||||
"headless": True,
|
||||
"verbose": False,
|
||||
}
|
||||
|
||||
DEFAULT_CRAWLER_CONFIG = {
|
||||
"crawler_config": {
|
||||
"type": "CrawlerRunConfig",
|
||||
"params": {
|
||||
"markdown_generator": {
|
||||
"type": "DefaultMarkdownGenerator",
|
||||
"params": {
|
||||
"content_filter": {
|
||||
"type": "PruningContentFilter",
|
||||
"params": {
|
||||
"threshold": 0.48,
|
||||
"threshold_type": "fixed"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@app.function(timeout=300) # 5 minute timeout
|
||||
async def crawl(
|
||||
url: str,
|
||||
browser_config: Optional[Dict[str, Any]] = None,
|
||||
crawler_config: Optional[Dict[str, Any]] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Crawl a given URL using Crawl4ai.
|
||||
|
||||
Args:
|
||||
url: The URL to crawl
|
||||
browser_config: Optional browser configuration to override defaults
|
||||
crawler_config: Optional crawler configuration to override defaults
|
||||
|
||||
Returns:
|
||||
A dictionary containing the crawl results
|
||||
"""
|
||||
from crawl4ai import (
|
||||
AsyncWebCrawler,
|
||||
BrowserConfig,
|
||||
CrawlerRunConfig,
|
||||
CrawlResult
|
||||
)
|
||||
|
||||
# Prepare browser config using the loader method
|
||||
if browser_config is None:
|
||||
browser_config = DEFAULT_BROWSER_CONFIG
|
||||
browser_config_obj = BrowserConfig.load(browser_config)
|
||||
|
||||
# Prepare crawler config using the loader method
|
||||
if crawler_config is None:
|
||||
crawler_config = DEFAULT_CRAWLER_CONFIG
|
||||
crawler_config_obj = CrawlerRunConfig.load(crawler_config)
|
||||
|
||||
# Perform the crawl
|
||||
async with AsyncWebCrawler(config=browser_config_obj) as crawler:
|
||||
result: CrawlResult = await crawler.arun(url=url, config=crawler_config_obj)
|
||||
|
||||
# Return serializable results
|
||||
try:
|
||||
# Try newer Pydantic v2 method
|
||||
return result.model_dump()
|
||||
except AttributeError:
|
||||
try:
|
||||
# Try older Pydantic v1 method
|
||||
return result.dict()
|
||||
except AttributeError:
|
||||
# Fallback to manual conversion
|
||||
return {
|
||||
"url": result.url,
|
||||
"title": result.title,
|
||||
"status": result.status,
|
||||
"content": str(result.content) if hasattr(result, "content") else None,
|
||||
"links": [{"url": link.url, "text": link.text} for link in result.links] if hasattr(result, "links") else [],
|
||||
"markdown_v2": {
|
||||
"raw_markdown": result.markdown_v2.raw_markdown if hasattr(result, "markdown_v2") else None
|
||||
}
|
||||
}
|
||||
|
||||
@app.function()
|
||||
@modal.web_endpoint(method="POST")
|
||||
def crawl_endpoint(data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Web endpoint that accepts POST requests with JSON data containing:
|
||||
- url: The URL to crawl
|
||||
- browser_config: Optional browser configuration
|
||||
- crawler_config: Optional crawler configuration
|
||||
|
||||
Returns the crawl results.
|
||||
"""
|
||||
url = data.get("url")
|
||||
if not url:
|
||||
return {"error": "URL is required"}
|
||||
|
||||
browser_config = data.get("browser_config")
|
||||
crawler_config = data.get("crawler_config")
|
||||
|
||||
return crawl.remote(url, browser_config, crawler_config)
|
||||
|
||||
@app.local_entrypoint()
|
||||
def main(url: str = "https://www.modal.com"):
|
||||
"""
|
||||
Command line entrypoint for local testing.
|
||||
"""
|
||||
result = crawl.remote(url)
|
||||
print(result)
|
||||
```
|
||||
|
||||
## Step 3: Understanding the Code Components
|
||||
|
||||
Let's break down what's happening in this code:
|
||||
|
||||
### 1. Image Definition
|
||||
|
||||
```python
|
||||
image = modal.Image.debian_slim(python_version="3.10").pip_install(
|
||||
["fastapi[standard]"]
|
||||
).run_commands(
|
||||
"apt-get update",
|
||||
"apt-get install -y software-properties-common",
|
||||
"apt-get install -y git",
|
||||
"apt-add-repository non-free",
|
||||
"apt-add-repository contrib",
|
||||
"pip install -U git+https://github.com/unclecode/crawl4ai.git@next",
|
||||
"pip install -U fastapi[standard]",
|
||||
"pip install -U pydantic",
|
||||
"crawl4ai-setup", # This installs playwright and downloads chromium
|
||||
)
|
||||
```
|
||||
|
||||
This section defines the container image that Modal will use to run your code. It:
|
||||
- Starts with a Debian Slim base image with Python 3.10
|
||||
- Installs FastAPI
|
||||
- Updates the system packages
|
||||
- Installs Git and other dependencies
|
||||
- Installs Crawl4ai from the GitHub repository
|
||||
- Runs the Crawl4ai setup to install Playwright and download Chromium
|
||||
|
||||
### 2. Modal App Definition
|
||||
|
||||
```python
|
||||
app = modal.App("crawl4ai", image=image)
|
||||
```
|
||||
|
||||
This creates a Modal application named "crawl4ai" that uses the image we defined above.
|
||||
|
||||
### 3. Default Configurations
|
||||
|
||||
```python
|
||||
DEFAULT_BROWSER_CONFIG = {
|
||||
"headless": True,
|
||||
"verbose": False,
|
||||
}
|
||||
|
||||
DEFAULT_CRAWLER_CONFIG = {
|
||||
"crawler_config": {
|
||||
"type": "CrawlerRunConfig",
|
||||
"params": {
|
||||
"markdown_generator": {
|
||||
"type": "DefaultMarkdownGenerator",
|
||||
"params": {
|
||||
"content_filter": {
|
||||
"type": "PruningContentFilter",
|
||||
"params": {
|
||||
"threshold": 0.48,
|
||||
"threshold_type": "fixed"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
These define the default configurations for the browser and crawler. You can customize these settings based on your specific needs.
|
||||
|
||||
### 4. The Crawl Function
|
||||
|
||||
```python
|
||||
@app.function(timeout=300)
|
||||
async def crawl(url, browser_config, crawler_config):
|
||||
# Function implementation
|
||||
```
|
||||
|
||||
This is the main function that performs the crawling. It:
|
||||
- Takes a URL and optional configurations
|
||||
- Sets up the browser and crawler with those configurations
|
||||
- Performs the crawl
|
||||
- Returns the results in a serializable format
|
||||
|
||||
The `@app.function(timeout=300)` decorator tells Modal to run this function in the cloud with a 5-minute timeout.
|
||||
|
||||
### 5. The Web Endpoint
|
||||
|
||||
```python
|
||||
@app.function()
|
||||
@modal.web_endpoint(method="POST")
|
||||
def crawl_endpoint(data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
# Function implementation
|
||||
```
|
||||
|
||||
This creates a web endpoint that accepts POST requests. It:
|
||||
- Extracts the URL and configurations from the request
|
||||
- Calls the crawl function with those parameters
|
||||
- Returns the results
|
||||
|
||||
### 6. Local Entrypoint
|
||||
|
||||
```python
|
||||
@app.local_entrypoint()
|
||||
def main(url: str = "https://www.modal.com"):
|
||||
# Function implementation
|
||||
```
|
||||
|
||||
This provides a way to test the application from the command line.
|
||||
|
||||
## Step 4: Testing Locally
|
||||
|
||||
Before deploying, let's test our application locally:
|
||||
|
||||
```bash
|
||||
modal run crawl4ai_modal.py --url "https://example.com"
|
||||
```
|
||||
|
||||
This command will:
|
||||
1. Upload your code to Modal
|
||||
2. Create the necessary containers
|
||||
3. Run the `main` function with the specified URL
|
||||
4. Return the results
|
||||
|
||||
Modal will handle all the infrastructure setup for you. You should see the crawling results printed to your console.
|
||||
|
||||
## Step 5: Deploying Your Application
|
||||
|
||||
Once you're satisfied with the local testing, it's time to deploy:
|
||||
|
||||
```bash
|
||||
modal deploy crawl4ai_modal.py
|
||||
```
|
||||
|
||||
This will deploy your application to Modal's cloud. The deployment process will output URLs for your web endpoints.
|
||||
|
||||
You should see output similar to:
|
||||
|
||||
```
|
||||
✓ Deployed crawl4ai.
|
||||
URLs:
|
||||
crawl_endpoint => https://your-username--crawl-endpoint.modal.run
|
||||
```
|
||||
|
||||
Save this URL - you'll need it to make requests to your deployment.
|
||||
|
||||
## Step 6: Using Your Deployment
|
||||
|
||||
Now that your application is deployed, you can use it by sending POST requests to the endpoint URL:
|
||||
|
||||
```bash
|
||||
curl -X POST https://your-username--crawl-endpoint.modal.run \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"url": "https://example.com"}'
|
||||
```
|
||||
|
||||
Or in Python:
|
||||
|
||||
```python
|
||||
import requests
|
||||
|
||||
response = requests.post(
|
||||
"https://your-username--crawl-endpoint.modal.run",
|
||||
json={"url": "https://example.com"}
|
||||
)
|
||||
|
||||
result = response.json()
|
||||
print(result)
|
||||
```
|
||||
|
||||
You can also customize the browser and crawler configurations:
|
||||
|
||||
```python
|
||||
requests.post(
|
||||
"https://your-username--crawl-endpoint.modal.run",
|
||||
json={
|
||||
"url": "https://example.com",
|
||||
"browser_config": {
|
||||
"headless": False,
|
||||
"verbose": True
|
||||
},
|
||||
"crawler_config": {
|
||||
"crawler_config": {
|
||||
"type": "CrawlerRunConfig",
|
||||
"params": {
|
||||
"markdown_generator": {
|
||||
"type": "DefaultMarkdownGenerator",
|
||||
"params": {
|
||||
"content_filter": {
|
||||
"type": "PruningContentFilter",
|
||||
"params": {
|
||||
"threshold": 0.6, # Adjusted threshold
|
||||
"threshold_type": "fixed"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
## Step 7: Calling Your Deployment from Another Python Script
|
||||
|
||||
You can also call your deployed function directly from another Python script:
|
||||
|
||||
```python
|
||||
import modal
|
||||
|
||||
# Get a reference to the deployed function
|
||||
crawl_function = modal.Function.from_name("crawl4ai", "crawl")
|
||||
|
||||
# Call the function
|
||||
result = crawl_function.remote("https://example.com")
|
||||
print(result)
|
||||
```
|
||||
|
||||
## Understanding Modal's Execution Flow
|
||||
|
||||
To understand how Modal works, it's important to know:
|
||||
|
||||
1. **Local vs. Remote Execution**: When you call a function with `.remote()`, it runs in Modal's cloud, not on your local machine.
|
||||
|
||||
2. **Container Lifecycle**: Modal creates containers on-demand and destroys them when they're not needed.
|
||||
|
||||
3. **Caching**: Modal caches your container images to speed up subsequent runs.
|
||||
|
||||
4. **Serverless Scaling**: Modal automatically scales your application based on demand.
|
||||
|
||||
## Customizing Your Deployment
|
||||
|
||||
You can customize your deployment in several ways:
|
||||
|
||||
### Changing the Crawl4ai Version
|
||||
|
||||
To use a different version of Crawl4ai, update the installation command in the image definition:
|
||||
|
||||
```python
|
||||
"pip install -U git+https://github.com/unclecode/crawl4ai.git@main", # Use main branch
|
||||
```
|
||||
|
||||
### Adjusting Resource Limits
|
||||
|
||||
You can change the resources allocated to your functions:
|
||||
|
||||
```python
|
||||
@app.function(timeout=600, cpu=2, memory=4096) # 10 minute timeout, 2 CPUs, 4GB RAM
|
||||
async def crawl(...):
|
||||
# Function implementation
|
||||
```
|
||||
|
||||
### Keeping Containers Warm
|
||||
|
||||
To reduce cold start times, you can keep containers warm:
|
||||
|
||||
```python
|
||||
@app.function(keep_warm=1) # Keep 1 container warm
|
||||
async def crawl(...):
|
||||
# Function implementation
|
||||
```
|
||||
|
||||
## Conclusion
|
||||
|
||||
That's it! You've successfully deployed Crawl4ai on Modal. You now have a scalable web crawling solution that can handle as many requests as you need without requiring any infrastructure management.
|
||||
|
||||
The beauty of this setup is its simplicity - Modal handles all the hard parts, letting you focus on using Crawl4ai to extract the data you need.
|
||||
|
||||
Feel free to reach out if you have any questions or need help with your deployment!
|
||||
|
||||
Happy crawling!
|
||||
- UncleCode
|
||||
|
||||
## Additional Resources
|
||||
|
||||
- [Modal Documentation](https://modal.com/docs)
|
||||
- [Crawl4ai GitHub Repository](https://github.com/unclecode/crawl4ai)
|
||||
- [Crawl4ai Documentation](https://docs.crawl4ai.com)
|
||||
317
deploy/modal/test_modal.py
Normal file
317
deploy/modal/test_modal.py
Normal file
@@ -0,0 +1,317 @@
|
||||
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Crawl4ai API Testing Script
|
||||
|
||||
This script tests all endpoints of the Crawl4ai API service and demonstrates their usage.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
from typing import Dict, Any, List, Optional
|
||||
|
||||
import requests
|
||||
|
||||
# Colors for terminal output
|
||||
class Colors:
    """ANSI escape sequences for colorized terminal output."""
    HEADER = '\033[95m'     # bright magenta
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'
    ENDC = '\033[0m'        # reset all attributes
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
||||
|
||||
def print_header(text: str) -> None:
    """Print a formatted header."""
    bar = f"{Colors.HEADER}{Colors.BOLD}{'=' * 80}{Colors.ENDC}"
    print(f"\n{bar}")
    print(f"{Colors.HEADER}{Colors.BOLD}{text.center(80)}{Colors.ENDC}")
    print(f"{bar}\n")
||||
|
||||
def print_step(text: str) -> None:
    """Print a formatted step description."""
    prefix = f"{Colors.BLUE}{Colors.BOLD}"
    print(f"{prefix}>> {text}{Colors.ENDC}")
||||
|
||||
def print_success(text: str) -> None:
    """Print a success message."""
    print(Colors.GREEN + f"✓ {text}" + Colors.ENDC)
|
||||
|
||||
def print_warning(text: str) -> None:
    """Print a warning message."""
    print(Colors.YELLOW + f"⚠ {text}" + Colors.ENDC)
||||
|
||||
def print_error(text: str) -> None:
    """Print an error message."""
    print(Colors.RED + f"✗ {text}" + Colors.ENDC)
||||
|
||||
def print_json(data: Dict[str, Any]) -> None:
|
||||
"""Pretty print JSON data."""
|
||||
print(json.dumps(data, indent=2))
|
||||
|
||||
def make_request(method: str, url: str, params: Optional[Dict[str, Any]] = None,
                 json_data: Optional[Dict[str, Any]] = None,
                 expected_status: int = 200) -> Dict[str, Any]:
    """Make an HTTP request and handle errors."""
    print_step(f"Making {method.upper()} request to {url}")

    if params:
        print(f"  Parameters: {params}")
    if json_data:
        print(f"  JSON Data: {json_data}")

    try:
        response = requests.request(
            method=method,
            url=url,
            params=params,
            json=json_data,
            timeout=300,  # 5 minute timeout for crawling operations
        )
    except requests.RequestException as exc:
        print_error(f"Request failed: {str(exc)}")
        return {"error": str(exc)}

    status_code = response.status_code
    print(f"  Status Code: {status_code}")

    try:
        payload = response.json()
    except ValueError:
        print_error("Response is not valid JSON")
        print(response.text)
        return {"error": "Invalid JSON response"}

    print("  Response:")
    print_json(payload)

    if status_code != expected_status:
        print_error(f"Expected status code {expected_status}, got {status_code}")
        return payload

    print_success("Request successful")
    return payload
|
||||
|
||||
def test_health_check(base_url: str) -> bool:
    """Test the health check endpoint."""
    print_header("Testing Health Check Endpoint")

    response = make_request("GET", f"{base_url}/health_check")

    healthy = response.get("status") == "online"
    if healthy:
        print_success("Health check passed")
    else:
        print_error("Health check failed")
    return healthy
|
||||
|
||||
def test_admin_create_user(base_url: str, admin_token: str, email: str, name: str) -> Optional[str]:
    """Test creating a new user; return the new user's API token on success."""
    print_header("Testing Admin User Creation")

    payload = {
        "admin_token": admin_token,
        "email": email,
        "name": name,
    }
    response = make_request(
        "POST",
        f"{base_url}/admin_create_user",
        json_data=payload,
        expected_status=201,
    )

    if response.get("success") and "data" in response:
        token = response["data"].get("api_token")
        if token:
            print_success(f"User created successfully with API token: {token}")
            return token

    print_error("Failed to create user")
    return None
|
||||
|
||||
def test_check_credits(base_url: str, api_token: str) -> Optional[int]:
    """Test checking user credits; return the balance or None on failure."""
    print_header("Testing Check Credits Endpoint")

    response = make_request(
        "GET",
        f"{base_url}/check_credits",
        params={"api_token": api_token},
    )

    if response.get("success") and "data" in response:
        balance = response["data"].get("credits")
        if balance is not None:
            print_success(f"User has {balance} credits")
            return balance

    print_error("Failed to check credits")
    return None
|
||||
|
||||
def test_crawl_endpoint(base_url: str, api_token: str, url: str) -> bool:
    """Test the crawl endpoint and display a summary of the crawl result."""
    print_header("Testing Crawl Endpoint")

    response = make_request(
        "POST",
        f"{base_url}/crawl_endpoint",
        json_data={"api_token": api_token, "url": url},
    )

    if not (response.get("success") and "data" in response):
        print_error("Crawl failed")
        return False

    print_success("Crawl completed successfully")

    # Display some crawl result data
    data = response["data"]
    if "title" in data:
        print(f"Page Title: {data['title']}")
    if "status" in data:
        print(f"Status: {data['status']}")
    if "links" in data:
        print(f"Links found: {len(data['links'])}")
    if "markdown_v2" in data and data["markdown_v2"] and "raw_markdown" in data["markdown_v2"]:
        print("Markdown Preview (first 200 chars):")
        print(data["markdown_v2"]["raw_markdown"][:200] + "...")

    credits_remaining = response.get("credits_remaining")
    if credits_remaining is not None:
        print(f"Credits remaining: {credits_remaining}")

    return True
|
||||
|
||||
def test_admin_update_credits(base_url: str, admin_token: str, api_token: str, amount: int) -> bool:
    """Test updating user credits."""
    print_header("Testing Admin Update Credits")

    body = {
        "admin_token": admin_token,
        "api_token": api_token,
        "amount": amount,
    }
    response = make_request("POST", f"{base_url}/admin_update_credits", json_data=body)

    if response.get("success") and "data" in response:
        print_success(f"Credits updated successfully, new balance: {response['data'].get('credits')}")
        return True

    print_error("Failed to update credits")
    return False
|
||||
|
||||
def test_admin_get_users(base_url: str, admin_token: str) -> List[Dict[str, Any]]:
    """Test getting all users; return the user list ([] on failure)."""
    print_header("Testing Admin Get All Users")

    response = make_request(
        "GET",
        f"{base_url}/admin_get_users",
        params={"admin_token": admin_token},
    )

    if response.get("success") and "data" in response:
        user_list = response["data"]
        print_success(f"Retrieved {len(user_list)} users")
        return user_list

    print_error("Failed to get users")
    return []
|
||||
|
||||
def run_full_test(base_url: str, admin_token: str) -> None:
    """Run all tests in sequence.

    Exits the process (status 1) if the health check, user creation, or the
    initial credit check fails; later failures only warn and continue.
    """
    # Remove trailing slash if present
    base_url = base_url.rstrip('/')

    # Test 1: Health Check
    if not test_health_check(base_url):
        print_error("Health check failed, aborting tests")
        sys.exit(1)

    # Test 2: Create a test user (timestamped email keeps runs unique)
    email = f"test-user-{int(time.time())}@example.com"
    name = "Test User"
    api_token = test_admin_create_user(base_url, admin_token, email, name)

    if not api_token:
        print_error("User creation failed, aborting tests")
        sys.exit(1)

    # Test 3: Check initial credits
    initial_credits = test_check_credits(base_url, api_token)

    if initial_credits is None:
        print_error("Credit check failed, aborting tests")
        sys.exit(1)

    # Test 4: Perform a crawl
    test_url = "https://news.ycombinator.com"
    crawl_success = test_crawl_endpoint(base_url, api_token, test_url)

    if not crawl_success:
        print_warning("Crawl test failed, but continuing with other tests")

    # Test 5: Check credits after crawl — a successful crawl is expected to
    # cost exactly one credit.
    post_crawl_credits = test_check_credits(base_url, api_token)

    if post_crawl_credits is not None and initial_credits is not None:
        if post_crawl_credits == initial_credits - 1:
            print_success("Credit deduction verified")
        else:
            print_warning(f"Unexpected credit change: {initial_credits} -> {post_crawl_credits}")

    # Test 6: Add credits
    add_credits_amount = 50
    if test_admin_update_credits(base_url, admin_token, api_token, add_credits_amount):
        print_success(f"Added {add_credits_amount} credits")

    # Test 7: Check credits after addition
    post_addition_credits = test_check_credits(base_url, api_token)

    if post_addition_credits is not None and post_crawl_credits is not None:
        if post_addition_credits == post_crawl_credits + add_credits_amount:
            print_success("Credit addition verified")
        else:
            print_warning(f"Unexpected credit change: {post_crawl_credits} -> {post_addition_credits}")

    # Test 8: Get all users
    users = test_admin_get_users(base_url, admin_token)

    if users:
        # Check if our test user is in the list
        test_user = next((user for user in users if user.get("email") == email), None)
        if test_user:
            print_success("Test user found in users list")
        else:
            print_warning("Test user not found in users list")

    # Final report
    print_header("Test Summary")

    print_success("All endpoints tested successfully")
    print(f"Test user created with email: {email}")
    print(f"API token: {api_token}")
    print(f"Final credit balance: {post_addition_credits}")
|
||||
|
||||
def main():
    """Parse CLI arguments and run the full API test suite."""
    parser = argparse.ArgumentParser(description="Test Crawl4ai API endpoints")
    parser.add_argument(
        "--base-url",
        required=True,
        help="Base URL of the Crawl4ai API (e.g., https://username--crawl4ai-api.modal.run)",
    )
    parser.add_argument("--admin-token", required=True, help="Admin token for authentication")
    args = parser.parse_args()

    print_header("Crawl4ai API Test Script")
    print(f"Testing API at: {args.base_url}")

    run_full_test(args.base_url, args.admin_token)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user