Add all 5 deployment solutions for testing

UncleCode
2025-03-10 18:57:14 +08:00
parent 9547bada3a
commit 3ea3c0520d
38 changed files with 6431 additions and 0 deletions


@@ -0,0 +1,543 @@
import os
import time
import uuid
from datetime import datetime
from typing import Dict, Any, Optional, List
import modal
from modal import Image, App, Volume, Secret, web_endpoint
# Configuration
APP_NAME = "crawl4ai-api"
CRAWL4AI_VERSION = "next" # Using the 'next' branch
PYTHON_VERSION = "3.10" # Compatible with playwright
DEFAULT_CREDITS = 1000
# Create a custom image with Crawl4ai and its dependencies
image = Image.debian_slim(python_version=PYTHON_VERSION).pip_install(
["fastapi[standard]", "pymongo", "pydantic"]
).run_commands(
"apt-get update",
"apt-get install -y software-properties-common",
"apt-get install -y git",
"apt-add-repository non-free",
"apt-add-repository contrib",
# Install crawl4ai from the next branch
f"pip install -U git+https://github.com/unclecode/crawl4ai.git@{CRAWL4AI_VERSION}",
"pip install -U fastapi[standard]",
"pip install -U pydantic",
# Install playwright and browsers
"crawl4ai-setup",
)
# Create persistent volume for user database
user_db = Volume.from_name("crawl4ai-users", create_if_missing=True)
# Admin secret for secure operations; create it once beforehand, e.g.:
#   modal secret create admin-secret ADMIN_TOKEN=<your-token>
admin_secret = Secret.from_name("admin-secret")
# Define the app
app = App(APP_NAME, image=image)
# Default configurations
DEFAULT_BROWSER_CONFIG = {
"headless": True,
"verbose": False,
}
DEFAULT_CRAWLER_CONFIG = {
"crawler_config": {
"type": "CrawlerRunConfig",
"params": {
"markdown_generator": {
"type": "DefaultMarkdownGenerator",
"params": {
"content_filter": {
"type": "PruningContentFilter",
"params": {
"threshold": 0.48,
"threshold_type": "fixed"
}
}
}
}
}
}
}
# Database operations
# NOTE: these helpers connect to MongoDB at localhost:27017 inside the
# container; the image above does not install or launch mongod, so a MongoDB
# server (e.g. one started against the /data volume) must be available for
# these calls to succeed.
@app.function(volumes={"/data": user_db})
def init_db() -> None:
"""Initialize database with indexes."""
from pymongo import MongoClient, ASCENDING
client = MongoClient("mongodb://localhost:27017")
db = client.crawl4ai_db
# Ensure indexes for faster lookups
db.users.create_index([("api_token", ASCENDING)], unique=True)
db.users.create_index([("email", ASCENDING)], unique=True)
# Create usage stats collection
db.usage_stats.create_index([("user_id", ASCENDING), ("timestamp", ASCENDING)])
print("Database initialized with required indexes")
@app.function(volumes={"/data": user_db})
def get_user_by_token(api_token: str) -> Optional[Dict[str, Any]]:
"""Get user by API token."""
from pymongo import MongoClient
client = MongoClient("mongodb://localhost:27017")
db = client.crawl4ai_db
user = db.users.find_one({"api_token": api_token})
if not user:
return None
# Convert ObjectId to string for serialization
user["_id"] = str(user["_id"])
return user
@app.function(volumes={"/data": user_db})
def create_user(email: str, name: str) -> Dict[str, Any]:
"""Create a new user with initial credits."""
from pymongo import MongoClient
from pymongo.errors import DuplicateKeyError
client = MongoClient("mongodb://localhost:27017")
db = client.crawl4ai_db
# Generate API token
api_token = str(uuid.uuid4())
user = {
"email": email,
"name": name,
"api_token": api_token,
"credits": DEFAULT_CREDITS,
"created_at": datetime.utcnow(),
"updated_at": datetime.utcnow(),
"is_active": True
}
try:
result = db.users.insert_one(user)
user["_id"] = str(result.inserted_id)
return user
except DuplicateKeyError:
return {"error": "User with this email already exists"}
@app.function(volumes={"/data": user_db})
def update_user_credits(api_token: str, amount: int) -> Dict[str, Any]:
"""Update user credits (add or subtract)."""
from pymongo import MongoClient
client = MongoClient("mongodb://localhost:27017")
db = client.crawl4ai_db
# First get current user to check credits
user = db.users.find_one({"api_token": api_token})
if not user:
return {"success": False, "error": "User not found"}
# For deductions, ensure sufficient credits
if amount < 0 and user["credits"] + amount < 0:
return {"success": False, "error": "Insufficient credits"}
# Update credits
result = db.users.update_one(
{"api_token": api_token},
{
"$inc": {"credits": amount},
"$set": {"updated_at": datetime.utcnow()}
}
)
if result.modified_count == 1:
# Get updated user
updated_user = db.users.find_one({"api_token": api_token})
return {
"success": True,
"credits": updated_user["credits"]
}
else:
return {"success": False, "error": "Failed to update credits"}
@app.function(volumes={"/data": user_db})
def log_usage(user_id: str, url: str, success: bool, error: Optional[str] = None) -> None:
"""Log usage statistics."""
from pymongo import MongoClient
client = MongoClient("mongodb://localhost:27017")
db = client.crawl4ai_db
log_entry = {
"user_id": user_id,
"url": url,
"timestamp": datetime.utcnow(),
"success": success,
"error": error
}
db.usage_stats.insert_one(log_entry)
# Main crawling function
@app.function(timeout=300) # 5 minute timeout
async def crawl(
url: str,
browser_config: Optional[Dict[str, Any]] = None,
crawler_config: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
"""
Crawl a given URL using Crawl4ai.
Args:
url: The URL to crawl
browser_config: Optional browser configuration to override defaults
crawler_config: Optional crawler configuration to override defaults
Returns:
A dictionary containing the crawl results
"""
from crawl4ai import (
AsyncWebCrawler,
BrowserConfig,
CrawlerRunConfig,
CrawlResult
)
# Prepare browser config using the loader method
if browser_config is None:
browser_config = DEFAULT_BROWSER_CONFIG
browser_config_obj = BrowserConfig.load(browser_config)
# Prepare crawler config using the loader method
if crawler_config is None:
crawler_config = DEFAULT_CRAWLER_CONFIG
crawler_config_obj = CrawlerRunConfig.load(crawler_config)
# Perform the crawl
async with AsyncWebCrawler(config=browser_config_obj) as crawler:
result: CrawlResult = await crawler.arun(url=url, config=crawler_config_obj)
# Return serializable results
try:
# Try newer Pydantic v2 method
return result.model_dump()
except AttributeError:
try:
# Try older Pydantic v1 method
return result.dict()
except AttributeError:
# Fallback to manual conversion; field names vary across crawl4ai
# versions, so guard every attribute access
return {
"url": getattr(result, "url", url),
"title": getattr(result, "title", None),
"status": getattr(result, "status", None),
"content": str(result.content) if hasattr(result, "content") else None,
"links": [{"url": link.url, "text": link.text} for link in result.links] if hasattr(result, "links") else [],
"markdown_v2": {
"raw_markdown": result.markdown_v2.raw_markdown if getattr(result, "markdown_v2", None) else None
}
}
# API endpoints
@app.function()
@web_endpoint(method="POST")
def crawl_endpoint(data: Dict[str, Any]) -> Dict[str, Any]:
"""
Web endpoint that accepts POST requests with JSON data containing:
- api_token: User's API token
- url: The URL to crawl
- browser_config: Optional browser configuration
- crawler_config: Optional crawler configuration
Returns the crawl results and remaining credits.
"""
# Extract and validate API token
api_token = data.get("api_token")
if not api_token:
return {
"success": False,
"error": "API token is required",
"status_code": 401
}
# Verify user
user = get_user_by_token.remote(api_token)
if not user:
return {
"success": False,
"error": "Invalid API token",
"status_code": 401
}
if not user.get("is_active", False):
return {
"success": False,
"error": "Account is inactive",
"status_code": 403
}
# Validate URL
url = data.get("url")
if not url:
return {
"success": False,
"error": "URL is required",
"status_code": 400
}
# Check credits
if user.get("credits", 0) <= 0:
return {
"success": False,
"error": "Insufficient credits",
"status_code": 403
}
# Deduct credit first (1 credit per call)
credit_result = update_user_credits.remote(api_token, -1)
if not credit_result.get("success", False):
return {
"success": False,
"error": credit_result.get("error", "Failed to process credits"),
"status_code": 500
}
# Extract configs
browser_config = data.get("browser_config")
crawler_config = data.get("crawler_config")
# Perform crawl
try:
start_time = time.time()
result = crawl.remote(url, browser_config, crawler_config)
execution_time = time.time() - start_time
# Log successful usage
log_usage.spawn(user["_id"], url, True)
return {
"success": True,
"data": result,
"credits_remaining": credit_result.get("credits"),
"execution_time_seconds": round(execution_time, 2),
"status_code": 200
}
except Exception as e:
# Log failed usage
log_usage.spawn(user["_id"], url, False, str(e))
# Return error
return {
"success": False,
"error": f"Crawling error: {str(e)}",
"credits_remaining": credit_result.get("credits"),
"status_code": 500
}
# Admin endpoints
@app.function(secrets=[admin_secret])
@web_endpoint(method="POST")
def admin_create_user(data: Dict[str, Any]) -> Dict[str, Any]:
"""Admin endpoint to create new users."""
# Validate admin token
admin_token = data.get("admin_token")
if admin_token != os.environ.get("ADMIN_TOKEN"):
return {
"success": False,
"error": "Invalid admin token",
"status_code": 401
}
# Validate input
email = data.get("email")
name = data.get("name")
if not email or not name:
return {
"success": False,
"error": "Email and name are required",
"status_code": 400
}
# Create user
user = create_user.remote(email, name)
if "error" in user:
return {
"success": False,
"error": user["error"],
"status_code": 400
}
return {
"success": True,
"data": {
"user_id": user["_id"],
"email": user["email"],
"name": user["name"],
"api_token": user["api_token"],
"credits": user["credits"],
"created_at": user["created_at"].isoformat() if isinstance(user["created_at"], datetime) else user["created_at"]
},
"status_code": 201
}
@app.function(secrets=[admin_secret])
@web_endpoint(method="POST")
def admin_update_credits(data: Dict[str, Any]) -> Dict[str, Any]:
"""Admin endpoint to update user credits."""
# Validate admin token
admin_token = data.get("admin_token")
if admin_token != os.environ.get("ADMIN_TOKEN"):
return {
"success": False,
"error": "Invalid admin token",
"status_code": 401
}
# Validate input
api_token = data.get("api_token")
amount = data.get("amount")
if not api_token:
return {
"success": False,
"error": "API token is required",
"status_code": 400
}
if not isinstance(amount, int):
return {
"success": False,
"error": "Amount must be an integer",
"status_code": 400
}
# Update credits
result = update_user_credits.remote(api_token, amount)
if not result.get("success", False):
return {
"success": False,
"error": result.get("error", "Failed to update credits"),
"status_code": 400
}
return {
"success": True,
"data": {
"credits": result["credits"]
},
"status_code": 200
}
@app.function(secrets=[admin_secret])
@web_endpoint(method="GET")
def admin_get_users(admin_token: str) -> Dict[str, Any]:
"""Admin endpoint to list all users."""
# Validate admin token
if admin_token != os.environ.get("ADMIN_TOKEN"):
return {
"success": False,
"error": "Invalid admin token",
"status_code": 401
}
users = get_all_users.remote()
return {
"success": True,
"data": users,
"status_code": 200
}
@app.function(volumes={"/data": user_db})
def get_all_users() -> List[Dict[str, Any]]:
"""Get all users (for admin)."""
from pymongo import MongoClient
client = MongoClient("mongodb://localhost:27017")
db = client.crawl4ai_db
users = []
for user in db.users.find():
# Convert ObjectId to string
user["_id"] = str(user["_id"])
# Convert datetime to ISO format
for field in ["created_at", "updated_at"]:
if field in user and isinstance(user[field], datetime):
user[field] = user[field].isoformat()
users.append(user)
return users
# Public endpoints
@app.function()
@web_endpoint(method="GET")
def health_check() -> Dict[str, Any]:
"""Health check endpoint."""
return {
"status": "online",
"service": APP_NAME,
"version": CRAWL4AI_VERSION,
"timestamp": datetime.utcnow().isoformat()
}
@app.function()
@web_endpoint(method="GET")
def check_credits(api_token: str) -> Dict[str, Any]:
"""Check user credits."""
if not api_token:
return {
"success": False,
"error": "API token is required",
"status_code": 401
}
user = get_user_by_token.remote(api_token)
if not user:
return {
"success": False,
"error": "Invalid API token",
"status_code": 401
}
return {
"success": True,
"data": {
"credits": user["credits"],
"email": user["email"],
"name": user["name"]
},
"status_code": 200
}
# Local entrypoint for testing
@app.local_entrypoint()
def main(url: str = "https://www.modal.com"):
"""Command line entrypoint for local testing."""
print("Initializing database...")
init_db.remote()
print(f"Testing crawl on URL: {url}")
result = crawl.remote(url)
# Print sample of result
print("\nCrawl Result Sample:")
if "title" in result:
print(f"Title: {result['title']}")
if "status" in result:
print(f"Status: {result['status']}")
if "links" in result:
print(f"Links found: {len(result['links'])}")
if "markdown_v2" in result and result["markdown_v2"] and "raw_markdown" in result["markdown_v2"]:
print("\nMarkdown Preview (first 300 chars):")
print(result["markdown_v2"]["raw_markdown"][:300] + "...")

deploy/modal/entry.py Normal file (+127 lines)

@@ -0,0 +1,127 @@
import modal
from typing import Optional, Dict, Any
# Create a custom image with Crawl4ai and its dependencies
# "pip install crawl4ai",
image = modal.Image.debian_slim(python_version="3.10").pip_install(["fastapi[standard]"]).run_commands(
"apt-get update",
"apt-get install -y software-properties-common",
"apt-get install -y git",
"apt-add-repository non-free",
"apt-add-repository contrib",
"pip install -U git+https://github.com/unclecode/crawl4ai.git@next",
"pip install -U fastapi[standard]",
"pip install -U pydantic",
"crawl4ai-setup", # This installs playwright and downloads chromium
# Print fastapi version
"python -m fastapi --version",
)
# Define the app
app = modal.App("crawl4ai", image=image)
# Define default configurations
DEFAULT_BROWSER_CONFIG = {
"headless": True,
"verbose": False,
}
DEFAULT_CRAWLER_CONFIG = {
"crawler_config": {
"type": "CrawlerRunConfig",
"params": {
"markdown_generator": {
"type": "DefaultMarkdownGenerator",
"params": {
"content_filter": {
"type": "PruningContentFilter",
"params": {
"threshold": 0.48,
"threshold_type": "fixed"
}
}
}
}
}
}
}
@app.function(timeout=300) # 5 minute timeout
async def crawl(
url: str,
browser_config: Optional[Dict[str, Any]] = None,
crawler_config: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
"""
Crawl a given URL using Crawl4ai.
Args:
url: The URL to crawl
browser_config: Optional browser configuration to override defaults
crawler_config: Optional crawler configuration to override defaults
Returns:
A dictionary containing the crawl results
"""
from crawl4ai import (
AsyncWebCrawler,
BrowserConfig,
CrawlerRunConfig,
CrawlResult
)
# Prepare browser config using the loader method
if browser_config is None:
browser_config = DEFAULT_BROWSER_CONFIG
browser_config_obj = BrowserConfig.load(browser_config)
# Prepare crawler config using the loader method
if crawler_config is None:
crawler_config = DEFAULT_CRAWLER_CONFIG
crawler_config_obj = CrawlerRunConfig.load(crawler_config)
# Perform the crawl
async with AsyncWebCrawler(config=browser_config_obj) as crawler:
result: CrawlResult = await crawler.arun(url=url, config=crawler_config_obj)
# Return serializable results
try:
# Try newer Pydantic v2 method
return result.model_dump()
except AttributeError:
try:
# Fall back to the instance's attribute dict (__dict__ is not the real Pydantic v1 .dict() method)
return result.__dict__
except AttributeError:
# Last resort: return the raw result as-is (may not be JSON-serializable)
return result
@app.function()
@modal.web_endpoint(method="POST")
def crawl_endpoint(data: Dict[str, Any]) -> Dict[str, Any]:
"""
Web endpoint that accepts POST requests with JSON data containing:
- url: The URL to crawl
- browser_config: Optional browser configuration
- crawler_config: Optional crawler configuration
Returns the crawl results.
"""
url = data.get("url")
if not url:
return {"error": "URL is required"}
browser_config = data.get("browser_config")
crawler_config = data.get("crawler_config")
return crawl.remote(url, browser_config, crawler_config)
@app.local_entrypoint()
def main(url: str = "https://www.modal.com"):
"""
Command line entrypoint for local testing.
"""
result = crawl.remote(url)
print(result)

deploy/modal/guide.md Normal file (+453 lines)

@@ -0,0 +1,453 @@
# Deploying Crawl4ai with Modal: A Comprehensive Tutorial
Hey there! UncleCode here. I'm excited to show you how to deploy Crawl4ai using Modal - a fantastic serverless platform that makes deployment super simple and scalable.
In this tutorial, I'll walk you through deploying your own Crawl4ai instance on Modal's infrastructure. This will give you a powerful, scalable web crawling solution without having to worry about infrastructure management.
## What is Modal?
Modal is a serverless platform that allows you to run Python functions in the cloud without managing servers. It's perfect for deploying Crawl4ai because:
1. It handles all the infrastructure for you
2. It scales automatically based on demand
3. It makes deployment incredibly simple
## Prerequisites
Before we get started, you'll need:
- A Modal account (sign up at [modal.com](https://modal.com))
- Python 3.10 or later installed on your local machine
- Basic familiarity with Python and command-line operations
## Step 1: Setting Up Your Modal Account
First, sign up for a Modal account at [modal.com](https://modal.com) if you haven't already. Modal offers a generous free tier that's perfect for getting started.
After signing up, install the Modal CLI and authenticate:
```bash
pip install modal
modal token new
```
This will open a browser window where you can authenticate and generate a token for the CLI.
## Step 2: Creating Your Crawl4ai Deployment
Now, let's create a Python file called `crawl4ai_modal.py` with our deployment code:
```python
import modal
from typing import Optional, Dict, Any
# Create a custom image with Crawl4ai and its dependencies
image = modal.Image.debian_slim(python_version="3.10").pip_install(
["fastapi[standard]"]
).run_commands(
"apt-get update",
"apt-get install -y software-properties-common",
"apt-get install -y git",
"apt-add-repository non-free",
"apt-add-repository contrib",
"pip install -U crawl4ai",
"pip install -U fastapi[standard]",
"pip install -U pydantic",
"crawl4ai-setup", # This installs playwright and downloads chromium
)
# Define the app
app = modal.App("crawl4ai", image=image)
# Define default configurations
DEFAULT_BROWSER_CONFIG = {
"headless": True,
"verbose": False,
}
DEFAULT_CRAWLER_CONFIG = {
"crawler_config": {
"type": "CrawlerRunConfig",
"params": {
"markdown_generator": {
"type": "DefaultMarkdownGenerator",
"params": {
"content_filter": {
"type": "PruningContentFilter",
"params": {
"threshold": 0.48,
"threshold_type": "fixed"
}
}
}
}
}
}
}
@app.function(timeout=300) # 5 minute timeout
async def crawl(
url: str,
browser_config: Optional[Dict[str, Any]] = None,
crawler_config: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
"""
Crawl a given URL using Crawl4ai.
Args:
url: The URL to crawl
browser_config: Optional browser configuration to override defaults
crawler_config: Optional crawler configuration to override defaults
Returns:
A dictionary containing the crawl results
"""
from crawl4ai import (
AsyncWebCrawler,
BrowserConfig,
CrawlerRunConfig,
CrawlResult
)
# Prepare browser config using the loader method
if browser_config is None:
browser_config = DEFAULT_BROWSER_CONFIG
browser_config_obj = BrowserConfig.load(browser_config)
# Prepare crawler config using the loader method
if crawler_config is None:
crawler_config = DEFAULT_CRAWLER_CONFIG
crawler_config_obj = CrawlerRunConfig.load(crawler_config)
# Perform the crawl
async with AsyncWebCrawler(config=browser_config_obj) as crawler:
result: CrawlResult = await crawler.arun(url=url, config=crawler_config_obj)
# Return serializable results
try:
# Try newer Pydantic v2 method
return result.model_dump()
except AttributeError:
try:
# Try older Pydantic v1 method
return result.dict()
except AttributeError:
# Fallback to manual conversion; field names vary across crawl4ai
# versions, so guard every attribute access
return {
"url": getattr(result, "url", url),
"title": getattr(result, "title", None),
"status": getattr(result, "status", None),
"content": str(result.content) if hasattr(result, "content") else None,
"links": [{"url": link.url, "text": link.text} for link in result.links] if hasattr(result, "links") else [],
"markdown_v2": {
"raw_markdown": result.markdown_v2.raw_markdown if getattr(result, "markdown_v2", None) else None
}
}
@app.function()
@modal.web_endpoint(method="POST")
def crawl_endpoint(data: Dict[str, Any]) -> Dict[str, Any]:
"""
Web endpoint that accepts POST requests with JSON data containing:
- url: The URL to crawl
- browser_config: Optional browser configuration
- crawler_config: Optional crawler configuration
Returns the crawl results.
"""
url = data.get("url")
if not url:
return {"error": "URL is required"}
browser_config = data.get("browser_config")
crawler_config = data.get("crawler_config")
return crawl.remote(url, browser_config, crawler_config)
@app.local_entrypoint()
def main(url: str = "https://www.modal.com"):
"""
Command line entrypoint for local testing.
"""
result = crawl.remote(url)
print(result)
```
## Step 3: Understanding the Code Components
Let's break down what's happening in this code:
### 1. Image Definition
```python
image = modal.Image.debian_slim(python_version="3.10").pip_install(
["fastapi[standard]"]
).run_commands(
"apt-get update",
"apt-get install -y software-properties-common",
"apt-get install -y git",
"apt-add-repository non-free",
"apt-add-repository contrib",
"pip install -U git+https://github.com/unclecode/crawl4ai.git@next",
"pip install -U fastapi[standard]",
"pip install -U pydantic",
"crawl4ai-setup", # This installs playwright and downloads chromium
)
```
This section defines the container image that Modal will use to run your code. It:
- Starts with a Debian Slim base image with Python 3.10
- Installs FastAPI
- Updates the system packages
- Installs Git and other dependencies
- Installs Crawl4ai from the GitHub repository
- Runs the Crawl4ai setup to install Playwright and download Chromium
### 2. Modal App Definition
```python
app = modal.App("crawl4ai", image=image)
```
This creates a Modal application named "crawl4ai" that uses the image we defined above.
### 3. Default Configurations
```python
DEFAULT_BROWSER_CONFIG = {
"headless": True,
"verbose": False,
}
DEFAULT_CRAWLER_CONFIG = {
"crawler_config": {
"type": "CrawlerRunConfig",
"params": {
"markdown_generator": {
"type": "DefaultMarkdownGenerator",
"params": {
"content_filter": {
"type": "PruningContentFilter",
"params": {
"threshold": 0.48,
"threshold_type": "fixed"
}
}
}
}
}
}
}
```
These define the default configurations for the browser and crawler. You can customize these settings based on your specific needs.
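For instance, here is a minimal sketch of a customized configuration (the values are illustrative; the `threshold` of 0.6 matches the customization example later in this tutorial):

```python
# Show the browser window and log verbosely while debugging locally
custom_browser_config = {
    "headless": False,
    "verbose": True,
}

# Same structure as DEFAULT_CRAWLER_CONFIG, but with a stricter content filter
custom_crawler_config = {
    "crawler_config": {
        "type": "CrawlerRunConfig",
        "params": {
            "markdown_generator": {
                "type": "DefaultMarkdownGenerator",
                "params": {
                    "content_filter": {
                        "type": "PruningContentFilter",
                        "params": {
                            "threshold": 0.6,  # prune more aggressively than the 0.48 default
                            "threshold_type": "fixed"
                        }
                    }
                }
            }
        }
    }
}
```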
### 4. The Crawl Function
```python
@app.function(timeout=300)
async def crawl(url, browser_config, crawler_config):
# Function implementation
```
This is the main function that performs the crawling. It:
- Takes a URL and optional configurations
- Sets up the browser and crawler with those configurations
- Performs the crawl
- Returns the results in a serializable format
The `@app.function(timeout=300)` decorator tells Modal to run this function in the cloud with a 5-minute timeout.
### 5. The Web Endpoint
```python
@app.function()
@modal.web_endpoint(method="POST")
def crawl_endpoint(data: Dict[str, Any]) -> Dict[str, Any]:
# Function implementation
```
This creates a web endpoint that accepts POST requests. It:
- Extracts the URL and configurations from the request
- Calls the crawl function with those parameters
- Returns the results
### 6. Local Entrypoint
```python
@app.local_entrypoint()
def main(url: str = "https://www.modal.com"):
# Function implementation
```
This provides a way to test the application from the command line.
## Step 4: Testing Locally
Before deploying, let's test our application locally:
```bash
modal run crawl4ai_modal.py --url "https://example.com"
```
This command will:
1. Upload your code to Modal
2. Create the necessary containers
3. Run the `main` function with the specified URL
4. Return the results
Modal will handle all the infrastructure setup for you. You should see the crawling results printed to your console.
## Step 5: Deploying Your Application
Once you're satisfied with the local testing, it's time to deploy:
```bash
modal deploy crawl4ai_modal.py
```
This will deploy your application to Modal's cloud. The deployment process will output URLs for your web endpoints.
You should see output similar to:
```
✓ Deployed crawl4ai.
URLs:
crawl_endpoint => https://your-username--crawl-endpoint.modal.run
```
Save this URL - you'll need it to make requests to your deployment.
## Step 6: Using Your Deployment
Now that your application is deployed, you can use it by sending POST requests to the endpoint URL:
```bash
curl -X POST https://your-username--crawl-endpoint.modal.run \
-H "Content-Type: application/json" \
-d '{"url": "https://example.com"}'
```
Or in Python:
```python
import requests
response = requests.post(
"https://your-username--crawl-endpoint.modal.run",
json={"url": "https://example.com"}
)
result = response.json()
print(result)
```
You can also customize the browser and crawler configurations:
```python
requests.post(
"https://your-username--crawl-endpoint.modal.run",
json={
"url": "https://example.com",
"browser_config": {
"headless": False,
"verbose": True
},
"crawler_config": {
"crawler_config": {
"type": "CrawlerRunConfig",
"params": {
"markdown_generator": {
"type": "DefaultMarkdownGenerator",
"params": {
"content_filter": {
"type": "PruningContentFilter",
"params": {
"threshold": 0.6, # Adjusted threshold
"threshold_type": "fixed"
}
}
}
}
}
}
}
}
)
```
## Step 7: Calling Your Deployment from Another Python Script
You can also call your deployed function directly from another Python script:
```python
import modal
# Get a reference to the deployed function
crawl_function = modal.Function.from_name("crawl4ai", "crawl")
# Call the function
result = crawl_function.remote("https://example.com")
print(result)
```
## Understanding Modal's Execution Flow
To understand how Modal works, it's important to know:
1. **Local vs. Remote Execution**: When you call a function with `.remote()`, it runs in Modal's cloud, not on your local machine (see the sketch after this list).
2. **Container Lifecycle**: Modal creates containers on-demand and destroys them when they're not needed.
3. **Caching**: Modal caches your container images to speed up subsequent runs.
4. **Serverless Scaling**: Modal automatically scales your application based on demand.
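To make the first point concrete, here's a minimal sketch (assuming the `app` and `crawl` function defined earlier in this tutorial):

```python
@app.local_entrypoint()
def demo(url: str = "https://example.com"):
    # .remote() runs the function in Modal's cloud and blocks until the result returns
    result = crawl.remote(url)
    print(result.get("url"))

    # .spawn() also runs in the cloud, but returns a handle immediately
    # instead of blocking; useful for fire-and-forget calls
    call = crawl.spawn(url)
    print(f"Spawned call id: {call.object_id}")
```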
## Customizing Your Deployment
You can customize your deployment in several ways:
### Changing the Crawl4ai Version
To use a different version of Crawl4ai, update the installation command in the image definition:
```python
"pip install -U git+https://github.com/unclecode/crawl4ai.git@main", # Use main branch
```
### Adjusting Resource Limits
You can change the resources allocated to your functions:
```python
@app.function(timeout=600, cpu=2, memory=4096) # 10 minute timeout, 2 CPUs, 4GB RAM
async def crawl(...):
# Function implementation
```
### Keeping Containers Warm
To reduce cold start times, you can keep containers warm:
```python
@app.function(keep_warm=1) # Keep 1 container warm
async def crawl(...):
# Function implementation
```
## Conclusion
That's it! You've successfully deployed Crawl4ai on Modal. You now have a scalable web crawling solution that can handle as many requests as you need without requiring any infrastructure management.
The beauty of this setup is its simplicity - Modal handles all the hard parts, letting you focus on using Crawl4ai to extract the data you need.
Feel free to reach out if you have any questions or need help with your deployment!
Happy crawling!
- UncleCode
## Additional Resources
- [Modal Documentation](https://modal.com/docs)
- [Crawl4ai GitHub Repository](https://github.com/unclecode/crawl4ai)
- [Crawl4ai Documentation](https://docs.crawl4ai.com)

deploy/modal/test_modal.py Normal file (+317 lines)

@@ -0,0 +1,317 @@
#!/usr/bin/env python3
"""
Crawl4ai API Testing Script
This script tests all endpoints of the Crawl4ai API service and demonstrates their usage.
"""
import argparse
import json
import sys
import time
from typing import Dict, Any, List, Optional
import requests
# Colors for terminal output
class Colors:
HEADER = '\033[95m'
BLUE = '\033[94m'
GREEN = '\033[92m'
YELLOW = '\033[93m'
RED = '\033[91m'
ENDC = '\033[0m'
BOLD = '\033[1m'
UNDERLINE = '\033[4m'
def print_header(text: str) -> None:
"""Print a formatted header."""
print(f"\n{Colors.HEADER}{Colors.BOLD}{'=' * 80}{Colors.ENDC}")
print(f"{Colors.HEADER}{Colors.BOLD}{text.center(80)}{Colors.ENDC}")
print(f"{Colors.HEADER}{Colors.BOLD}{'=' * 80}{Colors.ENDC}\n")
def print_step(text: str) -> None:
"""Print a formatted step description."""
print(f"{Colors.BLUE}{Colors.BOLD}>> {text}{Colors.ENDC}")
def print_success(text: str) -> None:
"""Print a success message."""
print(f"{Colors.GREEN}{text}{Colors.ENDC}")
def print_warning(text: str) -> None:
"""Print a warning message."""
print(f"{Colors.YELLOW}{text}{Colors.ENDC}")
def print_error(text: str) -> None:
"""Print an error message."""
print(f"{Colors.RED}{text}{Colors.ENDC}")
def print_json(data: Dict[str, Any]) -> None:
"""Pretty print JSON data."""
print(json.dumps(data, indent=2))
def make_request(method: str, url: str, params: Optional[Dict[str, Any]] = None,
json_data: Optional[Dict[str, Any]] = None,
expected_status: int = 200) -> Dict[str, Any]:
"""Make an HTTP request and handle errors."""
print_step(f"Making {method.upper()} request to {url}")
if params:
print(f" Parameters: {params}")
if json_data:
print(f" JSON Data: {json_data}")
try:
response = requests.request(
method=method,
url=url,
params=params,
json=json_data,
timeout=300 # 5 minute timeout for crawling operations
)
status_code = response.status_code
print(f" Status Code: {status_code}")
try:
data = response.json()
print(" Response:")
print_json(data)
if status_code != expected_status:
print_error(f"Expected status code {expected_status}, got {status_code}")
return data
print_success("Request successful")
return data
except ValueError:
print_error("Response is not valid JSON")
print(response.text)
return {"error": "Invalid JSON response"}
except requests.RequestException as e:
print_error(f"Request failed: {str(e)}")
return {"error": str(e)}
def test_health_check(base_url: str) -> bool:
"""Test the health check endpoint."""
print_header("Testing Health Check Endpoint")
# NOTE: this assumes every endpoint is reachable under one shared base URL;
# with a plain Modal deployment each web_endpoint gets its own *.modal.run URL
response = make_request("GET", f"{base_url}/health_check")
if "status" in response and response["status"] == "online":
print_success("Health check passed")
return True
else:
print_error("Health check failed")
return False
def test_admin_create_user(base_url: str, admin_token: str, email: str, name: str) -> Optional[str]:
"""Test creating a new user."""
print_header("Testing Admin User Creation")
response = make_request(
"POST",
f"{base_url}/admin_create_user",
json_data={
"admin_token": admin_token,
"email": email,
"name": name
},
expected_status=201
)
if response.get("success") and "data" in response:
api_token = response["data"].get("api_token")
if api_token:
print_success(f"User created successfully with API token: {api_token}")
return api_token
print_error("Failed to create user")
return None
def test_check_credits(base_url: str, api_token: str) -> Optional[int]:
"""Test checking user credits."""
print_header("Testing Check Credits Endpoint")
response = make_request(
"GET",
f"{base_url}/check_credits",
params={"api_token": api_token}
)
if response.get("success") and "data" in response:
credits = response["data"].get("credits")
if credits is not None:
print_success(f"User has {credits} credits")
return credits
print_error("Failed to check credits")
return None
def test_crawl_endpoint(base_url: str, api_token: str, url: str) -> bool:
"""Test the crawl endpoint."""
print_header("Testing Crawl Endpoint")
response = make_request(
"POST",
f"{base_url}/crawl_endpoint",
json_data={
"api_token": api_token,
"url": url
}
)
if response.get("success") and "data" in response:
print_success("Crawl completed successfully")
# Display some crawl result data
data = response["data"]
if "title" in data:
print(f"Page Title: {data['title']}")
if "status" in data:
print(f"Status: {data['status']}")
if "links" in data:
print(f"Links found: {len(data['links'])}")
if "markdown_v2" in data and data["markdown_v2"] and "raw_markdown" in data["markdown_v2"]:
print("Markdown Preview (first 200 chars):")
print(data["markdown_v2"]["raw_markdown"][:200] + "...")
credits_remaining = response.get("credits_remaining")
if credits_remaining is not None:
print(f"Credits remaining: {credits_remaining}")
return True
print_error("Crawl failed")
return False
def test_admin_update_credits(base_url: str, admin_token: str, api_token: str, amount: int) -> bool:
"""Test updating user credits."""
print_header("Testing Admin Update Credits")
response = make_request(
"POST",
f"{base_url}/admin_update_credits",
json_data={
"admin_token": admin_token,
"api_token": api_token,
"amount": amount
}
)
if response.get("success") and "data" in response:
print_success(f"Credits updated successfully, new balance: {response['data'].get('credits')}")
return True
print_error("Failed to update credits")
return False
def test_admin_get_users(base_url: str, admin_token: str) -> List[Dict[str, Any]]:
"""Test getting all users."""
print_header("Testing Admin Get All Users")
response = make_request(
"GET",
f"{base_url}/admin_get_users",
params={"admin_token": admin_token}
)
if response.get("success") and "data" in response:
users = response["data"]
print_success(f"Retrieved {len(users)} users")
return users
print_error("Failed to get users")
return []
def run_full_test(base_url: str, admin_token: str) -> None:
"""Run all tests in sequence."""
# Remove trailing slash if present
base_url = base_url.rstrip('/')
# Test 1: Health Check
if not test_health_check(base_url):
print_error("Health check failed, aborting tests")
sys.exit(1)
# Test 2: Create a test user
email = f"test-user-{int(time.time())}@example.com"
name = "Test User"
api_token = test_admin_create_user(base_url, admin_token, email, name)
if not api_token:
print_error("User creation failed, aborting tests")
sys.exit(1)
# Test 3: Check initial credits
initial_credits = test_check_credits(base_url, api_token)
if initial_credits is None:
print_error("Credit check failed, aborting tests")
sys.exit(1)
# Test 4: Perform a crawl
test_url = "https://news.ycombinator.com"
crawl_success = test_crawl_endpoint(base_url, api_token, test_url)
if not crawl_success:
print_warning("Crawl test failed, but continuing with other tests")
# Test 5: Check credits after crawl
post_crawl_credits = test_check_credits(base_url, api_token)
if post_crawl_credits is not None and initial_credits is not None:
if post_crawl_credits == initial_credits - 1:
print_success("Credit deduction verified")
else:
print_warning(f"Unexpected credit change: {initial_credits} -> {post_crawl_credits}")
# Test 6: Add credits
add_credits_amount = 50
post_addition_credits = post_crawl_credits  # predefine so the final summary can't hit a NameError
if test_admin_update_credits(base_url, admin_token, api_token, add_credits_amount):
print_success(f"Added {add_credits_amount} credits")
# Test 7: Check credits after addition
post_addition_credits = test_check_credits(base_url, api_token)
if post_addition_credits is not None and post_crawl_credits is not None:
if post_addition_credits == post_crawl_credits + add_credits_amount:
print_success("Credit addition verified")
else:
print_warning(f"Unexpected credit change: {post_crawl_credits} -> {post_addition_credits}")
# Test 8: Get all users
users = test_admin_get_users(base_url, admin_token)
if users:
# Check if our test user is in the list
test_user = next((user for user in users if user.get("email") == email), None)
if test_user:
print_success("Test user found in users list")
else:
print_warning("Test user not found in users list")
# Final report
print_header("Test Summary")
print_success("All endpoints tested successfully")
print(f"Test user created with email: {email}")
print(f"API token: {api_token}")
print(f"Final credit balance: {post_addition_credits}")
def main():
parser = argparse.ArgumentParser(description="Test Crawl4ai API endpoints")
parser.add_argument("--base-url", required=True, help="Base URL of the Crawl4ai API (e.g., https://username--crawl4ai-api.modal.run)")
parser.add_argument("--admin-token", required=True, help="Admin token for authentication")
args = parser.parse_args()
print_header("Crawl4ai API Test Script")
print(f"Testing API at: {args.base_url}")
run_full_test(args.base_url, args.admin_token)
if __name__ == "__main__":
main()