refactor(docker): improve server architecture and configuration

Complete overhaul of Docker deployment setup with improved architecture:
- Add Redis integration for task management
- Implement rate limiting and security middleware
- Add Prometheus metrics and health checks
- Improve error handling and logging
- Add support for streaming responses
- Implement proper configuration management
- Add platform-specific optimizations for ARM64/AMD64

BREAKING CHANGE: Docker deployment now requires Redis and new config.yml structure
2025-02-02 20:19:51 +08:00
parent 7b1ef07c41
commit 33a21d6a7a
16 changed files with 1918 additions and 344 deletions
--- a/148
+++ b/148
@@ -1,32 +1,31 @@
-# syntax=docker/dockerfile:1.4
+FROM python:3.10-slim
-ARG TARGETPLATFORM
+# Set build arguments
-ARG BUILDPLATFORM
+ARG APP_HOME=/app
 ARG GITHUB_REPO=https://github.com/yourusername/crawl4ai.git
 ARG GITHUB_BRANCH=main
 ARG USE_LOCAL=true
-# Other build arguments
+ENV PYTHONFAULTHANDLER=1 \
-ARG PYTHON_VERSION=3.10
+    PYTHONHASHSEED=random \
-
+    PYTHONUNBUFFERED=1 \
 # Base stage with system dependencies
 FROM python:${PYTHON_VERSION}-slim as base
 # Declare ARG variables again within the build stage
 ARG INSTALL_TYPE=all
 ARG ENABLE_GPU=false
 # Platform-specific labels
 LABEL maintainer="unclecode"
 LABEL description="🔥🕷️ Crawl4AI: Open-source LLM Friendly Web Crawler & scraper"
 LABEL version="1.0"
 # Environment setup
 ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_DEFAULT_TIMEOUT=100 \
-    DEBIAN_FRONTEND=noninteractive
+    DEBIAN_FRONTEND=noninteractive \
    REDIS_HOST=localhost \
    REDIS_PORT=6379
 ARG PYTHON_VERSION=3.10
 ARG INSTALL_TYPE=default
 ARG ENABLE_GPU=false
 ARG TARGETARCH
 LABEL maintainer="unclecode"
 LABEL description="🔥🕷️ Crawl4AI: Open-source LLM Friendly Web Crawler & scraper"
 LABEL version="1.0"    
 # Install system dependencies
 RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    curl \
@@ -37,10 +36,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
    pkg-config \
    python3-dev \
    libjpeg-dev \
-    libpng-dev \
+    redis-server \
    && rm -rf /var/lib/apt/lists/*
 # Playwright system dependencies for Linux
 RUN apt-get update && apt-get install -y --no-install-recommends \
    libglib2.0-0 \
    libnss3 \
@@ -65,8 +63,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
    libatspi2.0-0 \
    && rm -rf /var/lib/apt/lists/*
-# GPU support if enabled and architecture is supported
+RUN if [ "$ENABLE_GPU" = "true" ] && [ "$TARGETARCH" = "amd64" ] ; then \
 RUN if [ "$ENABLE_GPU" = "true" ] && [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \
    apt-get update && apt-get install -y --no-install-recommends \
    nvidia-cuda-toolkit \
    && rm -rf /var/lib/apt/lists/* ; \
@@ -74,19 +71,40 @@ else \
    echo "Skipping NVIDIA CUDA Toolkit installation (unsupported platform or GPU disabled)"; \
 fi
-# Create and set working directory
+RUN if [ "$TARGETARCH" = "arm64" ]; then \
-WORKDIR /app
+    echo "🦾 Installing ARM-specific optimizations"; \
    apt-get update && apt-get install -y --no-install-recommends \
    libopenblas-dev \
    && rm -rf /var/lib/apt/lists/*; \
 elif [ "$TARGETARCH" = "amd64" ]; then \
    echo "🖥️ Installing AMD64-specific optimizations"; \
    apt-get update && apt-get install -y --no-install-recommends \
    libomp-dev \
    && rm -rf /var/lib/apt/lists/*; \
 else \
    echo "Skipping platform-specific optimizations (unsupported platform)"; \
 fi
-# Copy the entire project
+WORKDIR ${APP_HOME}
 COPY . .
-# Install base requirements
+RUN echo '#!/bin/bash\n\
 if [ "$USE_LOCAL" = "true" ]; then\n\
    echo "📦 Installing from local source..."\n\
    pip install --no-cache-dir /tmp/project/\n\
 else\n\
    echo "🌐 Installing from GitHub..."\n\
    for i in {1..3}; do \n\
        git clone --branch ${GITHUB_BRANCH} ${GITHUB_REPO} /tmp/crawl4ai && break || \n\
        { echo "Attempt $i/3 failed! Taking a short break... ☕"; sleep 5; }; \n\
    done\n\
    pip install --no-cache-dir /tmp/crawl4ai\n\
 fi' > /tmp/install.sh && chmod +x /tmp/install.sh
 COPY . /tmp/project/
 COPY deploy/docker/requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 # Install required library for FastAPI
 RUN pip install fastapi uvicorn psutil
 # Install ML dependencies first for better layer caching
 RUN if [ "$INSTALL_TYPE" = "all" ] ; then \
        pip install --no-cache-dir \
            torch \
@@ -99,38 +117,50 @@ RUN if [ "$INSTALL_TYPE" = "all" ] ; then \
        python -m nltk.downloader punkt stopwords ; \
    fi
 # Install the package
 RUN if [ "$INSTALL_TYPE" = "all" ] ; then \
-        pip install ".[all]" && \
+        pip install "/tmp/project/[all]" && \
        python -m crawl4ai.model_loader ; \
    elif [ "$INSTALL_TYPE" = "torch" ] ; then \
-        pip install ".[torch]" ; \
+        pip install "/tmp/project/[torch]" ; \
    elif [ "$INSTALL_TYPE" = "transformer" ] ; then \
-        pip install ".[transformer]" && \
+        pip install "/tmp/project/[transformer]" && \
        python -m crawl4ai.model_loader ; \
    else \
-        pip install "." ; \
+        pip install "/tmp/project" ; \
    fi
 RUN pip install --no-cache-dir --upgrade pip && \
    /tmp/install.sh && \
    python -c "import crawl4ai; print('✅ crawl4ai is ready to rock!')" && \
    python -c "from playwright.sync_api import sync_playwright; print('✅ Playwright is feeling dramatic!')"
 RUN playwright install --with-deps chromium
-    # Install MkDocs and required plugins
+COPY deploy/docker/* ${APP_HOME}/
 RUN pip install --no-cache-dir \
    mkdocs \
    mkdocs-material \
    mkdocs-terminal \
    pymdown-extensions
-# Build MkDocs documentation
+HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
-RUN mkdocs build
+    CMD bash -c '\
    MEM=$(free -m | awk "/^Mem:/{print \$2}"); \
    if [ $MEM -lt 2048 ]; then \
        echo "⚠️ Warning: Less than 2GB RAM available! Your container might need a memory boost! 🚀"; \
        exit 1; \
    fi && \
    redis-cli ping > /dev/null && \
    curl -f http://localhost:8000/health || exit 1'
-# Install Playwright and browsers
+COPY deploy/docker/docker-entrypoint.sh /usr/local/bin/
-RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
+RUN chmod +x /usr/local/bin/docker-entrypoint.sh
    playwright install chromium; \
    elif [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
    playwright install chromium; \
    fi
-# Expose port
+EXPOSE 6379
 EXPOSE 8000 11235 9222 8080
-# Start the FastAPI server
+ENTRYPOINT ["docker-entrypoint.sh"]
-CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "11235"]
+
 CMD service redis-server start && gunicorn \
    --bind 0.0.0.0:8000 \
    --workers 4 \
    --threads 2 \
    --timeout 120 \
    --graceful-timeout 30 \
    --log-level info \
    --worker-class uvicorn.workers.UvicornWorker \
    server:app
--- a/136
+++ b/136
@@ -0,0 +1,136 @@
 # syntax=docker/dockerfile:1.4
 # Global build arguments. An ARG declared before the first FROM is only
 # visible to FROM lines; a stage that needs the value must redeclare it.
 ARG TARGETPLATFORM
 ARG BUILDPLATFORM
 # Other build arguments
 ARG PYTHON_VERSION=3.10
 # Base stage with system dependencies
 FROM python:${PYTHON_VERSION}-slim AS base
 # Declare ARG variables again within the build stage
 ARG INSTALL_TYPE=all
 ARG ENABLE_GPU=false
 # Redeclared without a value so the platform checks in the RUN steps below
 # see the builder-provided value instead of an empty string.
 ARG TARGETPLATFORM
 # Platform-specific labels
 LABEL maintainer="unclecode"
 LABEL description="🔥🕷️ Crawl4AI: Open-source LLM Friendly Web Crawler & scraper"
 LABEL version="1.0"
 # Environment setup
 ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_DEFAULT_TIMEOUT=100 \
    DEBIAN_FRONTEND=noninteractive
 # Install system dependencies
 RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    curl \
    wget \
    gnupg \
    git \
    cmake \
    pkg-config \
    python3-dev \
    libjpeg-dev \
    libpng-dev \
    && rm -rf /var/lib/apt/lists/*
 # Playwright system dependencies for Linux
 RUN apt-get update && apt-get install -y --no-install-recommends \
    libglib2.0-0 \
    libnss3 \
    libnspr4 \
    libatk1.0-0 \
    libatk-bridge2.0-0 \
    libcups2 \
    libdrm2 \
    libdbus-1-3 \
    libxcb1 \
    libxkbcommon0 \
    libx11-6 \
    libxcomposite1 \
    libxdamage1 \
    libxext6 \
    libxfixes3 \
    libxrandr2 \
    libgbm1 \
    libpango-1.0-0 \
    libcairo2 \
    libasound2 \
    libatspi2.0-0 \
    && rm -rf /var/lib/apt/lists/*
 # GPU support if enabled and architecture is supported
 RUN if [ "$ENABLE_GPU" = "true" ] && [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \
    apt-get update && apt-get install -y --no-install-recommends \
    nvidia-cuda-toolkit \
    && rm -rf /var/lib/apt/lists/* ; \
 else \
    echo "Skipping NVIDIA CUDA Toolkit installation (unsupported platform or GPU disabled)"; \
 fi
 # Create and set working directory
 WORKDIR /app
 # Copy the entire project
 COPY . .
 # Install base requirements
 RUN pip install --no-cache-dir -r requirements.txt
 # Install required library for FastAPI
 RUN pip install fastapi uvicorn psutil
 # Install ML dependencies first for better layer caching
 RUN if [ "$INSTALL_TYPE" = "all" ] ; then \
        pip install --no-cache-dir \
            torch \
            torchvision \
            torchaudio \
            scikit-learn \
            nltk \
            transformers \
            tokenizers && \
        python -m nltk.downloader punkt stopwords ; \
    fi
 # Install the package with the extras selected by INSTALL_TYPE
 RUN if [ "$INSTALL_TYPE" = "all" ] ; then \
        pip install ".[all]" && \
        python -m crawl4ai.model_loader ; \
    elif [ "$INSTALL_TYPE" = "torch" ] ; then \
        pip install ".[torch]" ; \
    elif [ "$INSTALL_TYPE" = "transformer" ] ; then \
        pip install ".[transformer]" && \
        python -m crawl4ai.model_loader ; \
    else \
        pip install "." ; \
    fi
 # Install MkDocs and required plugins
 RUN pip install --no-cache-dir \
    mkdocs \
    mkdocs-material \
    mkdocs-terminal \
    pymdown-extensions
 # Build MkDocs documentation
 RUN mkdocs build
 # Install Playwright browsers on the supported platforms (same command for
 # both, so the two branches are merged into one condition)
 RUN if [ "$TARGETPLATFORM" = "linux/amd64" ] || [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
    playwright install chromium; \
    fi
 # Expose port
 EXPOSE 8000 11235 9222 8080
 # Start the FastAPI server
 CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "11235"]
--- a/crawl4ai/content_filter_strategy.py
+++ b/crawl4ai/content_filter_strategy.py
@@ -5,7 +5,7 @@ from typing import List, Tuple, Dict, Optional
 from rank_bm25 import BM25Okapi
 from collections import deque
 from bs4 import NavigableString, Comment
-from .utils import clean_tokens, perform_completion_with_backoff, escape_json_string, sanitize_html, get_home_folder, extract_xml_data
+from .utils import clean_tokens, perform_completion_with_backoff, escape_json_string, sanitize_html, get_home_folder, extract_xml_data, merge_chunks
 from abc import ABC, abstractmethod
 import math
 from snowballstemmer import stemmer
@@ -23,7 +23,14 @@ from colorama import Fore, Style
 class RelevantContentFilter(ABC):
    """Abstract base class for content filtering strategies"""
-    def __init__(self, user_query: str = None):
+    def __init__(self, user_query: str = None, verbose: bool = False, logger: Optional[AsyncLogger] = None):
        """
        Initializes the RelevantContentFilter class with optional user query.
        Args:
            user_query (str): User query for filtering (optional).
            verbose (bool): Enable verbose logging (default: False).
        """
        self.user_query = user_query
        self.included_tags = {
            # Primary structure
@@ -92,6 +99,8 @@ class RelevantContentFilter(ABC):
            r"nav|footer|header|sidebar|ads|comment|promo|advert|social|share", re.I
        )
        self.min_word_count = 2
        self.verbose = False
        self.logger = logger
    @abstractmethod
    def filter_content(self, html: str) -> List[str]:
@@ -755,8 +764,11 @@ class LLMContentFilter(RelevantContentFilter):
        base_url: Optional[str] = None,
        api_base: Optional[str] = None,
        extra_args: Dict = None,
        # char_token_rate: float = WORD_TOKEN_RATE * 5,
        # chunk_mode: str = "char",
        verbose: bool = False,
        logger: Optional[AsyncLogger] = None,
        ignore_cache: bool = False,
    ):
        super().__init__(None)
        self.provider = provider
@@ -768,10 +780,15 @@ class LLMContentFilter(RelevantContentFilter):
        self.instruction = instruction
        self.chunk_token_threshold = chunk_token_threshold
        self.overlap_rate = overlap_rate
-        self.word_token_rate = word_token_rate
+        self.word_token_rate = word_token_rate or WORD_TOKEN_RATE
        # self.chunk_mode: str = chunk_mode
        # self.char_token_rate = char_token_rate or word_token_rate / 5
        # self.token_rate = word_token_rate if chunk_mode == "word" else self.char_token_rate
        self.token_rate = word_token_rate or WORD_TOKEN_RATE
        self.base_url = base_url
        self.api_base = api_base or base_url
        self.extra_args = extra_args or {}
        self.ignore_cache = ignore_cache
        self.verbose = verbose
        # Setup logger with custom styling for LLM operations
@@ -779,7 +796,7 @@ class LLMContentFilter(RelevantContentFilter):
            self.logger = logger
        elif verbose:
            self.logger = AsyncLogger(
-                verbose=True,
+                verbose=verbose,
                icons={
                    **AsyncLogger.DEFAULT_ICONS,
                    "LLM": "★",  # Star for LLM operations
@@ -803,45 +820,25 @@ class LLMContentFilter(RelevantContentFilter):
        return hashlib.md5(content.encode()).hexdigest()
    def _merge_chunks(self, text: str) -> List[str]:
-        """Split text into chunks with overlap"""
+        """Split text into chunks with overlap using char or word mode."""
-        # Calculate tokens and sections
+        ov = int(self.chunk_token_threshold * self.overlap_rate)
-        total_tokens = len(text.split()) * self.word_token_rate
+        sections = merge_chunks(
-        num_sections = max(1, math.floor(total_tokens / self.chunk_token_threshold))
+            docs = [text],
-        adjusted_chunk_threshold = total_tokens / num_sections
+            target_size= self.chunk_token_threshold,
            overlap=ov,
            word_token_ratio=self.word_token_rate
        )
        return sections
-        # Split into words
+    def filter_content(self, html: str, ignore_cache: bool = True) -> List[str]:
        words = text.split()
        chunks = []
        current_chunk = []
        current_token_count = 0
        for word in words:
            word_tokens = len(word) * self.word_token_rate
            if current_token_count + word_tokens <= adjusted_chunk_threshold:
                current_chunk.append(word)
                current_token_count += word_tokens
            else:
                # Add overlap if not the last chunk
                if chunks and self.overlap_rate > 0:
                    overlap_size = int(len(current_chunk) * self.overlap_rate)
                    current_chunk.extend(current_chunk[-overlap_size:])
                chunks.append(" ".join(current_chunk))
                current_chunk = [word]
                current_token_count = word_tokens
        if current_chunk:
            chunks.append(" ".join(current_chunk))
        return chunks
    def filter_content(self, html: str, ignore_cache: bool = False) -> List[str]:
        if not html or not isinstance(html, str):
            return []
        if self.logger:
            self.logger.info(
-                "Starting LLM content filtering process", 
+                "Starting LLM markdown content filtering process", 
                tag="LLM",
                params={"provider": self.provider},
                colors={"provider": Fore.CYAN}
@@ -853,9 +850,12 @@ class LLMContentFilter(RelevantContentFilter):
        cache_key = self._get_cache_key(html, self.instruction or "")
        cache_file = cache_dir / f"{cache_key}.json"
        # if ignore_cache == None:
        ignore_cache = self.ignore_cache
        if not ignore_cache and cache_file.exists():
            if self.logger:
-                self.logger.info("Found cached result", tag="CACHE")
+                self.logger.info("Found  cached markdown result", tag="CACHE")
            try:
                with cache_file.open('r') as f:
                    cached_data = json.load(f)
@@ -867,13 +867,13 @@ class LLMContentFilter(RelevantContentFilter):
                    return cached_data['blocks']
            except Exception as e:
                if self.logger:
-                    self.logger.error(f"Cache read error: {str(e)}", tag="CACHE")
+                    self.logger.error(f"LLM markdown: Cache read error: {str(e)}", tag="CACHE")
        # Split into chunks
        html_chunks = self._merge_chunks(html)
        if self.logger:
            self.logger.info(
-                "Split content into {chunk_count} chunks", 
+                "LLM markdown: Split content into {chunk_count} chunks", 
                tag="CHUNK",
                params={"chunk_count": len(html_chunks)},
                colors={"chunk_count": Fore.YELLOW}
@@ -887,7 +887,7 @@ class LLMContentFilter(RelevantContentFilter):
            for i, chunk in enumerate(html_chunks):
                if self.logger:
                    self.logger.debug(
-                        "Processing chunk {chunk_num}/{total_chunks}", 
+                        "LLM markdown: Processing chunk {chunk_num}/{total_chunks}", 
                        tag="CHUNK",
                        params={
                            "chunk_num": i + 1,
@@ -904,16 +904,38 @@ class LLMContentFilter(RelevantContentFilter):
                for var, value in prompt_variables.items():
                    prompt = prompt.replace("{" + var + "}", value)
                def _proceed_with_chunk(
                        provider: str,
                        prompt: str,
                        api_token: str,
                        base_url: Optional[str] = None,
                        extra_args: Dict = {}
                    ) -> List[str]:
                    if self.logger:
                        self.logger.info(
                            "LLM Markdown: Processing chunk {chunk_num}", 
                            tag="CHUNK",
                            params={"chunk_num": i + 1}
                        )
                    return perform_completion_with_backoff(
                        provider,
                        prompt,
                        api_token,
                        base_url=base_url,
                        extra_args=extra_args
                    )
                future = executor.submit(
-                    perform_completion_with_backoff,
+                    _proceed_with_chunk,
                    self.provider,
                    prompt,
                    self.api_token,
-                    base_url=self.api_base,
+                    self.api_base,
-                    extra_args=self.extra_args
+                    self.extra_args
                )
                futures.append((i, future))
            # Collect results in order
            ordered_results = []
            for i, future in sorted(futures):
@@ -940,14 +962,14 @@ class LLMContentFilter(RelevantContentFilter):
                        ordered_results.append(blocks)
                        if self.logger:
                            self.logger.success(
-                                "Successfully processed chunk {chunk_num}", 
+                                "LLM markdown: Successfully processed chunk {chunk_num}", 
                                tag="CHUNK",
                                params={"chunk_num": i + 1}
                            )
                except Exception as e:
                    if self.logger:
                        self.logger.error(
-                            "Error processing chunk {chunk_num}: {error}", 
+                            "LLM markdown: Error processing chunk {chunk_num}: {error}", 
                            tag="CHUNK",
                            params={
                                "chunk_num": i + 1,
@@ -958,7 +980,7 @@ class LLMContentFilter(RelevantContentFilter):
        end_time = time.time()
        if self.logger:
            self.logger.success(
-                "Completed processing in {time:.2f}s", 
+                "LLM markdown: Completed processing in {time:.2f}s", 
                tag="LLM",
                params={"time": end_time - start_time},
                colors={"time": Fore.YELLOW}
--- a/crawl4ai/extraction_strategy.py
+++ b/crawl4ai/extraction_strategy.py
@@ -21,6 +21,9 @@ from .utils import (
    extract_xml_data,
    split_and_parse_json_objects,
    sanitize_input_encode,
    chunk_documents,
    merge_chunks,
    advanced_split,
 )
 from .models import * # noqa: F403
@@ -501,6 +504,10 @@ class LLMExtractionStrategy(ExtractionStrategy):
        instruction: str = None,
        schema: Dict = None,
        extraction_type="block",
        chunk_token_threshold=CHUNK_TOKEN_THRESHOLD,
        overlap_rate=OVERLAP_RATE,
        word_token_rate=WORD_TOKEN_RATE,
        apply_chunking=True,
        **kwargs,
    ):
        """
@@ -652,53 +659,16 @@ class LLMExtractionStrategy(ExtractionStrategy):
            )
        return blocks
-    def _merge(self, documents, chunk_token_threshold, overlap):
+    def _merge(self, documents, chunk_token_threshold, overlap) -> List[str]:
        """
        Merge documents into sections based on chunk_token_threshold and overlap.
        """
-        # chunks = []
+        sections =  merge_chunks(
-        sections = []
+            docs = documents,
-        total_tokens = 0
+            target_size= chunk_token_threshold,
-
+            overlap=overlap,
-        # Calculate the total tokens across all documents
+            word_token_ratio=self.word_token_rate
-        for document in documents:
+        )
            total_tokens += len(document.split(" ")) * self.word_token_rate
        # Calculate the number of sections needed
        num_sections = math.floor(total_tokens / chunk_token_threshold)
        if num_sections < 1:
            num_sections = 1  # Ensure there is at least one section
        adjusted_chunk_threshold = total_tokens / num_sections
        total_token_so_far = 0
        current_chunk = []
        for document in documents:
            tokens = document.split(" ")
            token_count = len(tokens) * self.word_token_rate
            if total_token_so_far + token_count <= adjusted_chunk_threshold:
                current_chunk.extend(tokens)
                total_token_so_far += token_count
            else:
                # Ensure to handle the last section properly
                if len(sections) == num_sections - 1:
                    current_chunk.extend(tokens)
                    continue
                # Add overlap if specified
                if overlap > 0 and current_chunk:
                    overlap_tokens = current_chunk[-overlap:]
                    current_chunk.extend(overlap_tokens)
                sections.append(" ".join(current_chunk))
                current_chunk = tokens
                total_token_so_far = token_count
        # Add the last chunk
        if current_chunk:
            sections.append(" ".join(current_chunk))
        return sections
    def run(self, url: str, sections: List[str]) -> List[Dict[str, Any]]:
--- a/crawl4ai/markdown_generation_strategy.py
+++ b/crawl4ai/markdown_generation_strategy.py
@@ -1,4 +1,5 @@
 from abc import ABC, abstractmethod
 from tabnanny import verbose
 from typing import Optional, Dict, Any, Tuple
 from .models import MarkdownGenerationResult
 from .html2text import CustomHTML2Text
@@ -29,9 +30,11 @@ class MarkdownGenerationStrategy(ABC):
        self,
        content_filter: Optional[RelevantContentFilter] = None,
        options: Optional[Dict[str, Any]] = None,
        verbose: bool = False,
    ):
        self.content_filter = content_filter
        self.options = options or {}
        self.verbose = verbose
    @abstractmethod
    def generate_markdown(
--- a/crawl4ai/prompts.py
+++ b/crawl4ai/prompts.py
@@ -206,17 +206,6 @@ Output the final list of JSON objects, wrapped in <blocks>...</blocks> XML tags.
 PROMPT_FILTER_CONTENT = """Your task is to filter and convert HTML content into clean, focused markdown that's optimized for use with LLMs and information retrieval systems.
 INPUT HTML: 
 <|HTML_CONTENT_START|>
 {HTML}
 <|HTML_CONTENT_END|>
 SPECIFIC INSTRUCTION: 
 <|USER_INSTRUCTION_START|>
 {REQUEST}
 <|USER_INSTRUCTION_END|>
 TASK DETAILS:
 1. Content Selection
 - DO: Keep essential information, main content, key details
@@ -240,15 +229,7 @@ TASK DETAILS:
 - DON'T: Fragment related content
 - DON'T: Duplicate information
-Example Input:
+IMPORTANT: If user specific instruction is provided, ignore above guideline and prioritize those requirements over these general guidelines.
 <div class="main-content"><h1>Setup Guide</h1><p>Follow these steps...</p></div>
 <div class="sidebar">Related articles...</div>
 Example Output:
 # Setup Guide
 Follow these steps...
 IMPORTANT: If specific instruction is provided above, prioritize those requirements over these general guidelines.
 OUTPUT FORMAT: 
 Wrap your response in <content> tags. Use proper markdown throughout.
@@ -256,7 +237,18 @@ Wrap your response in <content> tags. Use proper markdown throughout.
 [Your markdown content here]
 </content>
-Begin filtering now."""
+Begin filtering now.
 --------------------------------------------
 <|HTML_CONTENT_START|>
 {HTML}
 <|HTML_CONTENT_END|>
 <|USER_INSTRUCTION_START|>
 {REQUEST}
 <|USER_INSTRUCTION_END|>
 """
 JSON_SCHEMA_BUILDER= """
 # HTML Schema Generation Instructions
--- a/crawl4ai/utils.py
+++ b/crawl4ai/utils.py
@@ -1,3 +1,4 @@
 from ast import Call
 import time
 from urllib.parse import urlparse
 from concurrent.futures import ThreadPoolExecutor, as_completed
@@ -8,9 +9,10 @@ import re
 import os
 import platform
 from .prompts import PROMPT_EXTRACT_BLOCKS
 from array import array
 from .config import *
 from pathlib import Path
-from typing import Dict, Any
+from typing import Dict, Any, List, Tuple, Union, Optional, Callable
 from urllib.parse import urljoin
 import requests
 from requests.exceptions import InvalidSchema
@@ -31,6 +33,154 @@ import aiohttp
 from pathlib import Path
 from packaging import version
 from . import __version__
 from typing import Sequence, List
 from array import array
 from itertools import chain
 from collections import deque
 from typing import Callable, Generator, Iterable, List, Optional
 def chunk_documents(
    documents: Iterable[str],
    chunk_token_threshold: int,
    overlap: int,
    word_token_rate: float = 0.75,
    tokenizer: Optional[Callable[[str], List[str]]] = None,
 ) -> Generator[str, None, None]:
    """
    Chunk documents into token-limited sections that overlap their neighbours.

    Tokens from all documents are treated as one continuous stream. Each
    yielded chunk holds as many tokens as fit within ``chunk_token_threshold``
    (always at least one), and its trailing tokens, up to ``overlap`` worth,
    are repeated at the start of the next chunk.

    Args:
        documents: Iterable of document strings
        chunk_token_threshold: Maximum estimated tokens per chunk
        overlap: Estimated tokens shared between consecutive chunks. It is
            capped at one token less than the chunk so every chunk makes
            forward progress.
        word_token_rate: Token estimate per word when not using a tokenizer
        tokenizer: Function that splits text into tokens (if available);
            each of its tokens counts as exactly one token

    Yields:
        Text chunks as space-joined strings
    """
    # Every token in one call weighs the same: 1.0 with a real tokenizer,
    # otherwise the per-word estimate. Deriving the pending size from the
    # queue length avoids drift in a separately maintained float total.
    weight = 1.0 if tokenizer else word_token_rate
    pending = deque()
    # Number of leading tokens in `pending` that were already emitted as the
    # tail of the previous chunk.
    carried = 0
    for doc in documents:
        pending.extend(tokenizer(doc) if tokenizer else doc.split())
        # Emit chunks while a full chunk's worth of tokens is waiting
        while pending and len(pending) * weight >= chunk_token_threshold:
            # Take the first token unconditionally so that a single token
            # heavier than the threshold still forms its own chunk.
            chunk = [pending.popleft()]
            chunk_total = weight
            while pending and chunk_total + weight <= chunk_token_threshold:
                chunk.append(pending.popleft())
                chunk_total += weight
            # Count how many trailing tokens fit in the overlap budget,
            # leaving at least one token consumed for good.
            carried = 0
            overlap_total = 0.0
            while carried < len(chunk) - 1 and overlap_total + weight <= overlap:
                overlap_total += weight
                carried += 1
            # Put the tail back so it also opens the next chunk
            if carried:
                pending.extendleft(reversed(chunk[-carried:]))
            # Yield the whole chunk, tail included, so the overlap really
            # appears in both neighbouring chunks.
            yield " ".join(chunk)
    # Yield the remainder unless it is only the already-emitted overlap tail
    if len(pending) > carried:
        yield " ".join(pending)
 def merge_chunks(
    docs: Sequence[str],
    target_size: int,
    overlap: int = 0,
    word_token_ratio: float = 1.0,
    splitter: Callable = None
 ) -> List[str]:
    """Merges documents into chunks of specified token size.

    All documents are split into words and treated as one stream. A chunk is
    closed once its estimated token count (words * ``word_token_ratio``)
    reaches ``target_size``. The number of chunks is capped at
    ``ceil(total_tokens / target_size)``, so the final chunk absorbs any
    surplus created by overlap.

    Args:
        docs: Input documents
        target_size: Desired token count per chunk
        overlap: Number of trailing words repeated at the start of the
            next chunk
        word_token_ratio: Multiplier for word->token conversion
        splitter: Callable turning one document into a list of words;
            defaults to ``str.split``

    Returns:
        The chunks as space-joined strings; an empty list when the
        documents contain no words.
    """
    split = splitter or str.split
    # Flatten first so that short documents are never dropped by per-document
    # rounding of their estimated token count.
    words = [word for doc in docs for word in split(doc)]
    if not words:
        return []

    estimated_tokens = len(words) * word_token_ratio
    # Ceiling division that also works for a float total
    num_chunks = max(1, int(-(-estimated_tokens // target_size)))

    chunks: List[List[str]] = [[]]
    for word in words:
        current = chunks[-1]
        # Measure the chunk in estimated tokens, the same unit as target_size
        is_full = len(current) * word_token_ratio >= target_size
        if is_full and len(chunks) < num_chunks:
            # Seed the next chunk with the tail of the one just closed
            chunks.append(current[-overlap:] if overlap > 0 else [])
            current = chunks[-1]
        current.append(word)

    return [' '.join(chunk) for chunk in chunks]
 class VersionManager:
@@ -189,6 +339,77 @@ class InvalidCSSSelectorError(Exception):
    pass
 # Lookup table indexed by code point 0-255: 1 = separator, 0 = word character.
 # Only ASCII digits (48-57), uppercase (65-90) and lowercase (97-122) letters
 # are word characters; control chars, space, punctuation, DEL and the whole
 # 128-255 range are separators. The table is generated instead of hand-typed
 # so it always has exactly 256 entries and every range lines up with its
 # code points.
 SPLITS = bytearray(
    0 if (48 <= code <= 57 or 65 <= code <= 90 or 97 <= code <= 122) else 1
    for code in range(256)
 )
 # Additional split chars for HTML/code
 HTML_CODE_CHARS = {
    # HTML specific
    '•', '►', '▼', '©', '®', '™', '→', '⇒', '≈', '≤', '≥',
    # Programming symbols
    '+=', '-=', '*=', '/=', '=>', '<=>', '!=', '==', '===',
    '++', '--', '<<', '>>', '&&', '||', '??', '?:', '?.',
    # Common Unicode: ellipsis, curly double/single quotes, guillemets, dashes.
    # The curly quotes are written as escapes so they cannot be flattened
    # into ASCII quotes (which would silently change the string literals).
    '…', '\u201c', '\u201d', '\u2018', '\u2019', '«', '»', '—', '–',
    # Additional splits
    '+', '=', '~', '@', '#', '$', '%', '^', '&', '*',
    '(', ')', '{', '}', '[', ']', '|', '\\', '/', '`',
    '<', '>', ',', '.', '?', '!', ':', ';', '-', '_'
 }
 def advanced_split(text: str) -> list[str]:
    """Split ``text`` into words, dropping every separator character.

    A character is a separator when ``SPLITS`` flags its code point (0-255)
    or when it is listed in ``HTML_CODE_CHARS`` (which covers symbols above
    code point 255 such as bullets, arrows and curly quotes). Every character
    of the multi-character symbols in ``HTML_CODE_CHARS`` is itself a
    separator, so those symbols need no separate lookahead.

    Args:
        text: The string to tokenize.

    Returns:
        The maximal runs of non-separator characters, in order. Empty when
        ``text`` is empty or contains only separators.
    """
    result = []
    word = []
    for char in text:
        code = ord(char)
        if (code < 256 and SPLITS[code]) or char in HTML_CODE_CHARS:
            # Separator: flush the word collected so far, if any
            if word:
                result.append(''.join(word))
                word = []
        else:
            word.append(char)
    if word:
        result.append(''.join(word))
    return result
 def create_box_message(
    message: str,
    type: str = "info",
--- a/deploy/docker/Dockerfile.bak
+++ b/deploy/docker/Dockerfile.bak
--- a/deploy/docker/README.md
+++ b/deploy/docker/README.md
@@ -1,113 +1,764 @@
-# Crawl4AI Docker Setup
+# Crawl4AI Docker Guide 🐳
-## Quick Start
+## Table of Contents
-1. Build the Docker image:
+- [Prerequisites](#prerequisites)
-   ```bash
+- [Installation](#installation)
-   docker build -t crawl4ai-server:prod .
+  - [Local Build](#local-build)
-   ```
+  - [Docker Hub](#docker-hub)
 - [Dockerfile Parameters](#dockerfile-parameters)
 - [Using the API](#using-the-api)
  - [Understanding Request Schema](#understanding-request-schema)
  - [REST API Examples](#rest-api-examples)
  - [Python SDK](#python-sdk)
 - [Metrics & Monitoring](#metrics--monitoring)
 - [Deployment Scenarios](#deployment-scenarios)
 - [Complete Examples](#complete-examples)
 - [Getting Help](#getting-help)
-2. Run the container:
+## Prerequisites
   ```bash
   docker run -d -p 8000:8000 \
     --env-file .llm.env \
     --name crawl4ai \
     crawl4ai-server:prod
   ```
---
+Before we dive in, make sure you have:
 - Docker installed and running (version 20.10.0 or higher)
 - At least 4GB of RAM available for the container
 - Python 3.10+ (if using the Python SDK)
 - Node.js 16+ (if using the Node.js examples)
-## Configuration Options
+> 💡 **Pro tip**: Run `docker info` to check your Docker installation and available resources.
 ## Installation
 ### Local Build
 Let's get your local environment set up step by step!
 #### 1. Building the Image
 First, clone the repository and build the Docker image:
 ### 1. **Using .llm.env File**
 Create a `.llm.env` file with your API keys:
 ```bash
-OPENAI_API_KEY=sk-your-key
+# Clone the repository
-DEEPSEEK_API_KEY=your-deepseek-key
+git clone https://github.com/unclecode/crawl4ai.git
 cd crawl4ai
 # Build the Docker image
 docker build -t crawl4ai-server:prod \
  --build-arg PYTHON_VERSION=3.10 \
  --build-arg INSTALL_TYPE=all \
  --build-arg ENABLE_GPU=false \
  deploy/docker/
 ```
-Run with:
+#### 2. Environment Setup
 If you plan to use LLMs (Language Models), you'll need to set up your API keys. Create a `.llm.env` file:
 ```env
 # OpenAI
 OPENAI_API_KEY=sk-your-key
 # Anthropic
 ANTHROPIC_API_KEY=your-anthropic-key
 # DeepSeek
 DEEPSEEK_API_KEY=your-deepseek-key
 # Check out https://docs.litellm.ai/docs/providers for more providers!
 ```
 > 🔑 **Note**: Keep your API keys secure! Never commit them to version control.
 #### 3. Running the Container
 You have several options for running the container:
 Basic run (no LLM support):
 ```bash
 docker run -d -p 8000:8000 --name crawl4ai crawl4ai-server:prod
 ```
 With LLM support:
 ```bash
 docker run -d -p 8000:8000 \
  --env-file .llm.env \
  --name crawl4ai \
  crawl4ai-server:prod
 ```
-### 2. **Direct Environment Variables**
+Using host environment variables (Not a good practice, but works for local testing):
 Pass keys directly:
 ```bash
 docker run -d -p 8000:8000 \
-  -e OPENAI_API_KEY="sk-your-key" \
+  --env-file .llm.env \
-  -e DEEPSEEK_API_KEY="your-deepseek-key" \
+  -e OPENAI_API_KEY -e DEEPSEEK_API_KEY \
  --name crawl4ai \
  crawl4ai-server:prod
 ```
-### 3. **Copy Host Environment Variables**
+### More on Building
-Use the `--copy-env` flag to copy `.llm.env` from the host:
+
 You have several options for building the Docker image based on your needs:
 #### Basic Build
 ```bash
-docker run -d -p 8000:8000 \
+# Clone the repository
-  --copy-env \
+git clone https://github.com/unclecode/crawl4ai.git
-  crawl4ai-server:prod
+cd crawl4ai
 # Simple build with defaults
 docker build -t crawl4ai-server:prod deploy/docker/
 ```
-### 4. **Advanced: Docker Compose**
+#### Advanced Build Options
 Create a `docker-compose.yml`:
 ```yaml
 version: '3.8'
 services:
  crawl4ai:
    image: crawl4ai-server:prod
    ports:
      - "8000:8000"
    env_file:
      - .llm.env
    restart: unless-stopped
 ```
 Run with:
 ```bash
-docker-compose up -d
+# Build with custom parameters
 docker build -t crawl4ai-server:prod \
  --build-arg PYTHON_VERSION=3.10 \
  --build-arg INSTALL_TYPE=all \
  --build-arg ENABLE_GPU=false \
  deploy/docker/
 ```
---
+#### Platform-Specific Builds
 The Dockerfile includes optimizations for different architectures (ARM64 and AMD64). Docker automatically detects your platform, but you can specify it explicitly:
-## Supported Environment Variables
+```bash
-| Variable               | Description                          |
+# Build for ARM64
-|------------------------|--------------------------------------|
+docker build --platform linux/arm64 -t crawl4ai-server:arm64 deploy/docker/
 | `OPENAI_API_KEY`       | OpenAI API key                       |
 | `DEEPSEEK_API_KEY`     | DeepSeek API key                     |
 | `ANTHROPIC_API_KEY`    | Anthropic API key                    |
 | `GROQ_API_KEY`         | Groq API key                         |
 | `TOGETHER_API_KEY`     | Together API key                     |
 | `LLAMA_CLOUD_API_KEY`  | Llama Cloud API key                  |
 | `COHERE_API_KEY`       | Cohere API key                       |
 | `MISTRAL_API_KEY`      | Mistral API key                      |
 | `PERPLEXITY_API_KEY`   | Perplexity API key                   |
 | `VERTEXAI_PROJECT_ID`  | Google Vertex AI project ID          |
 | `VERTEXAI_LOCATION`    | Google Vertex AI location            |
---
+# Build for AMD64
 docker build --platform linux/amd64 -t crawl4ai-server:amd64 deploy/docker/
 ```
-## Healthcheck
+#### Multi-Platform Build
-The container includes a healthcheck:
+For distributing your image across different architectures, use `buildx`:
 ```bash
 # Set up buildx builder
 docker buildx create --use
 # Build for multiple platforms
 docker buildx build \
  --platform linux/amd64,linux/arm64 \
  -t yourusername/crawl4ai-server:multi \
  --push \
  deploy/docker/
 ```
 > 💡 **Note**: Multi-platform builds require Docker Buildx and need to be pushed to a registry.
 #### Development Build
 For development, you might want to enable all features:
 ```bash
 docker build -t crawl4ai-server:dev \
  --build-arg INSTALL_TYPE=all \
  --build-arg PYTHON_VERSION=3.10 \
  --build-arg ENABLE_GPU=true \
  deploy/docker/
 ```
 #### GPU-Enabled Build
 If you plan to use GPU acceleration:
 ```bash
 docker build -t crawl4ai-server:gpu \
  --build-arg ENABLE_GPU=true \
  deploy/docker/
 ```
 ### Build Arguments Explained
 | Argument | Description | Default | Options |
 |----------|-------------|---------|----------|
 | PYTHON_VERSION | Python version | 3.10 | 3.8, 3.9, 3.10 |
 | INSTALL_TYPE | Feature set | default | default, all, torch, transformer |
 | ENABLE_GPU | GPU support | false | true, false |
 | APP_HOME | Install path | /app | any valid path |
 ### Build Best Practices
 1. **Choose the Right Install Type**
   - `default`: Basic installation and the smallest image. To be honest, I use this most of the time.
   - `all`: Full features and a larger image (includes transformer and nltk, so make sure you really need them)
 2. **Platform Considerations**
   - Let Docker auto-detect platform unless you need cross-compilation
   - Use --platform for specific architecture requirements
   - Consider buildx for multi-architecture distribution
 3. **Development vs Production**
   - Use `INSTALL_TYPE=all` for development
   - Stick to `default` for production if you don't need extra features
   - Enable GPU only if you have compatible hardware
 4. **Performance Optimization**
   - The image automatically includes platform-specific optimizations
   - AMD64 gets OpenMP optimizations
   - ARM64 gets OpenBLAS optimizations
 ### Docker Hub
 > 🚧 Coming soon! The image will be available at `crawl4ai/server`. Stay tuned!
 ## Dockerfile Parameters
 Configure your build with these parameters:
 | Parameter | Description | Default | Options |
 |-----------|-------------|---------|----------|
 | PYTHON_VERSION | Python version to use | 3.10 | 3.8, 3.9, 3.10 |
 | INSTALL_TYPE | Installation profile | default | default, all, torch, transformer |
 | ENABLE_GPU | Enable GPU support | false | true, false |
 | APP_HOME | Application directory | /app | any valid path |
 | TARGETARCH | Target architecture | auto-detected | amd64, arm64 |
 ## Using the API
 ### Understanding Request Schema
 This is super important! The API expects a specific structure that matches our Python classes. Let me show you how it works.
 #### The Magic of Type Matching
 When you send a request, each configuration object needs a "type" field that matches the exact class name from the library. Here's an example:
 ```python
 # First, let's create objects the normal way
 from crawl4ai import BrowserConfig, CrawlerRunConfig, PruningContentFilter
 # Create some config objects
 browser_config = BrowserConfig(headless=True, viewport={"width": 1200, "height": 800})
 content_filter = PruningContentFilter(threshold=0.48, threshold_type="fixed")
 # Use dump() to see the serialized format
 print(browser_config.dump())
 ```
 This will output something like:
 ```json
 {
    "type": "BrowserConfig",
    "params": {
        "headless": true,
        "viewport": {
            "width": 1200,
            "height": 800
        }
    }
 }
 ```
 #### Making API Requests
 So when making a request, your JSON should look like this:
 ```json
 {
    "urls": ["https://example.com"],
    "browser_config": {
        "type": "BrowserConfig",
        "params": {
            "headless": true,
            "viewport": {"width": 1200, "height": 800}
        }
    },
    "crawler_config": {
        "type": "CrawlerRunConfig",
        "params": {
            "cache_mode": "bypass",
            "markdown_generator": {
                "type": "DefaultMarkdownGenerator",
                "params": {
                    "content_filter": {
                        "type": "PruningContentFilter",
                        "params": {
                            "threshold": 0.48,
                            "threshold_type": "fixed",
                            "min_word_threshold": 0
                        }
                    }
                }
            }
        }
    }
 }
 ```
 > 💡 **Pro tip**: Look at the class names in the library documentation - they map directly to the "type" fields in your requests!
 ### REST API Examples
 Let's look at some practical examples:
 #### Simple Crawl
 ```python
 import requests
 response = requests.post(
    "http://localhost:8000/crawl",
    json={
        "urls": ["https://example.com"],
        "browser_config": {
            "type": "BrowserConfig",
            "params": {"headless": True}
        }
    }
 )
 print(response.json())
 ```
 #### Streaming Results
 ```python
 import requests
 response = requests.post(
    "http://localhost:8000/crawl",
    json={
        "urls": ["https://example.com"],
        "crawler_config": {
            "type": "CrawlerRunConfig",
            "params": {"stream": True}
        }
    },
    stream=True
 )
 for line in response.iter_lines():
    if line:
        print(line.decode())
 ```
 ### Python SDK
 The SDK makes things even easier! Here's how to use it:
 ```python
 from crawl4ai.docker_client import Crawl4aiDockerClient
 from crawl4ai import BrowserConfig, CrawlerRunConfig
 async with Crawl4aiDockerClient() as client:
    # The SDK handles serialization for you!
    result = await client.crawl(
        urls=["https://example.com"],
        browser_config=BrowserConfig(headless=True),
        crawler_config=CrawlerRunConfig(stream=False)
    )
    print(result.markdown)
 ```
 ## Metrics & Monitoring
 Keep an eye on your crawler with these endpoints:
 - `/health` - Quick health check
 - `/metrics` - Detailed Prometheus metrics
 - `/schema` - Full API schema
 Example health check:
 ```bash
 curl http://localhost:8000/health
 ```
---
+## Deployment Scenarios
-## Troubleshooting
+> 🚧 Coming soon! We'll cover:
-1. **Missing Keys**: Ensure all required keys are set in `.llm.env`.
+> - Kubernetes deployment
-2. **Permissions**: Run `chmod +x docker-entrypoint.sh` if permissions are denied.
+> - Cloud provider setups (AWS, GCP, Azure)
-3. **Logs**: Check logs with:
+> - High-availability configurations
-   ```bash
+> - Load balancing strategies
-   docker logs crawl4ai
+
 ## Complete Examples
 Check out the `examples` folder in our repository for full working examples! Here's one to get you started:
 ```python
 import requests
 import time
 import httpx
 import asyncio
 from typing import Dict, Any
 from crawl4ai import (
    BrowserConfig, CrawlerRunConfig, DefaultMarkdownGenerator,
    PruningContentFilter, JsonCssExtractionStrategy, LLMContentFilter, CacheMode
 )
 from crawl4ai.docker_client import Crawl4aiDockerClient
 class Crawl4AiTester:
    def __init__(self, base_url: str = "http://localhost:8000"):
        self.base_url = base_url
    def submit_and_wait(
        self, request_data: Dict[str, Any], timeout: int = 300
    ) -> Dict[str, Any]:
        # Submit crawl job
        response = requests.post(f"{self.base_url}/crawl", json=request_data)
        task_id = response.json()["task_id"]
        print(f"Task ID: {task_id}")
        # Poll for result
        start_time = time.time()
        while True:
            if time.time() - start_time > timeout:
                raise TimeoutError(
                    f"Task {task_id} did not complete within {timeout} seconds"
                )
            result = requests.get(f"{self.base_url}/task/{task_id}")
            status = result.json()
            if status["status"] == "failed":
                print("Task failed:", status.get("error"))
                raise Exception(f"Task failed: {status.get('error')}")
            if status["status"] == "completed":
                return status
            time.sleep(2)
 async def test_direct_api():
    """Test direct API endpoints without using the client SDK"""
    print("\n=== Testing Direct API Calls ===")
    # Test 1: Basic crawl with content filtering
    browser_config = BrowserConfig(
        headless=True,
        viewport_width=1200,
        viewport_height=800
    )
    crawler_config = CrawlerRunConfig(
        cache_mode=CacheMode.BYPASS,
        markdown_generator=DefaultMarkdownGenerator(
            content_filter=PruningContentFilter(
                threshold=0.48,
                threshold_type="fixed",
                min_word_threshold=0
            ),
            options={"ignore_links": True}
        )
    )
    request_data = {
        "urls": ["https://example.com"],
        "browser_config": browser_config.dump(),
        "crawler_config": crawler_config.dump()
    }
    # Make direct API call
    async with httpx.AsyncClient() as client:
        response = await client.post(
            "http://localhost:8000/crawl",
            json=request_data,
            timeout=300
        )
        assert response.status_code == 200
        result = response.json()
        print("Basic crawl result:", result["success"])
    # Test 2: Structured extraction with JSON CSS
    schema = {
        "baseSelector": "article.post",
        "fields": [
            {"name": "title", "selector": "h1", "type": "text"},
            {"name": "content", "selector": ".content", "type": "html"}
        ]
    }
    crawler_config = CrawlerRunConfig(
        cache_mode=CacheMode.BYPASS,
        extraction_strategy=JsonCssExtractionStrategy(schema=schema)
    )
    request_data["crawler_config"] = crawler_config.dump()
    async with httpx.AsyncClient() as client:
        response = await client.post(
            "http://localhost:8000/crawl",
            json=request_data
        )
        assert response.status_code == 200
        result = response.json()
        print("Structured extraction result:", result["success"])
    # Test 3: Get schema
    # async with httpx.AsyncClient() as client:
    #     response = await client.get("http://localhost:8000/schema")
    #     assert response.status_code == 200
    #     schemas = response.json()
    #     print("Retrieved schemas for:", list(schemas.keys()))
 async def test_with_client():
    """Test using the Crawl4AI Docker client SDK"""
    print("\n=== Testing Client SDK ===")
    async with Crawl4aiDockerClient(verbose=True) as client:
        # Test 1: Basic crawl
        browser_config = BrowserConfig(headless=True)
        crawler_config = CrawlerRunConfig(
            cache_mode=CacheMode.BYPASS,
            markdown_generator=DefaultMarkdownGenerator(
                content_filter=PruningContentFilter(
                    threshold=0.48,
                    threshold_type="fixed"
                )
            )
        )
        result = await client.crawl(
            urls=["https://example.com"],
            browser_config=browser_config,
            crawler_config=crawler_config
        )
        print("Client SDK basic crawl:", result.success)
        # Test 2: LLM extraction with streaming
        crawler_config = CrawlerRunConfig(
            cache_mode=CacheMode.BYPASS,
            markdown_generator=DefaultMarkdownGenerator(
                content_filter=LLMContentFilter(
                    provider="openai/gpt-4o",
                    instruction="Extract key technical concepts"
                )
            ),
            stream=True
        )
        async for result in await client.crawl(
            urls=["https://example.com"],
            browser_config=browser_config,
            crawler_config=crawler_config
        ):
            print(f"Streaming result for: {result.url}")
        # # Test 3: Get schema
        # schemas = await client.get_schema()
        # print("Retrieved client schemas for:", list(schemas.keys()))
 async def main():
    """Run all tests"""
    # Test direct API
    print("Testing direct API calls...")
    await test_direct_api()
    # Test client SDK
    print("\nTesting client SDK...")
    await test_with_client()
 if __name__ == "__main__":
    asyncio.run(main())
 ```
 ## Server Configuration
 The server's behavior can be customized through the `config.yml` file. Let's explore how to configure your Crawl4AI server for optimal performance and security.
 ### Understanding config.yml
 The configuration file is located at `deploy/docker/config.yml`. You can either modify this file before building the image or mount a custom configuration when running the container.
 Here's a detailed breakdown of the configuration options:
 ```yaml
 # Application Configuration
 app:
  title: "Crawl4AI API"           # Server title in OpenAPI docs
  version: "1.0.0"               # API version
  host: "0.0.0.0"               # Listen on all interfaces
  port: 8000                    # Server port
  reload: True                  # Enable hot reloading (development only)
  timeout_keep_alive: 300       # Keep-alive timeout in seconds
 # Rate Limiting Configuration
 rate_limiting:
  enabled: True                 # Enable/disable rate limiting
  default_limit: "100/minute"   # Rate limit format: "number/timeunit"
  trusted_proxies: []          # List of trusted proxy IPs
  storage_uri: "memory://"     # Use "redis://localhost:6379" for production
 # Security Configuration
 security:
  enabled: false               # Master toggle for security features
  https_redirect: True         # Force HTTPS
  trusted_hosts: ["*"]        # Allowed hosts (use specific domains in production)
  headers:                     # Security headers
    x_content_type_options: "nosniff"
    x_frame_options: "DENY"
    content_security_policy: "default-src 'self'"
    strict_transport_security: "max-age=63072000; includeSubDomains"
 # Crawler Configuration
 crawler:
  memory_threshold_percent: 95.0  # Memory usage threshold
  rate_limiter:
    base_delay: [1.0, 2.0]      # Min and max delay between requests
  timeouts:
    stream_init: 30.0           # Stream initialization timeout
    batch_process: 300.0        # Batch processing timeout
 # Logging Configuration
 logging:
  level: "INFO"                 # Log level (DEBUG, INFO, WARNING, ERROR)
  format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
 # Observability Configuration
 observability:
  prometheus:
    enabled: True              # Enable Prometheus metrics
    endpoint: "/metrics"       # Metrics endpoint
  health_check:
    endpoint: "/health"        # Health check endpoint
 ```
 ### Configuration Tips and Best Practices
 1. **Production Settings** 🏭
   ```yaml
   app:
     reload: False              # Disable reload in production
     timeout_keep_alive: 120    # Lower timeout for better resource management
   rate_limiting:
     storage_uri: "redis://redis:6379"  # Use Redis for distributed rate limiting
     default_limit: "50/minute"         # More conservative rate limit
   security:
     enabled: true                      # Enable all security features
     trusted_hosts: ["your-domain.com"] # Restrict to your domain
   ```
---
+2. **Development Settings** 🛠️
   ```yaml
   app:
     reload: True               # Enable hot reloading
     timeout_keep_alive: 300    # Longer timeout for debugging
   logging:
     level: "DEBUG"            # More verbose logging
   ```
-## Security Best Practices
+3. **High-Traffic Settings** 🚦
- Never commit `.llm.env` to version control.
+   ```yaml
- Use Docker secrets in production (Swarm/K8s).
+   crawler:
- Rotate keys regularly.
+     memory_threshold_percent: 85.0  # More conservative memory limit
     rate_limiter:
       base_delay: [2.0, 4.0]       # More aggressive rate limiting
   ```
 ### Customizing Your Configuration
 #### Method 1: Pre-build Configuration
 ```bash
 # Copy and modify config before building
 cp deploy/docker/config.yml custom-config.yml
 vim custom-config.yml
 # Build with custom config
 docker build -t crawl4ai-server:prod \
  --build-arg CONFIG_PATH=custom-config.yml .
 ```
 #### Method 2: Runtime Configuration
 ```bash
 # Mount custom config at runtime
 docker run -d -p 8000:8000 \
  -v $(pwd)/custom-config.yml:/app/config.yml \
  crawl4ai-server:prod
 ```
 ### Configuration Recommendations
 1. **Security First** 🔒
   - Always enable security in production
   - Use specific trusted_hosts instead of wildcards
   - Set up proper rate limiting to protect your server
   - Consider your environment before enabling HTTPS redirect
 2. **Resource Management** 💻
   - Adjust memory_threshold_percent based on available RAM
   - Set timeouts according to your content size and network conditions
   - Use Redis for rate limiting in multi-container setups
 3. **Monitoring** 📊
   - Enable Prometheus if you need metrics
   - Set DEBUG logging in development, INFO in production
   - Regular health check monitoring is crucial
 4. **Performance Tuning** ⚡
   - Start with conservative rate limiter delays
   - Increase batch_process timeout for large content
   - Adjust stream_init timeout based on initial response times
 ### Configuration Migration
 When upgrading Crawl4AI, follow these steps:
 1. Back up your current config:
   ```bash
   cp /app/config.yml /app/config.yml.backup
   ```
 2. Use version control:
   ```bash
   git add config.yml
   git commit -m "Save current server configuration"
   ```
 3. Test in staging first:
   ```bash
   # Use a different host port so the staging container does not clash with production
   docker run -d -p 8001:8000 \
     -v $(pwd)/new-config.yml:/app/config.yml \
     crawl4ai-server:prod
   ```
 ### Common Configuration Scenarios
 1. **Basic Development Setup**
   ```yaml
   security:
     enabled: false
   logging:
     level: "DEBUG"
   ```
 2. **Production API Server**
   ```yaml
   security:
     enabled: true
     trusted_hosts: ["api.yourdomain.com"]
   rate_limiting:
     enabled: true
     default_limit: "50/minute"
   ```
 3. **High-Performance Crawler**
   ```yaml
   crawler:
     memory_threshold_percent: 90.0
     timeouts:
       batch_process: 600.0
   ```
 ## Getting Help
 We're here to help you succeed with Crawl4AI! Here's how to get support:
 - 📖 Check our [full documentation](https://docs.crawl4ai.com)
 - 🐛 Found a bug? [Open an issue](https://github.com/unclecode/crawl4ai/issues)
 - 💬 Join our [Discord community](https://discord.gg/crawl4ai)
 - ⭐ Star us on GitHub to show support!
 ## Summary
 In this guide, we've covered everything you need to get started with Crawl4AI's Docker deployment:
 - Building and running the Docker container
 - Configuring the environment
 - Making API requests with proper typing
 - Using the Python SDK
 - Monitoring your deployment
 Remember, the examples in the `examples` folder are your friends - they show real-world usage patterns that you can adapt for your needs.
 Keep exploring, and don't hesitate to reach out if you need help! We're building something amazing together. 🚀
 Happy crawling! 🕷️
--- a/deploy/docker/api.py
+++ b/deploy/docker/api.py
@@ -0,0 +1,305 @@
 import os
 import json
 import logging
 from typing import Optional, AsyncGenerator
 from urllib.parse import unquote
 from fastapi import HTTPException, Request, status
 from fastapi.background import BackgroundTasks
 from fastapi.responses import JSONResponse
 from redis import asyncio as aioredis
 from crawl4ai import (
    AsyncWebCrawler,
    CrawlerRunConfig,
    LLMExtractionStrategy,
    CacheMode
 )
 from crawl4ai.content_filter_strategy import (
    PruningContentFilter,
    BM25ContentFilter,
    LLMContentFilter
 )
 from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator
 from crawl4ai.content_scraping_strategy import LXMLWebScrapingStrategy
 from utils import (
    TaskStatus,
    FilterType,
    get_base_url,
    is_task_id,
    should_cleanup_task,
    decode_redis_hash
 )
 logger = logging.getLogger(__name__)
 async def process_llm_extraction(
    redis: aioredis.Redis,
    config: dict,
    task_id: str,
    url: str,
    instruction: str,
    schema: Optional[str] = None,
    cache: str = "0"
 ) -> None:
    """Crawl ``url`` with an LLM extraction strategy and record the outcome in Redis.

    The outcome is written to the hash ``task:{task_id}``: on success the
    fields ``status`` (COMPLETED) and ``result`` (JSON string), otherwise
    ``status`` (FAILED) and ``error``. Exceptions are logged and recorded on
    the task instead of being propagated.

    Args:
        redis: Async Redis client used to store the task state.
        config: Server configuration; ``config["llm"]["provider"]`` is required
            and ``config["llm"]["api_key_env"]`` optionally names the
            environment variable holding the API token.
        task_id: Identifier of the task being processed.
        url: URL to crawl.
        instruction: Instruction passed to the LLM extraction strategy.
        schema: Optional JSON-encoded schema for the extraction.
        cache: ``"1"`` enables the crawler cache; any other value bypasses it.
    """
    try:
        # os.environ.get(None, ...) raises TypeError, so only consult the
        # environment when the config actually names a variable.
        api_key_env = config["llm"].get("api_key_env")
        api_token = os.environ.get(api_key_env, "") if api_key_env else ""
        llm_strategy = LLMExtractionStrategy(
            provider=config["llm"]["provider"],
            api_token=api_token,
            instruction=instruction,
            schema=json.loads(schema) if schema else None,
        )
        cache_mode = CacheMode.ENABLED if cache == "1" else CacheMode.BYPASS
        async with AsyncWebCrawler() as crawler:
            result = await crawler.arun(
                url=url,
                config=CrawlerRunConfig(
                    extraction_strategy=llm_strategy,
                    scraping_strategy=LXMLWebScrapingStrategy(),
                    cache_mode=cache_mode
                )
            )
        if not result.success:
            await redis.hset(f"task:{task_id}", mapping={
                "status": TaskStatus.FAILED,
                "error": result.error_message
            })
            return
        # Round-trip through json so a malformed payload fails the task here
        # rather than being stored.
        content = json.loads(result.extracted_content)
        await redis.hset(f"task:{task_id}", mapping={
            "status": TaskStatus.COMPLETED,
            "result": json.dumps(content)
        })
    except Exception as e:
        # Background task boundary: record the failure on the task itself.
        logger.error(f"LLM extraction error: {str(e)}", exc_info=True)
        await redis.hset(f"task:{task_id}", mapping={
            "status": TaskStatus.FAILED,
            "error": str(e)
        })
 async def handle_markdown_request(
    url: str,
    filter_type: FilterType,
    query: Optional[str] = None,
    cache: str = "0",
    config: Optional[dict] = None
 ) -> str:
    """Crawl ``url`` and return its markdown, optionally content-filtered.

    Args:
        url: Possibly percent-encoded URL; ``https://`` is prepended when no
            http(s) scheme is present.
        filter_type: ``RAW`` returns the unfiltered markdown; ``FIT``,
            ``BM25`` and ``LLM`` select the content filter whose output
            (``fit_markdown``) is returned.
        query: User query for the BM25 filter, or instruction for the LLM
            filter (defaults to "Extract main content").
        cache: ``"1"`` enables the crawler cache; any other value bypasses it.
        config: Server configuration. Only read for the LLM filter, which
            needs ``config["llm"]["provider"]`` and optionally
            ``config["llm"]["api_key_env"]``.

    Returns:
        The generated markdown.

    Raises:
        HTTPException: 500 when the crawl fails or any other error occurs.
    """
    try:
        decoded_url = unquote(url)
        if not decoded_url.startswith(('http://', 'https://')):
            decoded_url = 'https://' + decoded_url
        if filter_type == FilterType.RAW:
            md_generator = DefaultMarkdownGenerator()
        else:
            def make_llm_filter():
                llm_config = config["llm"]
                # os.environ.get(None, ...) raises TypeError, so only consult
                # the environment when a variable name is configured.
                api_key_env = llm_config.get("api_key_env")
                return LLMContentFilter(
                    provider=llm_config["provider"],
                    api_token=os.environ.get(api_key_env, "") if api_key_env else "",
                    instruction=query or "Extract main content"
                )
            # Map to factories and build only the requested filter, so FIT and
            # BM25 requests do not touch the LLM configuration at all.
            filter_factories = {
                FilterType.FIT: PruningContentFilter,
                FilterType.BM25: lambda: BM25ContentFilter(user_query=query or ""),
                FilterType.LLM: make_llm_filter,
            }
            content_filter = filter_factories[filter_type]()
            md_generator = DefaultMarkdownGenerator(content_filter=content_filter)
        cache_mode = CacheMode.ENABLED if cache == "1" else CacheMode.BYPASS
        async with AsyncWebCrawler() as crawler:
            result = await crawler.arun(
                url=decoded_url,
                config=CrawlerRunConfig(
                    markdown_generator=md_generator,
                    scraping_strategy=LXMLWebScrapingStrategy(),
                    cache_mode=cache_mode
                )
            )
            if not result.success:
                logger.error(f"Markdown error: {result.error_message}")
                raise HTTPException(
                    status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                    detail=result.error_message
                )
            return (result.markdown_v2.raw_markdown
                   if filter_type == FilterType.RAW
                   else result.markdown_v2.fit_markdown)
    except HTTPException:
        # Already a well-formed HTTP error; do not re-wrap it below.
        raise
    except Exception as e:
        logger.error(f"Markdown error: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e)
        )
 async def handle_llm_request(
    redis: aioredis.Redis,
    background_tasks: BackgroundTasks,
    request: Request,
    input_path: str,
    query: Optional[str] = None,
    schema: Optional[str] = None,
    cache: str = "0",
    config: Optional[dict] = None
 ) -> JSONResponse:
    """Dispatch an LLM endpoint request.

    Depending on the input this either reports the status of an existing task
    (when ``input_path`` is a task id), returns a hint asking for an
    instruction (when ``query`` is empty), or creates a new extraction task.

    Args:
        redis: Async Redis client holding task state.
        background_tasks: FastAPI background task queue, forwarded to
            ``create_new_task``.
        request: Incoming request; used for the base URL and the retry link.
        input_path: Either a task id or the URL to process.
        query: Extraction instruction.
        schema: Optional JSON-encoded extraction schema.
        cache: Cache flag forwarded to ``create_new_task``.
        config: Server configuration forwarded to ``create_new_task``.

    Returns:
        The JSON response of the selected action, or a 500 JSON body with a
        retry link when an unexpected error occurs.

    Raises:
        HTTPException: Propagated unchanged from the called handlers (for
            example the 404 for an unknown task id).
    """
    base_url = get_base_url(request)
    try:
        if is_task_id(input_path):
            return await handle_task_status(
                redis, input_path, base_url
            )
        if not query:
            return JSONResponse({
                "message": "Please provide an instruction",
                "_links": {
                    "example": {
                        "href": f"{base_url}/llm/{input_path}?q=Extract+main+content",
                        "title": "Try this example"
                    }
                }
            })
        return await create_new_task(
            redis,
            background_tasks,
            input_path,
            query,
            schema,
            cache,
            base_url,
            config
        )
    except HTTPException:
        # Deliberate HTTP errors (e.g. "Task not found") must keep their
        # status code instead of being reported as a 500 below.
        raise
    except Exception as e:
        logger.error(f"LLM endpoint error: {str(e)}", exc_info=True)
        return JSONResponse({
            "error": str(e),
            "_links": {
                "retry": {"href": str(request.url)}
            }
        }, status_code=status.HTTP_500_INTERNAL_SERVER_ERROR)
 async def handle_task_status(
    redis: aioredis.Redis,
    task_id: str,
    base_url: str
 ) -> JSONResponse:
    """Report the stored state of a task.

    Reads the Redis hash ``task:<task_id>``, builds the status payload and,
    when the task is finished (completed or failed) and old enough according
    to ``should_cleanup_task``, deletes the hash after the payload was built.

    Raises:
        HTTPException: 404 when no hash exists for ``task_id``.
    """
    redis_key = f"task:{task_id}"
    raw_task = await redis.hgetall(redis_key)
    if not raw_task:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Task not found"
        )

    task = decode_redis_hash(raw_task)
    response = create_task_response(task, task_id, base_url)

    # Only finished tasks are eligible for removal; the age check is skipped
    # entirely for tasks that are still processing.
    is_finished = task["status"] in (TaskStatus.COMPLETED, TaskStatus.FAILED)
    if is_finished and should_cleanup_task(task["created_at"]):
        await redis.delete(redis_key)

    return JSONResponse(response)
 async def create_new_task(
    redis: aioredis.Redis,
    background_tasks: BackgroundTasks,
    input_path: str,
    query: str,
    schema: Optional[str],
    cache: str,
    base_url: str,
    config: dict
 ) -> JSONResponse:
    """Create and initialize a new task.

    The target URL is URL-decoded and given an ``https://`` scheme when it has
    none. A Redis hash ``task:<task_id>`` is written with the initial status,
    creation time and URL, and ``process_llm_extraction`` is scheduled on
    ``background_tasks``.

    Args:
        redis: Redis client used to persist the task hash.
        background_tasks: Queue on which the extraction is scheduled.
        input_path: Possibly URL-encoded target URL.
        query: Extraction instruction.
        schema: Optional schema forwarded to the extraction.
        cache: Cache flag forwarded to the extraction.
        base_url: Scheme and host used to build the status links.
        config: Application configuration forwarded to the extraction.

    Returns:
        A ``JSONResponse`` with the task ID, its status, the resolved URL and
        links for polling the task.
    """
    import uuid
    from datetime import datetime

    decoded_url = unquote(input_path)
    if not decoded_url.startswith(('http://', 'https://')):
        decoded_url = 'https://' + decoded_url

    # One timestamp for both the ID and the stored creation time.
    created_at = datetime.now()
    # A random suffix keeps IDs unique; id() of a short-lived object can be
    # reused by the interpreter, so two requests within the same second could
    # otherwise end up sharing (and overwriting) one task hash.
    task_id = f"llm_{int(created_at.timestamp())}_{uuid.uuid4().hex}"

    await redis.hset(f"task:{task_id}", mapping={
        "status": TaskStatus.PROCESSING,
        "created_at": created_at.isoformat(),
        "url": decoded_url
    })
    background_tasks.add_task(
        process_llm_extraction,
        redis,
        config,
        task_id,
        decoded_url,
        query,
        schema,
        cache
    )
    return JSONResponse({
        "task_id": task_id,
        "status": TaskStatus.PROCESSING,
        "url": decoded_url,
        "_links": {
            "self": {"href": f"{base_url}/llm/{task_id}"},
            "status": {"href": f"{base_url}/llm/{task_id}"}
        }
    })
 def create_task_response(task: dict, task_id: str, base_url: str) -> dict:
    """Build the status payload for a stored task.

    The payload always carries the task ID, status, creation time, URL and
    two links pointing at the task. A completed task additionally gets its
    JSON-decoded ``result``; a failed task gets its ``error`` text.
    """
    task_href = f"{base_url}/llm/{task_id}"
    current_status = task["status"]

    payload = {
        "task_id": task_id,
        "status": current_status,
        "created_at": task["created_at"],
        "url": task["url"],
        "_links": {
            "self": {"href": task_href},
            "refresh": {"href": task_href},
        },
    }

    if current_status == TaskStatus.COMPLETED:
        # The result is stored as a JSON string in the task hash.
        payload["result"] = json.loads(task["result"])
    elif current_status == TaskStatus.FAILED:
        payload["error"] = task["error"]
    return payload
 async def stream_results(crawler: AsyncWebCrawler, results_gen: AsyncGenerator) -> AsyncGenerator[bytes, None]:
    """Stream results with heartbeats and completion markers.

    Each item from ``results_gen`` is dumped with ``model_dump()`` and emitted
    as one UTF-8 encoded, newline-terminated JSON line. If an item cannot be
    serialized, an error line for that item is emitted and streaming goes on.
    After the last item a ``{"status": "completed"}`` line is emitted.

    Args:
        crawler: Crawler that produced ``results_gen``; it is closed when the
            stream ends, whether normally, by cancellation or by an error.
        results_gen: Async iterable of result objects exposing ``model_dump()``.

    Yields:
        Encoded JSON lines.
    """
    import asyncio
    import json
    from utils import datetime_handler
    try:
        async for result in results_gen:
            try:
                result_dict = result.model_dump()
                logger.info(f"Streaming result for {result_dict.get('url', 'unknown')}")
                data = json.dumps(result_dict, default=datetime_handler) + "\n"
                yield data.encode('utf-8')
            except Exception as e:
                logger.error(f"Serialization error: {e}")
                error_response = {"error": str(e), "url": getattr(result, 'url', 'unknown')}
                yield (json.dumps(error_response) + "\n").encode('utf-8')
        # Terminate the marker with a newline like every other record, so
        # line-oriented consumers see it as a complete line.
        yield (json.dumps({"status": "completed"}) + "\n").encode('utf-8')
    except asyncio.CancelledError:
        logger.warning("Client disconnected during streaming")
    finally:
        try:
            await crawler.close()
        except Exception as e:
            logger.error(f"Crawler cleanup error: {e}")
--- a/deploy/docker/config.yml
+++ b/deploy/docker/config.yml
@@ -0,0 +1,69 @@
 # Application Configuration
 app:
  title: "Crawl4AI API"
  version: "1.0.0"
  host: "0.0.0.0"
  port: 8000
  reload: True
  timeout_keep_alive: 300
 # Default LLM Configuration
 llm:
  provider: "openai/gpt-4o-mini"
  api_key_env: "OPENAI_API_KEY"
 # Redis Configuration
 redis:
  # NOTE(review): server.py creates its client from
  # config["redis"].get("uri", "redis://localhost"); no "uri" key is defined
  # here, so confirm which of the settings below are actually consumed.
  host: "localhost"
  port: 6379
  db: 0
  password: ""
  ssl: False
  # Unset TLS options use YAML null; a bare `None` is loaded as the string
  # "None". Each key is listed once (duplicate mapping keys are ambiguous).
  ssl_cert_reqs: null
  ssl_ca_certs: null
  ssl_certfile: null
  ssl_keyfile: null
 # Rate Limiting Configuration
 rate_limiting:
  enabled: True
  default_limit: "1000/minute"
  trusted_proxies: []
  storage_uri: "memory://"  # Use "redis://localhost:6379" for production
 # Security Configuration
 security:
  enabled: false 
  https_redirect: True
  trusted_hosts: ["*"]
  headers:
    x_content_type_options: "nosniff"
    x_frame_options: "DENY"
    content_security_policy: "default-src 'self'"
    strict_transport_security: "max-age=63072000; includeSubDomains"
 # Crawler Configuration
 crawler:
  memory_threshold_percent: 95.0
  rate_limiter:
    base_delay: [1.0, 2.0]
  timeouts:
    stream_init: 30.0  # Timeout for stream initialization
    batch_process: 300.0  # Timeout for batch processing
 # Logging Configuration
 logging:
  level: "INFO"
  format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
 # Observability Configuration
 observability:
  prometheus:
    enabled: True
    endpoint: "/metrics"
  health_check:
    endpoint: "/health"
--- a/deploy/docker/requirements.txt
+++ b/deploy/docker/requirements.txt
@@ -1,4 +1,7 @@
 crawl4ai
 fastapi
 uvicorn
-gunicorn>=23.0.0
+gunicorn>=23.0.0
 slowapi>=0.1.9
 prometheus-fastapi-instrumentator>=7.0.2
 redis>=5.2.1
--- a/deploy/docker/server.py
+++ b/deploy/docker/server.py
@@ -1,120 +1,237 @@
 import os
 import sys
 import time
 from typing import  List, Optional
 sys.path.append(os.path.dirname(os.path.realpath(__file__)))
-from fastapi import FastAPI, HTTPException
+
-from fastapi.responses import StreamingResponse
+from redis import asyncio as aioredis
-import json
+from fastapi import FastAPI, HTTPException, Request, status
-import asyncio
+from fastapi.responses import StreamingResponse, RedirectResponse
-from typing import AsyncGenerator
+from fastapi.middleware.httpsredirect import HTTPSRedirectMiddleware
-from crawl4ai import (
+from fastapi.middleware.trustedhost import TrustedHostMiddleware
-    BrowserConfig,
+from pydantic import BaseModel, Field
-    CrawlerRunConfig,
+from slowapi import Limiter
-    AsyncWebCrawler,
+from slowapi.util import get_remote_address
-    MemoryAdaptiveDispatcher,
+from prometheus_fastapi_instrumentator import Instrumentator
-    RateLimiter,
+from fastapi.responses import PlainTextResponse
 from fastapi.responses import JSONResponse
 from fastapi.background import BackgroundTasks
 from typing import Dict
 import os
 from utils import (
    FilterType,
    load_config,
    setup_logging
 )
 from api import (
    handle_markdown_request,
    handle_llm_request
 )
-from typing import List, Optional
+# Load configuration and setup
-from pydantic import BaseModel
+config = load_config()
 setup_logging(config)
 # Initialize Redis
 redis = aioredis.from_url(config["redis"].get("uri", "redis://localhost"))
 # Initialize rate limiter
 limiter = Limiter(
    key_func=get_remote_address,
    default_limits=[config["rate_limiting"]["default_limit"]],
    storage_uri=config["rate_limiting"]["storage_uri"]
 )
 app = FastAPI(
    title=config["app"]["title"],
    version=config["app"]["version"]
 )
 # Configure middleware
 if config["security"]["enabled"]:
    if config["security"]["https_redirect"]:
        app.add_middleware(HTTPSRedirectMiddleware)
    if config["security"]["trusted_hosts"] and config["security"]["trusted_hosts"] != ["*"]:
        app.add_middleware(
            TrustedHostMiddleware,
            allowed_hosts=config["security"]["trusted_hosts"]
        )
 # Prometheus instrumentation
 if config["observability"]["prometheus"]["enabled"]:
    Instrumentator().instrument(app).expose(app)
 class CrawlRequest(BaseModel):
-    urls: List[str]
+    urls: List[str] = Field(
-    browser_config: Optional[dict] = None
+        min_length=1, 
-    crawler_config: Optional[dict] = None
+        max_length=100,
-
+        json_schema_extra={
-class CrawlResponse(BaseModel):
+            "items": {"type": "string", "maxLength": 2000, "pattern": "\\S"}
-    success: bool
+        }
-    results: List[dict]  
+    )
-
+    browser_config: Optional[Dict] = Field(
-    class Config:
+        default_factory=dict,
-        arbitrary_types_allowed = True
+        example={"headless": True, "viewport": {"width": 1200}}
-
+    )
-app = FastAPI(title="Crawl4AI API")
+    crawler_config: Optional[Dict] = Field(
-
+        default_factory=dict,
-async def stream_results(crawler: AsyncWebCrawler, results_gen: AsyncGenerator) -> AsyncGenerator[bytes, None]:
+        example={"stream": True, "cache_mode": "aggressive"}
    """Stream results and manage crawler lifecycle"""
    def datetime_handler(obj):
        """Custom handler for datetime objects during JSON serialization"""
        if hasattr(obj, 'isoformat'):
            return obj.isoformat()
        raise TypeError(f"Object of type {type(obj)} is not JSON serializable")
    try:
        async for result in results_gen:
            try:
                # Use dump method for serialization
                result_dict = result.model_dump()
                print(f"Streaming result for URL: {result_dict['url']}, Success: {result_dict['success']}")
                # Use custom JSON encoder with datetime handler
                yield (json.dumps(result_dict, default=datetime_handler) + "\n").encode('utf-8')
            except Exception as e:
                print(f"Error serializing result: {e}")
                error_response = {
                    "error": str(e),
                    "url": getattr(result, 'url', 'unknown')
                }
                yield (json.dumps(error_response, default=datetime_handler) + "\n").encode('utf-8')
    except asyncio.CancelledError:
        print("Client disconnected, cleaning up...")
    finally:
        try:
            await crawler.close()
        except Exception as e:
            print(f"Error closing crawler: {e}")
@app.post("/crawl")
 async def crawl(request: CrawlRequest):
    # Load configs using our new utilities
    browser_config = BrowserConfig.load(request.browser_config)
    crawler_config = CrawlerRunConfig.load(request.crawler_config)
    dispatcher = MemoryAdaptiveDispatcher(
        memory_threshold_percent=95.0,
        rate_limiter=RateLimiter(base_delay=(1.0, 2.0)),
    )
-    try:
+@app.middleware("http")
-        if crawler_config.stream:
+async def add_security_headers(request: Request, call_next):
-            crawler = AsyncWebCrawler(config=browser_config)
+    response = await call_next(request)
-            await crawler.start()
+    if config["security"]["enabled"]:
        response.headers.update(config["security"]["headers"])
    return response
-            results_gen = await crawler.arun_many(
+@app.get("/md/{url:path}")
-                urls=request.urls,
+@limiter.limit(config["rate_limiting"]["default_limit"])
-                config=crawler_config,
+async def get_markdown(
-                dispatcher=dispatcher
+    request: Request,
-            )
+    url: str,
    f: FilterType = FilterType.FIT,
    q: Optional[str] = None,
    c: Optional[str] = "0"
 ):
    """Get markdown from URL with optional filtering."""
    result = await handle_markdown_request(url, f, q, c, config)
    return PlainTextResponse(result)
-            return StreamingResponse(
+@app.get("/llm/{input:path}")
-                stream_results(crawler, results_gen),
+@limiter.limit(config["rate_limiting"]["default_limit"])
-                media_type='application/x-ndjson'
+async def llm_endpoint(
-            )
+    request: Request,
-        else:
+    background_tasks: BackgroundTasks,
-            async with AsyncWebCrawler(config=browser_config) as crawler:
+    input: str,
-                results = await crawler.arun_many(
+    q: Optional[str] = None,
-                    urls=request.urls,
+    s: Optional[str] = None,
-                    config=crawler_config,
+    c: Optional[str] = "0"
-                    dispatcher=dispatcher
+):
-                )
+    """Handle LLM extraction requests."""
-                # Use dump method for each result
+    return await handle_llm_request(
-                results_dict = [result.model_dump() for result in results]
+        redis, background_tasks, request, input, q, s, c, config
-                return CrawlResponse(success=True, results=results_dict)
+    )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/schema")
 async def get_schema():
-    """Return config schemas for client validation"""
+    """Endpoint for client-side validation schema."""
    from crawl4ai import BrowserConfig, CrawlerRunConfig
    return {
        "browser": BrowserConfig.model_json_schema(),
        "crawler": CrawlerRunConfig.model_json_schema()
    }
-@app.get("/health")
+@app.get(config["observability"]["health_check"]["endpoint"])
 async def health():
-    return {"status": "ok"}
+    """Health check endpoint."""
    return {"status": "ok", "timestamp": time.time()}
@app.get(config["observability"]["prometheus"]["endpoint"])
 async def metrics():
    """Prometheus metrics endpoint.

    This route is mounted on the configured Prometheus endpoint. Redirecting
    to that same path would send clients into an endless redirect loop, so
    the handler only redirects when the metrics are exposed somewhere else.

    Returns:
        A redirect to the exposed metrics route when Prometheus is enabled
        and the configured endpoint differs from it.

    Raises:
        HTTPException: 404 when there is no metrics route to redirect to.
    """
    prometheus_cfg = config["observability"]["prometheus"]
    # This module calls Instrumentator().expose(app) without an endpoint
    # argument; assumed to expose the library default "/metrics" -
    # TODO confirm against the installed instrumentator version.
    exposed_endpoint = "/metrics"
    if prometheus_cfg["enabled"] and prometheus_cfg["endpoint"] != exposed_endpoint:
        return RedirectResponse(url=exposed_endpoint)
    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail="Prometheus metrics are not available"
    )
@app.post("/crawl")
@limiter.limit(config["rate_limiting"]["default_limit"])
 async def crawl(request: Request, crawl_request: CrawlRequest):
    """Handle crawl requests.

    Crawls ``crawl_request.urls`` with the supplied browser and crawler
    configuration. When the crawler configuration asks for streaming, the
    results are returned as an NDJSON ``StreamingResponse``; otherwise all
    results are collected and returned in one JSON body.

    Args:
        request: Incoming request (required by the rate limiter decorator).
        crawl_request: Validated request body with URLs and configs.

    Raises:
        HTTPException: 400 for an empty URL list, 504 when the configured
            timeout elapses, 500 for any other failure.
    """
    from crawl4ai import (
        AsyncWebCrawler,
        BrowserConfig,
        CrawlerRunConfig,
        MemoryAdaptiveDispatcher,
        RateLimiter
    )
    import asyncio
    import logging
    logger = logging.getLogger(__name__)
    # Only the streaming path stores its crawler here; it is reset to None
    # once ownership has been handed to the response generator.
    crawler = None
    try:
        if not crawl_request.urls:
            logger.error("Empty URL list received")
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="At least one URL required"
            )
        browser_config = BrowserConfig.load(crawl_request.browser_config)
        crawler_config = CrawlerRunConfig.load(crawl_request.crawler_config)
        dispatcher = MemoryAdaptiveDispatcher(
            memory_threshold_percent=config["crawler"]["memory_threshold_percent"],
            rate_limiter=RateLimiter(
                base_delay=tuple(config["crawler"]["rate_limiter"]["base_delay"])
            )
        )
        if crawler_config.stream:
            crawler = AsyncWebCrawler(config=browser_config)
            await crawler.start()
            results_gen = await asyncio.wait_for(
                crawler.arun_many(
                    urls=crawl_request.urls,
                    config=crawler_config,
                    dispatcher=dispatcher
                ),
                timeout=config["crawler"]["timeouts"]["stream_init"]
            )
            from api import stream_results
            response = StreamingResponse(
                stream_results(crawler, results_gen),
                media_type='application/x-ndjson',
                headers={
                    'Cache-Control': 'no-cache',
                    'Connection': 'keep-alive',
                    'X-Stream-Status': 'active'
                }
            )
            # The body is produced after this function returns, and
            # stream_results closes the crawler itself when the stream ends.
            # Closing it in `finally` here would kill the stream up front.
            crawler = None
            return response
        else:
            # The context manager owns this crawler; a separate name keeps
            # the `finally` clause from closing it a second time.
            async with AsyncWebCrawler(config=browser_config) as batch_crawler:
                results = await asyncio.wait_for(
                    batch_crawler.arun_many(
                        urls=crawl_request.urls,
                        config=crawler_config,
                        dispatcher=dispatcher
                    ),
                    timeout=config["crawler"]["timeouts"]["batch_process"]
                )
                return JSONResponse({
                    "success": True,
                    "results": [result.model_dump() for result in results]
                })
    except HTTPException:
        # Keep deliberate HTTP errors (such as the 400 above) intact instead
        # of letting the generic handler turn them into a 500.
        raise
    except asyncio.TimeoutError as e:
        logger.error(f"Operation timed out: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_504_GATEWAY_TIMEOUT,
            detail="Processing timeout"
        )
    except Exception as e:
        logger.error(f"Server error: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Internal server error"
        )
    finally:
        # Reached with a live crawler only when the streaming setup failed.
        if crawler:
            try:
                await crawler.close()
            except Exception as e:
                logger.error(f"Final crawler cleanup error: {e}")
 if __name__ == "__main__":
    import uvicorn
-    uvicorn.run("server:app", host="0.0.0.0", port=8000, reload=True)
+    uvicorn.run(
        "server:app",
        host=config["app"]["host"],
        port=config["app"]["port"],
        reload=config["app"]["reload"],
        timeout_keep_alive=config["app"]["timeout_keep_alive"]
    )
--- a/deploy/docker/utils.py
+++ b/deploy/docker/utils.py
@@ -0,0 +1,54 @@
 import logging
 import yaml
 from datetime import datetime
 from enum import Enum
 from pathlib import Path
 from fastapi import Request
 from typing import Dict, Optional
 class TaskStatus(str, Enum):
    """Status values stored for a task; members are also plain ``str``."""
    # Written when a task is created and its work is scheduled.
    PROCESSING = "processing"
    # Status responses for such a task carry the stored "error" field.
    FAILED = "failed"
    # Status responses for such a task carry the JSON-decoded "result" field.
    COMPLETED = "completed"
 class FilterType(str, Enum):
    """Content filter selectable for markdown requests; members are ``str``."""
    # The raw markdown is returned rather than the fitted markdown.
    RAW = "raw"
    # Selects PruningContentFilter in the markdown handler.
    FIT = "fit"
    # Selects BM25ContentFilter, driven by the user's query.
    BM25 = "bm25"
    # Selects LLMContentFilter, using the configured LLM provider.
    LLM = "llm"
 def load_config() -> Dict:
    """Load and return application configuration.

    Reads ``config.yml`` located next to this module and parses it with
    ``yaml.safe_load``.

    Returns:
        The parsed configuration.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the file is not valid YAML.
    """
    config_path = Path(__file__).parent / "config.yml"
    # Read as UTF-8 explicitly so parsing does not depend on the locale's
    # default encoding.
    with open(config_path, "r", encoding="utf-8") as config_file:
        return yaml.safe_load(config_file)
 def setup_logging(config: Dict) -> None:
    """Apply the ``logging`` section of the configuration.

    Passes the configured ``level`` and ``format`` to
    ``logging.basicConfig``.
    """
    logging_options = config["logging"]
    log_level = logging_options["level"]
    log_format = logging_options["format"]
    logging.basicConfig(level=log_level, format=log_format)
 def get_base_url(request: Request) -> str:
    """Return ``<scheme>://<netloc>`` of the request URL."""
    current_url = request.url
    scheme, netloc = current_url.scheme, current_url.netloc
    return f"{scheme}://{netloc}"
 def is_task_id(value: str) -> bool:
    """Check if the value matches task ID pattern.

    A task ID has exactly three underscore-separated parts:
    ``llm_<digits>_<alphanumeric suffix>``. Anything else, including a URL
    that merely starts with ``llm_``, is not a task ID.
    """
    parts = value.split("_")
    if len(parts) != 3:
        return False
    prefix, timestamp, suffix = parts
    # isdigit()/isalnum() are False for empty strings, so "llm__" and
    # similar degenerate inputs are rejected as well.
    return prefix == "llm" and timestamp.isdigit() and suffix.isalnum()
 def datetime_handler(obj: object) -> str:
    """Handle datetime serialization for JSON.

    Intended as the ``default=`` hook of ``json.dumps``: any object exposing
    an ``isoformat`` method is converted by calling it.

    Raises:
        TypeError: If ``obj`` has no ``isoformat`` attribute.
    """
    if not hasattr(obj, 'isoformat'):
        raise TypeError(f"Object of type {type(obj)} is not JSON serializable")
    return obj.isoformat()
 def should_cleanup_task(created_at: str, max_age_seconds: float = 3600) -> bool:
    """Check if task should be cleaned up based on creation time.

    Args:
        created_at: Creation time in ISO 8601 format, as produced by
            ``datetime.isoformat()``; may be naive or timezone-aware.
        max_age_seconds: Age in seconds beyond which the task is considered
            stale. Defaults to one hour.

    Returns:
        True when more than ``max_age_seconds`` have elapsed since
        ``created_at``.
    """
    created = datetime.fromisoformat(created_at)
    # Use the same awareness as the stored timestamp: now(None) is naive
    # local time, now(tz) is aware. Mixing the two would raise TypeError.
    now = datetime.now(tz=created.tzinfo)
    return (now - created).total_seconds() > max_age_seconds
 def decode_redis_hash(hash_data: Dict[bytes, bytes]) -> Dict[str, str]:
    """Return a copy of a Redis hash with keys and values decoded as UTF-8."""
    decoded: Dict[str, str] = {}
    for raw_key, raw_value in hash_data.items():
        key = raw_key.decode('utf-8')
        decoded[key] = raw_value.decode('utf-8')
    return decoded
--- a/docs/examples/llm_markdown_generator.py
+++ b/docs/examples/llm_markdown_generator.py
@@ -46,6 +46,7 @@ async def test_llm_filter():
            provider="openai/gpt-4o",
            api_token=os.getenv('OPENAI_API_KEY'),
            chunk_token_threshold=2 ** 12 * 2, # 2048 * 2
            ignore_cache = True,
            instruction="""
            Extract the main educational content while preserving its original wording and substance completely. Your task is to:
@@ -68,7 +69,7 @@ async def test_llm_filter():
        )        
        # Apply filtering
-        filtered_content = filter.filter_content(html, ignore_cache = True)
+        filtered_content = filter.filter_content(html)
        # Show results
        print("\nFiltered Content Length:", len(filtered_content))
--- a/server.py
+++ b/server.py