refactor(docker): improve server architecture and configuration
Complete overhaul of Docker deployment setup with improved architecture:

- Add Redis integration for task management
- Implement rate limiting and security middleware
- Add Prometheus metrics and health checks
- Improve error handling and logging
- Add support for streaming responses
- Implement proper configuration management
- Add platform-specific optimizations for ARM64/AMD64

BREAKING CHANGE: Docker deployment now requires Redis and the new config.yml structure
Dockerfile (142 lines changed)
@@ -1,32 +1,31 @@
|
||||
# syntax=docker/dockerfile:1.4
|
||||
FROM python:3.10-slim
|
||||
|
||||
ARG TARGETPLATFORM
|
||||
ARG BUILDPLATFORM
|
||||
# Set build arguments
|
||||
ARG APP_HOME=/app
|
||||
ARG GITHUB_REPO=https://github.com/yourusername/crawl4ai.git
|
||||
ARG GITHUB_BRANCH=main
|
||||
ARG USE_LOCAL=true
|
||||
|
||||
ENV PYTHONFAULTHANDLER=1 \
|
||||
PYTHONHASHSEED=random \
|
||||
PYTHONUNBUFFERED=1 \
|
||||
PIP_NO_CACHE_DIR=1 \
|
||||
PYTHONDONTWRITEBYTECODE=1 \
|
||||
PIP_DISABLE_PIP_VERSION_CHECK=1 \
|
||||
PIP_DEFAULT_TIMEOUT=100 \
|
||||
DEBIAN_FRONTEND=noninteractive \
|
||||
REDIS_HOST=localhost \
|
||||
REDIS_PORT=6379
|
||||
|
||||
# Other build arguments
|
||||
ARG PYTHON_VERSION=3.10
|
||||
|
||||
# Base stage with system dependencies
|
||||
FROM python:${PYTHON_VERSION}-slim as base
|
||||
|
||||
# Declare ARG variables again within the build stage
|
||||
ARG INSTALL_TYPE=all
|
||||
ARG INSTALL_TYPE=default
|
||||
ARG ENABLE_GPU=false
|
||||
ARG TARGETARCH
|
||||
|
||||
# Platform-specific labels
|
||||
LABEL maintainer="unclecode"
|
||||
LABEL description="🔥🕷️ Crawl4AI: Open-source LLM Friendly Web Crawler & scraper"
|
||||
LABEL version="1.0"
|
||||
|
||||
# Environment setup
|
||||
ENV PYTHONUNBUFFERED=1 \
|
||||
PYTHONDONTWRITEBYTECODE=1 \
|
||||
PIP_NO_CACHE_DIR=1 \
|
||||
PIP_DISABLE_PIP_VERSION_CHECK=1 \
|
||||
PIP_DEFAULT_TIMEOUT=100 \
|
||||
DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# Install system dependencies
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
build-essential \
|
||||
curl \
|
||||
@@ -37,10 +36,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
pkg-config \
|
||||
python3-dev \
|
||||
libjpeg-dev \
|
||||
libpng-dev \
|
||||
redis-server \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Playwright system dependencies for Linux
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libglib2.0-0 \
|
||||
libnss3 \
|
||||
@@ -65,8 +63,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libatspi2.0-0 \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# GPU support if enabled and architecture is supported
|
||||
RUN if [ "$ENABLE_GPU" = "true" ] && [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \
|
||||
RUN if [ "$ENABLE_GPU" = "true" ] && [ "$TARGETARCH" = "amd64" ] ; then \
|
||||
apt-get update && apt-get install -y --no-install-recommends \
|
||||
nvidia-cuda-toolkit \
|
||||
&& rm -rf /var/lib/apt/lists/* ; \
|
||||
@@ -74,19 +71,40 @@ else \
|
||||
echo "Skipping NVIDIA CUDA Toolkit installation (unsupported platform or GPU disabled)"; \
|
||||
fi
|
||||
|
||||
# Create and set working directory
|
||||
WORKDIR /app
|
||||
RUN if [ "$TARGETARCH" = "arm64" ]; then \
|
||||
echo "🦾 Installing ARM-specific optimizations"; \
|
||||
apt-get update && apt-get install -y --no-install-recommends \
|
||||
libopenblas-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*; \
|
||||
elif [ "$TARGETARCH" = "amd64" ]; then \
|
||||
echo "🖥️ Installing AMD64-specific optimizations"; \
|
||||
apt-get update && apt-get install -y --no-install-recommends \
|
||||
libomp-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*; \
|
||||
else \
|
||||
echo "Skipping platform-specific optimizations (unsupported platform)"; \
|
||||
fi
|
||||
|
||||
# Copy the entire project
|
||||
COPY . .
|
||||
WORKDIR ${APP_HOME}
|
||||
|
||||
# Install base requirements
|
||||
RUN echo '#!/bin/bash\n\
|
||||
if [ "$USE_LOCAL" = "true" ]; then\n\
|
||||
echo "📦 Installing from local source..."\n\
|
||||
pip install --no-cache-dir /tmp/project/\n\
|
||||
else\n\
|
||||
echo "🌐 Installing from GitHub..."\n\
|
||||
for i in {1..3}; do \n\
|
||||
git clone --branch ${GITHUB_BRANCH} ${GITHUB_REPO} /tmp/crawl4ai && break || \n\
|
||||
{ echo "Attempt $i/3 failed! Taking a short break... ☕"; sleep 5; }; \n\
|
||||
done\n\
|
||||
pip install --no-cache-dir /tmp/crawl4ai\n\
|
||||
fi' > /tmp/install.sh && chmod +x /tmp/install.sh
|
||||
|
||||
COPY . /tmp/project/
|
||||
|
||||
COPY deploy/docker/requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Install required library for FastAPI
|
||||
RUN pip install fastapi uvicorn psutil
|
||||
|
||||
# Install ML dependencies first for better layer caching
|
||||
RUN if [ "$INSTALL_TYPE" = "all" ] ; then \
|
||||
pip install --no-cache-dir \
|
||||
torch \
|
||||
@@ -99,38 +117,50 @@ RUN if [ "$INSTALL_TYPE" = "all" ] ; then \
|
||||
python -m nltk.downloader punkt stopwords ; \
|
||||
fi
|
||||
|
||||
# Install the package
|
||||
RUN if [ "$INSTALL_TYPE" = "all" ] ; then \
|
||||
pip install ".[all]" && \
|
||||
pip install "/tmp/project/[all]" && \
|
||||
python -m crawl4ai.model_loader ; \
|
||||
elif [ "$INSTALL_TYPE" = "torch" ] ; then \
|
||||
pip install ".[torch]" ; \
|
||||
pip install "/tmp/project/[torch]" ; \
|
||||
elif [ "$INSTALL_TYPE" = "transformer" ] ; then \
|
||||
pip install ".[transformer]" && \
|
||||
pip install "/tmp/project/[transformer]" && \
|
||||
python -m crawl4ai.model_loader ; \
|
||||
else \
|
||||
pip install "." ; \
|
||||
pip install "/tmp/project" ; \
|
||||
fi
|
||||
|
||||
# Install MkDocs and required plugins
|
||||
RUN pip install --no-cache-dir \
|
||||
mkdocs \
|
||||
mkdocs-material \
|
||||
mkdocs-terminal \
|
||||
pymdown-extensions
|
||||
RUN pip install --no-cache-dir --upgrade pip && \
|
||||
/tmp/install.sh && \
|
||||
python -c "import crawl4ai; print('✅ crawl4ai is ready to rock!')" && \
|
||||
python -c "from playwright.sync_api import sync_playwright; print('✅ Playwright is feeling dramatic!')"
|
||||
|
||||
# Build MkDocs documentation
|
||||
RUN mkdocs build
|
||||
RUN playwright install --with-deps chromium
|
||||
|
||||
# Install Playwright and browsers
|
||||
RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
|
||||
playwright install chromium; \
|
||||
elif [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
|
||||
playwright install chromium; \
|
||||
fi
|
||||
COPY deploy/docker/* ${APP_HOME}/
|
||||
|
||||
# Expose port
|
||||
EXPOSE 8000 11235 9222 8080
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
||||
CMD bash -c '\
|
||||
MEM=$(free -m | awk "/^Mem:/{print \$2}"); \
|
||||
if [ $MEM -lt 2048 ]; then \
|
||||
echo "⚠️ Warning: Less than 2GB RAM available! Your container might need a memory boost! 🚀"; \
|
||||
exit 1; \
|
||||
fi && \
|
||||
redis-cli ping > /dev/null && \
|
||||
curl -f http://localhost:8000/health || exit 1'
|
||||
|
||||
# Start the FastAPI server
|
||||
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "11235"]
|
||||
COPY deploy/docker/docker-entrypoint.sh /usr/local/bin/
|
||||
RUN chmod +x /usr/local/bin/docker-entrypoint.sh
|
||||
|
||||
EXPOSE 6379
|
||||
|
||||
ENTRYPOINT ["docker-entrypoint.sh"]
|
||||
|
||||
CMD service redis-server start && gunicorn \
|
||||
--bind 0.0.0.0:8000 \
|
||||
--workers 4 \
|
||||
--threads 2 \
|
||||
--timeout 120 \
|
||||
--graceful-timeout 30 \
|
||||
--log-level info \
|
||||
--worker-class uvicorn.workers.UvicornWorker \
|
||||
server:app
|
||||
|
||||
Dockerfile_old (new file, 136 lines)
@@ -0,0 +1,136 @@
|
||||
# syntax=docker/dockerfile:1.4
|
||||
|
||||
ARG TARGETPLATFORM
|
||||
ARG BUILDPLATFORM
|
||||
|
||||
# Other build arguments
|
||||
ARG PYTHON_VERSION=3.10
|
||||
|
||||
# Base stage with system dependencies
|
||||
FROM python:${PYTHON_VERSION}-slim as base
|
||||
|
||||
# Declare ARG variables again within the build stage
|
||||
ARG INSTALL_TYPE=all
|
||||
ARG ENABLE_GPU=false
|
||||
|
||||
# Platform-specific labels
|
||||
LABEL maintainer="unclecode"
|
||||
LABEL description="🔥🕷️ Crawl4AI: Open-source LLM Friendly Web Crawler & scraper"
|
||||
LABEL version="1.0"
|
||||
|
||||
# Environment setup
|
||||
ENV PYTHONUNBUFFERED=1 \
|
||||
PYTHONDONTWRITEBYTECODE=1 \
|
||||
PIP_NO_CACHE_DIR=1 \
|
||||
PIP_DISABLE_PIP_VERSION_CHECK=1 \
|
||||
PIP_DEFAULT_TIMEOUT=100 \
|
||||
DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# Install system dependencies
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
build-essential \
|
||||
curl \
|
||||
wget \
|
||||
gnupg \
|
||||
git \
|
||||
cmake \
|
||||
pkg-config \
|
||||
python3-dev \
|
||||
libjpeg-dev \
|
||||
libpng-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Playwright system dependencies for Linux
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libglib2.0-0 \
|
||||
libnss3 \
|
||||
libnspr4 \
|
||||
libatk1.0-0 \
|
||||
libatk-bridge2.0-0 \
|
||||
libcups2 \
|
||||
libdrm2 \
|
||||
libdbus-1-3 \
|
||||
libxcb1 \
|
||||
libxkbcommon0 \
|
||||
libx11-6 \
|
||||
libxcomposite1 \
|
||||
libxdamage1 \
|
||||
libxext6 \
|
||||
libxfixes3 \
|
||||
libxrandr2 \
|
||||
libgbm1 \
|
||||
libpango-1.0-0 \
|
||||
libcairo2 \
|
||||
libasound2 \
|
||||
libatspi2.0-0 \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# GPU support if enabled and architecture is supported
|
||||
RUN if [ "$ENABLE_GPU" = "true" ] && [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \
|
||||
apt-get update && apt-get install -y --no-install-recommends \
|
||||
nvidia-cuda-toolkit \
|
||||
&& rm -rf /var/lib/apt/lists/* ; \
|
||||
else \
|
||||
echo "Skipping NVIDIA CUDA Toolkit installation (unsupported platform or GPU disabled)"; \
|
||||
fi
|
||||
|
||||
# Create and set working directory
|
||||
WORKDIR /app
|
||||
|
||||
# Copy the entire project
|
||||
COPY . .
|
||||
|
||||
# Install base requirements
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Install required library for FastAPI
|
||||
RUN pip install fastapi uvicorn psutil
|
||||
|
||||
# Install ML dependencies first for better layer caching
|
||||
RUN if [ "$INSTALL_TYPE" = "all" ] ; then \
|
||||
pip install --no-cache-dir \
|
||||
torch \
|
||||
torchvision \
|
||||
torchaudio \
|
||||
scikit-learn \
|
||||
nltk \
|
||||
transformers \
|
||||
tokenizers && \
|
||||
python -m nltk.downloader punkt stopwords ; \
|
||||
fi
|
||||
|
||||
# Install the package
|
||||
RUN if [ "$INSTALL_TYPE" = "all" ] ; then \
|
||||
pip install ".[all]" && \
|
||||
python -m crawl4ai.model_loader ; \
|
||||
elif [ "$INSTALL_TYPE" = "torch" ] ; then \
|
||||
pip install ".[torch]" ; \
|
||||
elif [ "$INSTALL_TYPE" = "transformer" ] ; then \
|
||||
pip install ".[transformer]" && \
|
||||
python -m crawl4ai.model_loader ; \
|
||||
else \
|
||||
pip install "." ; \
|
||||
fi
|
||||
|
||||
# Install MkDocs and required plugins
|
||||
RUN pip install --no-cache-dir \
|
||||
mkdocs \
|
||||
mkdocs-material \
|
||||
mkdocs-terminal \
|
||||
pymdown-extensions
|
||||
|
||||
# Build MkDocs documentation
|
||||
RUN mkdocs build
|
||||
|
||||
# Install Playwright and browsers
|
||||
RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
|
||||
playwright install chromium; \
|
||||
elif [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
|
||||
playwright install chromium; \
|
||||
fi
|
||||
|
||||
# Expose port
|
||||
EXPOSE 8000 11235 9222 8080
|
||||
|
||||
# Start the FastAPI server
|
||||
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "11235"]
|
||||
@@ -5,7 +5,7 @@ from typing import List, Tuple, Dict, Optional
|
||||
from rank_bm25 import BM25Okapi
|
||||
from collections import deque
|
||||
from bs4 import NavigableString, Comment
|
||||
from .utils import clean_tokens, perform_completion_with_backoff, escape_json_string, sanitize_html, get_home_folder, extract_xml_data
|
||||
from .utils import clean_tokens, perform_completion_with_backoff, escape_json_string, sanitize_html, get_home_folder, extract_xml_data, merge_chunks
|
||||
from abc import ABC, abstractmethod
|
||||
import math
|
||||
from snowballstemmer import stemmer
|
||||
@@ -23,7 +23,14 @@ from colorama import Fore, Style
|
||||
class RelevantContentFilter(ABC):
|
||||
"""Abstract base class for content filtering strategies"""
|
||||
|
||||
def __init__(self, user_query: str = None):
|
||||
def __init__(self, user_query: str = None, verbose: bool = False, logger: Optional[AsyncLogger] = None):
|
||||
"""
|
||||
Initializes the RelevantContentFilter class with optional user query.
|
||||
|
||||
Args:
|
||||
user_query (str): User query for filtering (optional).
|
||||
verbose (bool): Enable verbose logging (default: False).
|
||||
"""
|
||||
self.user_query = user_query
|
||||
self.included_tags = {
|
||||
# Primary structure
|
||||
@@ -92,6 +99,8 @@ class RelevantContentFilter(ABC):
|
||||
r"nav|footer|header|sidebar|ads|comment|promo|advert|social|share", re.I
|
||||
)
|
||||
self.min_word_count = 2
|
||||
self.verbose = False
|
||||
self.logger = logger
|
||||
|
||||
@abstractmethod
|
||||
def filter_content(self, html: str) -> List[str]:
|
||||
@@ -755,8 +764,11 @@ class LLMContentFilter(RelevantContentFilter):
|
||||
base_url: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
extra_args: Dict = None,
|
||||
# char_token_rate: float = WORD_TOKEN_RATE * 5,
|
||||
# chunk_mode: str = "char",
|
||||
verbose: bool = False,
|
||||
logger: Optional[AsyncLogger] = None,
|
||||
ignore_cache: bool = False,
|
||||
):
|
||||
super().__init__(None)
|
||||
self.provider = provider
|
||||
@@ -768,10 +780,15 @@ class LLMContentFilter(RelevantContentFilter):
|
||||
self.instruction = instruction
|
||||
self.chunk_token_threshold = chunk_token_threshold
|
||||
self.overlap_rate = overlap_rate
|
||||
self.word_token_rate = word_token_rate
|
||||
self.word_token_rate = word_token_rate or WORD_TOKEN_RATE
|
||||
# self.chunk_mode: str = chunk_mode
|
||||
# self.char_token_rate = char_token_rate or word_token_rate / 5
|
||||
# self.token_rate = word_token_rate if chunk_mode == "word" else self.char_token_rate
|
||||
self.token_rate = word_token_rate or WORD_TOKEN_RATE
|
||||
self.base_url = base_url
|
||||
self.api_base = api_base or base_url
|
||||
self.extra_args = extra_args or {}
|
||||
self.ignore_cache = ignore_cache
|
||||
self.verbose = verbose
|
||||
|
||||
# Setup logger with custom styling for LLM operations
|
||||
@@ -779,7 +796,7 @@ class LLMContentFilter(RelevantContentFilter):
|
||||
self.logger = logger
|
||||
elif verbose:
|
||||
self.logger = AsyncLogger(
|
||||
verbose=True,
|
||||
verbose=verbose,
|
||||
icons={
|
||||
**AsyncLogger.DEFAULT_ICONS,
|
||||
"LLM": "★", # Star for LLM operations
|
||||
@@ -803,45 +820,25 @@ class LLMContentFilter(RelevantContentFilter):
|
||||
return hashlib.md5(content.encode()).hexdigest()
|
||||
|
||||
def _merge_chunks(self, text: str) -> List[str]:
|
||||
"""Split text into chunks with overlap"""
|
||||
# Calculate tokens and sections
|
||||
total_tokens = len(text.split()) * self.word_token_rate
|
||||
num_sections = max(1, math.floor(total_tokens / self.chunk_token_threshold))
|
||||
adjusted_chunk_threshold = total_tokens / num_sections
|
||||
"""Split text into chunks with overlap using char or word mode."""
|
||||
ov = int(self.chunk_token_threshold * self.overlap_rate)
|
||||
sections = merge_chunks(
|
||||
docs = [text],
|
||||
target_size= self.chunk_token_threshold,
|
||||
overlap=ov,
|
||||
word_token_ratio=self.word_token_rate
|
||||
)
|
||||
return sections
|
||||
|
||||
# Split into words
|
||||
words = text.split()
|
||||
chunks = []
|
||||
current_chunk = []
|
||||
current_token_count = 0
|
||||
|
||||
for word in words:
|
||||
word_tokens = len(word) * self.word_token_rate
|
||||
if current_token_count + word_tokens <= adjusted_chunk_threshold:
|
||||
current_chunk.append(word)
|
||||
current_token_count += word_tokens
|
||||
else:
|
||||
# Add overlap if not the last chunk
|
||||
if chunks and self.overlap_rate > 0:
|
||||
overlap_size = int(len(current_chunk) * self.overlap_rate)
|
||||
current_chunk.extend(current_chunk[-overlap_size:])
|
||||
|
||||
chunks.append(" ".join(current_chunk))
|
||||
current_chunk = [word]
|
||||
current_token_count = word_tokens
|
||||
|
||||
if current_chunk:
|
||||
chunks.append(" ".join(current_chunk))
|
||||
|
||||
return chunks
|
||||
|
||||
def filter_content(self, html: str, ignore_cache: bool = False) -> List[str]:
|
||||
def filter_content(self, html: str, ignore_cache: bool = True) -> List[str]:
|
||||
if not html or not isinstance(html, str):
|
||||
return []
|
||||
|
||||
if self.logger:
|
||||
self.logger.info(
|
||||
"Starting LLM content filtering process",
|
||||
"Starting LLM markdown content filtering process",
|
||||
tag="LLM",
|
||||
params={"provider": self.provider},
|
||||
colors={"provider": Fore.CYAN}
|
||||
@@ -853,9 +850,12 @@ class LLMContentFilter(RelevantContentFilter):
|
||||
cache_key = self._get_cache_key(html, self.instruction or "")
|
||||
cache_file = cache_dir / f"{cache_key}.json"
|
||||
|
||||
# if ignore_cache == None:
|
||||
ignore_cache = self.ignore_cache
|
||||
|
||||
if not ignore_cache and cache_file.exists():
|
||||
if self.logger:
|
||||
self.logger.info("Found cached result", tag="CACHE")
|
||||
self.logger.info("Found cached markdown result", tag="CACHE")
|
||||
try:
|
||||
with cache_file.open('r') as f:
|
||||
cached_data = json.load(f)
|
||||
@@ -867,13 +867,13 @@ class LLMContentFilter(RelevantContentFilter):
|
||||
return cached_data['blocks']
|
||||
except Exception as e:
|
||||
if self.logger:
|
||||
self.logger.error(f"Cache read error: {str(e)}", tag="CACHE")
|
||||
self.logger.error(f"LLM markdown: Cache read error: {str(e)}", tag="CACHE")
|
||||
|
||||
# Split into chunks
|
||||
html_chunks = self._merge_chunks(html)
|
||||
if self.logger:
|
||||
self.logger.info(
|
||||
"Split content into {chunk_count} chunks",
|
||||
"LLM markdown: Split content into {chunk_count} chunks",
|
||||
tag="CHUNK",
|
||||
params={"chunk_count": len(html_chunks)},
|
||||
colors={"chunk_count": Fore.YELLOW}
|
||||
@@ -887,7 +887,7 @@ class LLMContentFilter(RelevantContentFilter):
|
||||
for i, chunk in enumerate(html_chunks):
|
||||
if self.logger:
|
||||
self.logger.debug(
|
||||
"Processing chunk {chunk_num}/{total_chunks}",
|
||||
"LLM markdown: Processing chunk {chunk_num}/{total_chunks}",
|
||||
tag="CHUNK",
|
||||
params={
|
||||
"chunk_num": i + 1,
|
||||
@@ -904,16 +904,38 @@ class LLMContentFilter(RelevantContentFilter):
|
||||
for var, value in prompt_variables.items():
|
||||
prompt = prompt.replace("{" + var + "}", value)
|
||||
|
||||
def _proceed_with_chunk(
|
||||
provider: str,
|
||||
prompt: str,
|
||||
api_token: str,
|
||||
base_url: Optional[str] = None,
|
||||
extra_args: Dict = {}
|
||||
) -> List[str]:
|
||||
if self.logger:
|
||||
self.logger.info(
|
||||
"LLM Markdown: Processing chunk {chunk_num}",
|
||||
tag="CHUNK",
|
||||
params={"chunk_num": i + 1}
|
||||
)
|
||||
return perform_completion_with_backoff(
|
||||
provider,
|
||||
prompt,
|
||||
api_token,
|
||||
base_url=base_url,
|
||||
extra_args=extra_args
|
||||
)
|
||||
|
||||
future = executor.submit(
|
||||
perform_completion_with_backoff,
|
||||
_proceed_with_chunk,
|
||||
self.provider,
|
||||
prompt,
|
||||
self.api_token,
|
||||
base_url=self.api_base,
|
||||
extra_args=self.extra_args
|
||||
self.api_base,
|
||||
self.extra_args
|
||||
)
|
||||
futures.append((i, future))
|
||||
|
||||
|
||||
# Collect results in order
|
||||
ordered_results = []
|
||||
for i, future in sorted(futures):
|
||||
@@ -940,14 +962,14 @@ class LLMContentFilter(RelevantContentFilter):
|
||||
ordered_results.append(blocks)
|
||||
if self.logger:
|
||||
self.logger.success(
|
||||
"Successfully processed chunk {chunk_num}",
|
||||
"LLM markdown: Successfully processed chunk {chunk_num}",
|
||||
tag="CHUNK",
|
||||
params={"chunk_num": i + 1}
|
||||
)
|
||||
except Exception as e:
|
||||
if self.logger:
|
||||
self.logger.error(
|
||||
"Error processing chunk {chunk_num}: {error}",
|
||||
"LLM markdown: Error processing chunk {chunk_num}: {error}",
|
||||
tag="CHUNK",
|
||||
params={
|
||||
"chunk_num": i + 1,
|
||||
@@ -958,7 +980,7 @@ class LLMContentFilter(RelevantContentFilter):
|
||||
end_time = time.time()
|
||||
if self.logger:
|
||||
self.logger.success(
|
||||
"Completed processing in {time:.2f}s",
|
||||
"LLM markdown: Completed processing in {time:.2f}s",
|
||||
tag="LLM",
|
||||
params={"time": end_time - start_time},
|
||||
colors={"time": Fore.YELLOW}
|
||||
|
||||
@@ -21,6 +21,9 @@ from .utils import (
|
||||
extract_xml_data,
|
||||
split_and_parse_json_objects,
|
||||
sanitize_input_encode,
|
||||
chunk_documents,
|
||||
merge_chunks,
|
||||
advanced_split,
|
||||
)
|
||||
from .models import * # noqa: F403
|
||||
|
||||
@@ -501,6 +504,10 @@ class LLMExtractionStrategy(ExtractionStrategy):
|
||||
instruction: str = None,
|
||||
schema: Dict = None,
|
||||
extraction_type="block",
|
||||
chunk_token_threshold=CHUNK_TOKEN_THRESHOLD,
|
||||
overlap_rate=OVERLAP_RATE,
|
||||
word_token_rate=WORD_TOKEN_RATE,
|
||||
apply_chunking=True,
|
||||
**kwargs,
|
||||
):
|
||||
"""
|
||||
@@ -652,53 +659,16 @@ class LLMExtractionStrategy(ExtractionStrategy):
|
||||
)
|
||||
return blocks
|
||||
|
||||
def _merge(self, documents, chunk_token_threshold, overlap):
|
||||
def _merge(self, documents, chunk_token_threshold, overlap) -> List[str]:
|
||||
"""
|
||||
Merge documents into sections based on chunk_token_threshold and overlap.
|
||||
"""
|
||||
# chunks = []
|
||||
sections = []
|
||||
total_tokens = 0
|
||||
|
||||
# Calculate the total tokens across all documents
|
||||
for document in documents:
|
||||
total_tokens += len(document.split(" ")) * self.word_token_rate
|
||||
|
||||
# Calculate the number of sections needed
|
||||
num_sections = math.floor(total_tokens / chunk_token_threshold)
|
||||
if num_sections < 1:
|
||||
num_sections = 1 # Ensure there is at least one section
|
||||
adjusted_chunk_threshold = total_tokens / num_sections
|
||||
|
||||
total_token_so_far = 0
|
||||
current_chunk = []
|
||||
|
||||
for document in documents:
|
||||
tokens = document.split(" ")
|
||||
token_count = len(tokens) * self.word_token_rate
|
||||
|
||||
if total_token_so_far + token_count <= adjusted_chunk_threshold:
|
||||
current_chunk.extend(tokens)
|
||||
total_token_so_far += token_count
|
||||
else:
|
||||
# Ensure to handle the last section properly
|
||||
if len(sections) == num_sections - 1:
|
||||
current_chunk.extend(tokens)
|
||||
continue
|
||||
|
||||
# Add overlap if specified
|
||||
if overlap > 0 and current_chunk:
|
||||
overlap_tokens = current_chunk[-overlap:]
|
||||
current_chunk.extend(overlap_tokens)
|
||||
|
||||
sections.append(" ".join(current_chunk))
|
||||
current_chunk = tokens
|
||||
total_token_so_far = token_count
|
||||
|
||||
# Add the last chunk
|
||||
if current_chunk:
|
||||
sections.append(" ".join(current_chunk))
|
||||
|
||||
sections = merge_chunks(
|
||||
docs = documents,
|
||||
target_size= chunk_token_threshold,
|
||||
overlap=overlap,
|
||||
word_token_ratio=self.word_token_rate
|
||||
)
|
||||
return sections
|
||||
|
||||
def run(self, url: str, sections: List[str]) -> List[Dict[str, Any]]:
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from tabnanny import verbose
|
||||
from typing import Optional, Dict, Any, Tuple
|
||||
from .models import MarkdownGenerationResult
|
||||
from .html2text import CustomHTML2Text
|
||||
@@ -29,9 +30,11 @@ class MarkdownGenerationStrategy(ABC):
|
||||
self,
|
||||
content_filter: Optional[RelevantContentFilter] = None,
|
||||
options: Optional[Dict[str, Any]] = None,
|
||||
verbose: bool = False,
|
||||
):
|
||||
self.content_filter = content_filter
|
||||
self.options = options or {}
|
||||
self.verbose = verbose
|
||||
|
||||
@abstractmethod
|
||||
def generate_markdown(
|
||||
|
||||
@@ -206,17 +206,6 @@ Output the final list of JSON objects, wrapped in <blocks>...</blocks> XML tags.
|
||||
|
||||
PROMPT_FILTER_CONTENT = """Your task is to filter and convert HTML content into clean, focused markdown that's optimized for use with LLMs and information retrieval systems.
|
||||
|
||||
INPUT HTML:
|
||||
<|HTML_CONTENT_START|>
|
||||
{HTML}
|
||||
<|HTML_CONTENT_END|>
|
||||
|
||||
|
||||
SPECIFIC INSTRUCTION:
|
||||
<|USER_INSTRUCTION_START|>
|
||||
{REQUEST}
|
||||
<|USER_INSTRUCTION_END|>
|
||||
|
||||
TASK DETAILS:
|
||||
1. Content Selection
|
||||
- DO: Keep essential information, main content, key details
|
||||
@@ -240,15 +229,7 @@ TASK DETAILS:
|
||||
- DON'T: Fragment related content
|
||||
- DON'T: Duplicate information
|
||||
|
||||
Example Input:
|
||||
<div class="main-content"><h1>Setup Guide</h1><p>Follow these steps...</p></div>
|
||||
<div class="sidebar">Related articles...</div>
|
||||
|
||||
Example Output:
|
||||
# Setup Guide
|
||||
Follow these steps...
|
||||
|
||||
IMPORTANT: If specific instruction is provided above, prioritize those requirements over these general guidelines.
|
||||
IMPORTANT: If a user-specific instruction is provided, ignore the general guidelines above and prioritize the user's requirements.
|
||||
|
||||
OUTPUT FORMAT:
|
||||
Wrap your response in <content> tags. Use proper markdown throughout.
|
||||
@@ -256,7 +237,18 @@ Wrap your response in <content> tags. Use proper markdown throughout.
|
||||
[Your markdown content here]
|
||||
</content>
|
||||
|
||||
Begin filtering now."""
|
||||
Begin filtering now.
|
||||
|
||||
--------------------------------------------
|
||||
|
||||
<|HTML_CONTENT_START|>
|
||||
{HTML}
|
||||
<|HTML_CONTENT_END|>
|
||||
|
||||
<|USER_INSTRUCTION_START|>
|
||||
{REQUEST}
|
||||
<|USER_INSTRUCTION_END|>
|
||||
"""
|
||||
|
||||
JSON_SCHEMA_BUILDER= """
|
||||
# HTML Schema Generation Instructions
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
from ast import Call
|
||||
import time
|
||||
from urllib.parse import urlparse
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
@@ -8,9 +9,10 @@ import re
|
||||
import os
|
||||
import platform
|
||||
from .prompts import PROMPT_EXTRACT_BLOCKS
|
||||
from array import array
|
||||
from .config import *
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any
|
||||
from typing import Dict, Any, List, Tuple, Union, Optional, Callable
|
||||
from urllib.parse import urljoin
|
||||
import requests
|
||||
from requests.exceptions import InvalidSchema
|
||||
@@ -31,6 +33,154 @@ import aiohttp
|
||||
from pathlib import Path
|
||||
from packaging import version
|
||||
from . import __version__
|
||||
from typing import Sequence, List
|
||||
from array import array
|
||||
from itertools import chain
|
||||
from collections import deque
|
||||
from typing import Callable, Generator, Iterable, List, Optional
|
||||
|
||||
def chunk_documents(
|
||||
documents: Iterable[str],
|
||||
chunk_token_threshold: int,
|
||||
overlap: int,
|
||||
word_token_rate: float = 0.75,
|
||||
tokenizer: Optional[Callable[[str], List[str]]] = None,
|
||||
) -> Generator[str, None, None]:
|
||||
"""
|
||||
Efficiently chunks documents into token-limited sections with overlap between chunks.
|
||||
|
||||
Args:
|
||||
documents: Iterable of document strings
|
||||
chunk_token_threshold: Maximum tokens per chunk
|
||||
overlap: Number of tokens to overlap between chunks
|
||||
word_token_rate: Token estimate per word when not using a tokenizer
|
||||
tokenizer: Function that splits text into tokens (if available)
|
||||
|
||||
Yields:
|
||||
Text chunks as strings
|
||||
"""
|
||||
token_queue = deque()
|
||||
contribution_queue = deque()
|
||||
current_token_count = 0.0
|
||||
|
||||
for doc in documents:
|
||||
# Tokenize document
|
||||
if tokenizer:
|
||||
tokens = tokenizer(doc)
|
||||
contributions = [1.0] * len(tokens)
|
||||
else:
|
||||
tokens = doc.split()
|
||||
contributions = [word_token_rate] * len(tokens)
|
||||
|
||||
# Add to processing queues
|
||||
token_queue.extend(tokens)
|
||||
contribution_queue.extend(contributions)
|
||||
current_token_count += sum(contributions)
|
||||
|
||||
# Process full chunks
|
||||
while current_token_count >= chunk_token_threshold:
|
||||
# Find chunk split point
|
||||
chunk_tokens = []
|
||||
chunk_contrib = []
|
||||
chunk_total = 0.0
|
||||
|
||||
# Build chunk up to threshold
|
||||
while contribution_queue:
|
||||
next_contrib = contribution_queue[0]
|
||||
if chunk_total + next_contrib > chunk_token_threshold:
|
||||
break
|
||||
|
||||
chunk_total += next_contrib
|
||||
chunk_contrib.append(contribution_queue.popleft())
|
||||
chunk_tokens.append(token_queue.popleft())
|
||||
|
||||
# Handle edge case where first token exceeds threshold
|
||||
if not chunk_contrib: # Single token exceeds threshold
|
||||
chunk_contrib.append(contribution_queue.popleft())
|
||||
chunk_tokens.append(token_queue.popleft())
|
||||
|
||||
# Calculate overlap
|
||||
overlap_total = 0.0
|
||||
overlap_idx = 0
|
||||
for contrib in reversed(chunk_contrib):
|
||||
if overlap_total + contrib > overlap:
|
||||
break
|
||||
overlap_total += contrib
|
||||
overlap_idx += 1
|
||||
|
||||
# Prepend overlap to queues
|
||||
if overlap_idx > 0:
|
||||
overlap_tokens = chunk_tokens[-overlap_idx:]
|
||||
overlap_contrib = chunk_contrib[-overlap_idx:]
|
||||
|
||||
token_queue.extendleft(reversed(overlap_tokens))
|
||||
contribution_queue.extendleft(reversed(overlap_contrib))
|
||||
current_token_count += overlap_total
|
||||
|
||||
# Update current token count and yield chunk
|
||||
current_token_count -= sum(chunk_contrib)
|
||||
yield " ".join(chunk_tokens[:len(chunk_tokens)-overlap_idx] if overlap_idx else chunk_tokens)
|
||||
|
||||
# Yield remaining tokens
|
||||
if token_queue:
|
||||
yield " ".join(token_queue)
|
||||
|
||||
def merge_chunks(
|
||||
docs: Sequence[str],
|
||||
target_size: int,
|
||||
overlap: int = 0,
|
||||
word_token_ratio: float = 1.0,
|
||||
splitter: Callable = None
|
||||
) -> List[str]:
|
||||
"""Merges documents into chunks of specified token size.
|
||||
|
||||
Args:
|
||||
docs: Input documents
|
||||
target_size: Desired token count per chunk
|
||||
overlap: Number of tokens to overlap between chunks
|
||||
word_token_ratio: Multiplier for word->token conversion
|
||||
"""
|
||||
# Pre-tokenize all docs and store token counts
|
||||
splitter = splitter or str.split
|
||||
token_counts = array('I')
|
||||
all_tokens: List[List[str]] = []
|
||||
total_tokens = 0
|
||||
|
||||
for doc in docs:
|
||||
tokens = doc.split()
|
||||
count = int(len(tokens) * word_token_ratio)
|
||||
if count: # Skip empty docs
|
||||
token_counts.append(count)
|
||||
all_tokens.append(tokens)
|
||||
total_tokens += count
|
||||
|
||||
if not total_tokens:
|
||||
return []
|
||||
|
||||
# Pre-allocate chunks
|
||||
num_chunks = max(1, (total_tokens + target_size - 1) // target_size)
|
||||
chunks: List[List[str]] = [[] for _ in range(num_chunks)]
|
||||
|
||||
curr_chunk = 0
|
||||
curr_size = 0
|
||||
|
||||
# Distribute tokens
|
||||
for tokens in chain.from_iterable(all_tokens):
|
||||
if curr_size >= target_size and curr_chunk < num_chunks - 1:
|
||||
if overlap > 0:
|
||||
overlap_tokens = chunks[curr_chunk][-overlap:]
|
||||
curr_chunk += 1
|
||||
chunks[curr_chunk].extend(overlap_tokens)
|
||||
curr_size = len(overlap_tokens)
|
||||
else:
|
||||
curr_chunk += 1
|
||||
curr_size = 0
|
||||
|
||||
chunks[curr_chunk].append(tokens)
|
||||
curr_size += 1
|
||||
|
||||
# Return only non-empty chunks
|
||||
return [' '.join(chunk) for chunk in chunks if chunk]
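
# Illustrative sketch (not part of this commit): how merge_chunks behaves with the
# defaults above, assuming word_token_ratio=1.0 so each word counts as one token.
#
#   merge_chunks(
#       docs=["a b c d e", "f g h i j", "k l m n o"],  # 15 tokens total
#       target_size=8,
#       overlap=2,
#   )
#   # -> roughly two chunks: the first ~8 tokens, then a second chunk that starts
#   # with the 2-token overlap ("g h") followed by the remaining tokens.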
|
||||
|
||||
|
||||
class VersionManager:
|
||||
@@ -189,6 +339,77 @@ class InvalidCSSSelectorError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
SPLITS = bytearray([
|
||||
# Control chars (0-31) + space (32)
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
# Special chars (33-47): ! " # $ % & ' ( ) * + , - . /
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
# Numbers (48-57): Treat as non-splits
|
||||
0,0,0,0,0,0,0,0,0,0,
|
||||
# More special chars (58-64): : ; < = > ? @
|
||||
1,1,1,1,1,1,1,
|
||||
# Uppercase (65-90): Keep
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
# More special chars (91-96): [ \ ] ^ _ `
|
||||
1,1,1,1,1,1,
|
||||
# Lowercase (97-122): Keep
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
# Special chars (123-126): { | } ~
|
||||
1,1,1,1,
|
||||
# Extended ASCII
|
||||
*([1] * 128)
|
||||
])
|
||||
|
||||
# Additional split chars for HTML/code
|
||||
HTML_CODE_CHARS = {
|
||||
# HTML specific
|
||||
'•', '►', '▼', '©', '®', '™', '→', '⇒', '≈', '≤', '≥',
|
||||
# Programming symbols
|
||||
'+=', '-=', '*=', '/=', '=>', '<=>', '!=', '==', '===',
|
||||
'++', '--', '<<', '>>', '&&', '||', '??', '?:', '?.',
|
||||
# Common Unicode
|
||||
'…', '“', '”', '‘', '’', '«', '»', '—', '–',
|
||||
# Additional splits
|
||||
'+', '=', '~', '@', '#', '$', '%', '^', '&', '*',
|
||||
'(', ')', '{', '}', '[', ']', '|', '\\', '/', '`',
|
||||
'<', '>', ',', '.', '?', '!', ':', ';', '-', '_'
|
||||
}
|
||||
|
||||
def advanced_split(text: str) -> list[str]:
|
||||
result = []
|
||||
word = array('u')
|
||||
|
||||
i = 0
|
||||
text_len = len(text)
|
||||
|
||||
while i < text_len:
|
||||
char = text[i]
|
||||
o = ord(char)
|
||||
|
||||
# Fast path for ASCII
|
||||
if o < 256 and SPLITS[o]:
|
||||
if word:
|
||||
result.append(word.tounicode())
|
||||
word = array('u')
|
||||
# Check for multi-char symbols
|
||||
elif i < text_len - 1:
|
||||
two_chars = char + text[i + 1]
|
||||
if two_chars in HTML_CODE_CHARS:
|
||||
if word:
|
||||
result.append(word.tounicode())
|
||||
word = array('u')
|
||||
i += 1 # Skip next char since we used it
|
||||
else:
|
||||
word.append(char)
|
||||
else:
|
||||
word.append(char)
|
||||
i += 1
|
||||
|
||||
if word:
|
||||
result.append(word.tounicode())
|
||||
|
||||
return result
|
||||
|
||||
def create_box_message(
|
||||
message: str,
|
||||
type: str = "info",
|
||||
|
||||
@@ -1,113 +1,764 @@
|
||||
# Crawl4AI Docker Setup
# Crawl4AI Docker Guide 🐳

## Quick Start
1. Build the Docker image:
```bash
docker build -t crawl4ai-server:prod .
```

2. Run the container:
```bash
docker run -d -p 8000:8000 \
```

## Table of Contents
- [Prerequisites](#prerequisites)
- [Installation](#installation)
  - [Local Build](#local-build)
  - [Docker Hub](#docker-hub)
- [Dockerfile Parameters](#dockerfile-parameters)
- [Using the API](#using-the-api)
  - [Understanding Request Schema](#understanding-request-schema)
  - [REST API Examples](#rest-api-examples)
  - [Python SDK](#python-sdk)
- [Metrics & Monitoring](#metrics--monitoring)
- [Deployment Scenarios](#deployment-scenarios)
- [Complete Examples](#complete-examples)
- [Getting Help](#getting-help)

## Prerequisites
|
||||
|
||||
Before we dive in, make sure you have:
|
||||
- Docker installed and running (version 20.10.0 or higher)
|
||||
- At least 4GB of RAM available for the container
|
||||
- Python 3.10+ (if using the Python SDK)
|
||||
- Node.js 16+ (if using the Node.js examples)
|
||||
|
||||
> 💡 **Pro tip**: Run `docker info` to check your Docker installation and available resources.
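
For example, these checks (standard Docker CLI flags) confirm the daemon is up and show how much memory it can hand to containers:

```bash
# Verify the Docker daemon is reachable and report its version
docker version --format '{{.Server.Version}}'

# Show total memory available to Docker (should comfortably exceed 4GB)
docker info --format '{{.MemTotal}} bytes available to Docker'
```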
|
||||
|
||||
## Installation
|
||||
|
||||
### Local Build
|
||||
|
||||
Let's get your local environment set up step by step!
|
||||
|
||||
#### 1. Building the Image
|
||||
|
||||
First, clone the repository and build the Docker image:
|
||||
|
||||
```bash
|
||||
# Clone the repository
|
||||
git clone https://github.com/unclecode/crawl4ai.git
|
||||
cd crawl4ai
|
||||
|
||||
# Build the Docker image
|
||||
docker build -t crawl4ai-server:prod \
|
||||
--build-arg PYTHON_VERSION=3.10 \
|
||||
--build-arg INSTALL_TYPE=all \
|
||||
--build-arg ENABLE_GPU=false \
|
||||
deploy/docker/
|
||||
```
|
||||
|
||||
#### 2. Environment Setup
|
||||
|
||||
If you plan to use LLMs (Language Models), you'll need to set up your API keys. Create a `.llm.env` file:
|
||||
|
||||
```env
|
||||
# OpenAI
|
||||
OPENAI_API_KEY=sk-your-key
|
||||
|
||||
# Anthropic
|
||||
ANTHROPIC_API_KEY=your-anthropic-key
|
||||
|
||||
# DeepSeek
|
||||
DEEPSEEK_API_KEY=your-deepseek-key
|
||||
|
||||
# Check out https://docs.litellm.ai/docs/providers for more providers!
|
||||
```
|
||||
|
||||
> 🔑 **Note**: Keep your API keys secure! Never commit them to version control.
|
||||
|
||||
#### 3. Running the Container
|
||||
|
||||
You have several options for running the container:
|
||||
|
||||
Basic run (no LLM support):
|
||||
```bash
|
||||
docker run -d -p 8000:8000 --name crawl4ai crawl4ai-server:prod
|
||||
```
|
||||
|
||||
With LLM support:
|
||||
```bash
|
||||
docker run -d -p 8000:8000 \
|
||||
--env-file .llm.env \
|
||||
--name crawl4ai \
|
||||
crawl4ai-server:prod
|
||||
```
|
||||
|
||||
---

## Configuration Options

### 1. **Using .llm.env File**
Create a `.llm.env` file with your API keys:
```bash
OPENAI_API_KEY=sk-your-key
DEEPSEEK_API_KEY=your-deepseek-key
```

Run with:
Using host environment variables (not a good practice, but works for local testing):
```bash
docker run -d -p 8000:8000 \
  --env-file .llm.env \
  --env-from "$(env)" \
  --name crawl4ai \
  crawl4ai-server:prod
```

### 2. **Direct Environment Variables**
Pass keys directly:

### More on Building

You have several options for building the Docker image based on your needs:

#### Basic Build
```bash
docker run -d -p 8000:8000 \
  -e OPENAI_API_KEY="sk-your-key" \
  -e DEEPSEEK_API_KEY="your-deepseek-key" \
  crawl4ai-server:prod
```

```bash
# Clone the repository
git clone https://github.com/unclecode/crawl4ai.git
cd crawl4ai

# Simple build with defaults
docker build -t crawl4ai-server:prod deploy/docker/
```

### 3. **Copy Host Environment Variables**
Use the `--copy-env` flag to copy `.llm.env` from the host:

#### Advanced Build Options
```bash
docker run -d -p 8000:8000 \
  --copy-env \
  crawl4ai-server:prod
```

```bash
# Build with custom parameters
docker build -t crawl4ai-server:prod \
  --build-arg PYTHON_VERSION=3.10 \
  --build-arg INSTALL_TYPE=all \
  --build-arg ENABLE_GPU=false \
  deploy/docker/
```
|
||||
|
||||
### 4. **Advanced: Docker Compose**
|
||||
Create a `docker-compose.yml`:
|
||||
```yaml
|
||||
version: '3.8'
|
||||
services:
|
||||
crawl4ai:
|
||||
image: crawl4ai-server:prod
|
||||
ports:
|
||||
- "8000:8000"
|
||||
env_file:
|
||||
- .llm.env
|
||||
restart: unless-stopped
|
||||
```
|
||||
#### Platform-Specific Builds
|
||||
The Dockerfile includes optimizations for different architectures (ARM64 and AMD64). Docker automatically detects your platform, but you can specify it explicitly:
|
||||
|
||||
Run with:
|
||||
```bash
|
||||
docker-compose up -d
|
||||
# Build for ARM64
|
||||
docker build --platform linux/arm64 -t crawl4ai-server:arm64 deploy/docker/
|
||||
|
||||
# Build for AMD64
|
||||
docker build --platform linux/amd64 -t crawl4ai-server:amd64 deploy/docker/
|
||||
```
|
||||
|
||||
---
|
||||
#### Multi-Platform Build
|
||||
For distributing your image across different architectures, use `buildx`:
|
||||
|
||||
## Supported Environment Variables
|
||||
| Variable | Description |
|
||||
|------------------------|--------------------------------------|
|
||||
| `OPENAI_API_KEY` | OpenAI API key |
|
||||
| `DEEPSEEK_API_KEY` | DeepSeek API key |
|
||||
| `ANTHROPIC_API_KEY` | Anthropic API key |
|
||||
| `GROQ_API_KEY` | Groq API key |
|
||||
| `TOGETHER_API_KEY` | Together API key |
|
||||
| `LLAMA_CLOUD_API_KEY` | Llama Cloud API key |
|
||||
| `COHERE_API_KEY` | Cohere API key |
|
||||
| `MISTRAL_API_KEY` | Mistral API key |
|
||||
| `PERPLEXITY_API_KEY` | Perplexity API key |
|
||||
| `VERTEXAI_PROJECT_ID` | Google Vertex AI project ID |
|
||||
| `VERTEXAI_LOCATION` | Google Vertex AI location |
|
||||
```bash
|
||||
# Set up buildx builder
|
||||
docker buildx create --use
|
||||
|
||||
---
|
||||
# Build for multiple platforms
|
||||
docker buildx build \
|
||||
--platform linux/amd64,linux/arm64 \
|
||||
-t yourusername/crawl4ai-server:multi \
|
||||
--push \
|
||||
deploy/docker/
|
||||
```
|
||||
|
||||
## Healthcheck
|
||||
The container includes a healthcheck:
|
||||
> 💡 **Note**: Multi-platform builds require Docker Buildx and need to be pushed to a registry.
|
||||
|
||||
#### Development Build
|
||||
For development, you might want to enable all features:
|
||||
|
||||
```bash
|
||||
docker build -t crawl4ai-server:dev \
|
||||
--build-arg INSTALL_TYPE=all \
|
||||
--build-arg PYTHON_VERSION=3.10 \
|
||||
--build-arg ENABLE_GPU=true \
|
||||
deploy/docker/
|
||||
```
|
||||
|
||||
#### GPU-Enabled Build
|
||||
If you plan to use GPU acceleration:
|
||||
|
||||
```bash
|
||||
docker build -t crawl4ai-server:gpu \
|
||||
--build-arg ENABLE_GPU=true \
|
||||
deploy/docker/
|
||||
```
|
||||
|
||||
### Build Arguments Explained
|
||||
|
||||
| Argument | Description | Default | Options |
|
||||
|----------|-------------|---------|----------|
|
||||
| PYTHON_VERSION | Python version | 3.10 | 3.8, 3.9, 3.10 |
|
||||
| INSTALL_TYPE | Feature set | default | default, all, torch, transformer |
|
||||
| ENABLE_GPU | GPU support | false | true, false |
|
||||
| APP_HOME | Install path | /app | any valid path |
|
||||
|
||||
### Build Best Practices
|
||||
|
||||
1. **Choose the Right Install Type**
|
||||
- `default`: Basic installation, smallest image, to be honest, I use this most of the time.
|
||||
- `all`: Full features, larger image (include transformer, and nltk, make sure you really need them)
|
||||
|
||||
2. **Platform Considerations**
|
||||
- Let Docker auto-detect platform unless you need cross-compilation
|
||||
- Use --platform for specific architecture requirements
|
||||
- Consider buildx for multi-architecture distribution
|
||||
|
||||
3. **Development vs Production**
|
||||
- Use `INSTALL_TYPE=all` for development
|
||||
- Stick to `default` for production if you don't need extra features
|
||||
- Enable GPU only if you have compatible hardware
|
||||
|
||||
4. **Performance Optimization**
|
||||
- The image automatically includes platform-specific optimizations
|
||||
- AMD64 gets OpenMP optimizations
|
||||
- ARM64 gets OpenBLAS optimizations
|
||||
|
||||
### Docker Hub
|
||||
|
||||
> 🚧 Coming soon! The image will be available at `crawl4ai/server`. Stay tuned!
|
||||
|
||||
## Dockerfile Parameters
|
||||
|
||||
Configure your build with these parameters:
|
||||
|
||||
| Parameter | Description | Default | Options |
|
||||
|-----------|-------------|---------|----------|
|
||||
| PYTHON_VERSION | Python version to use | 3.10 | 3.8, 3.9, 3.10 |
|
||||
| INSTALL_TYPE | Installation profile | default | default, all, torch, transformer |
|
||||
| ENABLE_GPU | Enable GPU support | false | true, false |
|
||||
| APP_HOME | Application directory | /app | any valid path |
|
||||
| TARGETARCH | Target architecture | auto-detected | amd64, arm64 |
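
Putting the parameters together, a fully customized build might look like this (illustrative values; adjust to your setup):

```bash
docker build -t crawl4ai-server:custom \
  --build-arg PYTHON_VERSION=3.10 \
  --build-arg INSTALL_TYPE=torch \
  --build-arg ENABLE_GPU=false \
  --build-arg APP_HOME=/app \
  --platform linux/amd64 \
  deploy/docker/
```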
|
||||
|
||||
## Using the API
|
||||
|
||||
### Understanding Request Schema
|
||||
|
||||
This is super important! The API expects a specific structure that matches our Python classes. Let me show you how it works.
|
||||
|
||||
#### The Magic of Type Matching
|
||||
|
||||
When you send a request, each configuration object needs a "type" field that matches the exact class name from the library. Here's an example:
|
||||
|
||||
```python
|
||||
# First, let's create objects the normal way
|
||||
from crawl4ai import BrowserConfig, CrawlerRunConfig, PruningContentFilter
|
||||
|
||||
# Create some config objects
|
||||
browser_config = BrowserConfig(headless=True, viewport={"width": 1200, "height": 800})
|
||||
content_filter = PruningContentFilter(threshold=0.48, threshold_type="fixed")
|
||||
|
||||
# Use dump() to see the serialized format
|
||||
print(browser_config.dump())
|
||||
```
|
||||
|
||||
This will output something like:
|
||||
```json
|
||||
{
|
||||
"type": "BrowserConfig",
|
||||
"params": {
|
||||
"headless": true,
|
||||
"viewport": {
|
||||
"width": 1200,
|
||||
"height": 800
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Making API Requests
|
||||
|
||||
So when making a request, your JSON should look like this:
|
||||
|
||||
```json
|
||||
{
|
||||
"urls": ["https://example.com"],
|
||||
"browser_config": {
|
||||
"type": "BrowserConfig",
|
||||
"params": {
|
||||
"headless": true,
|
||||
"viewport": {"width": 1200, "height": 800}
|
||||
}
|
||||
},
|
||||
"crawler_config": {
|
||||
"type": "CrawlerRunConfig",
|
||||
"params": {
|
||||
"cache_mode": "bypass",
|
||||
"markdown_generator": {
|
||||
"type": "DefaultMarkdownGenerator",
|
||||
"params": {
|
||||
"content_filter": {
|
||||
"type": "PruningContentFilter",
|
||||
"params": {
|
||||
"threshold": 0.48,
|
||||
"threshold_type": "fixed",
|
||||
"min_word_threshold": 0
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
> 💡 **Pro tip**: Look at the class names in the library documentation - they map directly to the "type" fields in your requests!
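
If you prefer not to hand-write that nested JSON, you can let the config objects serialize themselves. This is only a sketch, assuming the same classes and `dump()` behavior shown above:

```python
from crawl4ai import (
    BrowserConfig, CrawlerRunConfig, DefaultMarkdownGenerator,
    PruningContentFilter, CacheMode,
)

# Build the configs as normal Python objects...
browser_config = BrowserConfig(headless=True, viewport={"width": 1200, "height": 800})
crawler_config = CrawlerRunConfig(
    cache_mode=CacheMode.BYPASS,
    markdown_generator=DefaultMarkdownGenerator(
        content_filter=PruningContentFilter(
            threshold=0.48, threshold_type="fixed", min_word_threshold=0
        )
    ),
)

# ...and let dump() produce the {"type": ..., "params": ...} structure for the request body.
payload = {
    "urls": ["https://example.com"],
    "browser_config": browser_config.dump(),
    "crawler_config": crawler_config.dump(),
}
```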
|
||||
|
||||
### REST API Examples
|
||||
|
||||
Let's look at some practical examples:
|
||||
|
||||
#### Simple Crawl
|
||||
|
||||
```python
|
||||
import requests
|
||||
|
||||
response = requests.post(
|
||||
"http://localhost:8000/crawl",
|
||||
json={
|
||||
"urls": ["https://example.com"],
|
||||
"browser_config": {
|
||||
"type": "BrowserConfig",
|
||||
"params": {"headless": True}
|
||||
}
|
||||
}
|
||||
)
|
||||
print(response.json())
|
||||
```
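
The same request works from any HTTP client; here is the equivalent call with curl, using the same endpoint and schema as above:

```bash
curl -X POST http://localhost:8000/crawl \
  -H "Content-Type: application/json" \
  -d '{
        "urls": ["https://example.com"],
        "browser_config": {"type": "BrowserConfig", "params": {"headless": true}}
      }'
```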
|
||||
|
||||
#### Streaming Results
|
||||
|
||||
```python
|
||||
import requests
|
||||
|
||||
response = requests.post(
|
||||
"http://localhost:8000/crawl",
|
||||
json={
|
||||
"urls": ["https://example.com"],
|
||||
"crawler_config": {
|
||||
"type": "CrawlerRunConfig",
|
||||
"params": {"stream": True}
|
||||
}
|
||||
},
|
||||
stream=True
|
||||
)
|
||||
|
||||
for line in response.iter_lines():
|
||||
if line:
|
||||
print(line.decode())
|
||||
```
|
||||
|
||||
### Python SDK
|
||||
|
||||
The SDK makes things even easier! Here's how to use it:
|
||||
|
||||
```python
|
||||
from crawl4ai.docker_client import Crawl4aiDockerClient
|
||||
from crawl4ai import BrowserConfig, CrawlerRunConfig
|
||||
|
||||
async with Crawl4aiDockerClient() as client:
|
||||
# The SDK handles serialization for you!
|
||||
result = await client.crawl(
|
||||
urls=["https://example.com"],
|
||||
browser_config=BrowserConfig(headless=True),
|
||||
crawler_config=CrawlerRunConfig(stream=False)
|
||||
)
|
||||
print(result.markdown)
|
||||
```
|
||||
|
||||
## Metrics & Monitoring
|
||||
|
||||
Keep an eye on your crawler with these endpoints:
|
||||
|
||||
- `/health` - Quick health check
|
||||
- `/metrics` - Detailed Prometheus metrics
|
||||
- `/schema` - Full API schema
|
||||
|
||||
Example health check:
|
||||
```bash
|
||||
curl http://localhost:8000/health
|
||||
```
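
The Prometheus endpoint can be checked the same way; the first lines of its plain-text output are enough to confirm that scraping will work:

```bash
curl -s http://localhost:8000/metrics | head -n 20
```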
|
||||
|
||||
---
|
||||
## Deployment Scenarios

> 🚧 Coming soon! We'll cover:
> - Kubernetes deployment
> - Cloud provider setups (AWS, GCP, Azure)
> - High-availability configurations
> - Load balancing strategies

## Troubleshooting
1. **Missing Keys**: Ensure all required keys are set in `.llm.env`.
2. **Permissions**: Run `chmod +x docker-entrypoint.sh` if permissions are denied.
3. **Logs**: Check logs with:
```bash
docker logs crawl4ai
```
|
||||
|
||||
## Complete Examples
|
||||
|
||||
Check out the `examples` folder in our repository for full working examples! Here's one to get you started:
|
||||
|
||||
```python
|
||||
import requests
|
||||
import time
|
||||
import httpx
|
||||
import asyncio
|
||||
from typing import Dict, Any
|
||||
from crawl4ai import (
|
||||
BrowserConfig, CrawlerRunConfig, DefaultMarkdownGenerator,
|
||||
PruningContentFilter, JsonCssExtractionStrategy, LLMContentFilter, CacheMode
|
||||
)
|
||||
from crawl4ai.docker_client import Crawl4aiDockerClient
|
||||
|
||||
class Crawl4AiTester:
|
||||
def __init__(self, base_url: str = "http://localhost:11235"):
|
||||
self.base_url = base_url
|
||||
|
||||
def submit_and_wait(
|
||||
self, request_data: Dict[str, Any], timeout: int = 300
|
||||
) -> Dict[str, Any]:
|
||||
# Submit crawl job
|
||||
response = requests.post(f"{self.base_url}/crawl", json=request_data)
|
||||
task_id = response.json()["task_id"]
|
||||
print(f"Task ID: {task_id}")
|
||||
|
||||
# Poll for result
|
||||
start_time = time.time()
|
||||
while True:
|
||||
if time.time() - start_time > timeout:
|
||||
raise TimeoutError(
|
||||
f"Task {task_id} did not complete within {timeout} seconds"
|
||||
)
|
||||
|
||||
result = requests.get(f"{self.base_url}/task/{task_id}")
|
||||
status = result.json()
|
||||
|
||||
if status["status"] == "failed":
|
||||
print("Task failed:", status.get("error"))
|
||||
raise Exception(f"Task failed: {status.get('error')}")
|
||||
|
||||
if status["status"] == "completed":
|
||||
return status
|
||||
|
||||
time.sleep(2)
|
||||
|
||||
async def test_direct_api():
|
||||
"""Test direct API endpoints without using the client SDK"""
|
||||
print("\n=== Testing Direct API Calls ===")
|
||||
|
||||
# Test 1: Basic crawl with content filtering
|
||||
browser_config = BrowserConfig(
|
||||
headless=True,
|
||||
viewport_width=1200,
|
||||
viewport_height=800
|
||||
)
|
||||
|
||||
crawler_config = CrawlerRunConfig(
|
||||
cache_mode=CacheMode.BYPASS,
|
||||
markdown_generator=DefaultMarkdownGenerator(
|
||||
content_filter=PruningContentFilter(
|
||||
threshold=0.48,
|
||||
threshold_type="fixed",
|
||||
min_word_threshold=0
|
||||
),
|
||||
options={"ignore_links": True}
|
||||
)
|
||||
)
|
||||
|
||||
request_data = {
|
||||
"urls": ["https://example.com"],
|
||||
"browser_config": browser_config.dump(),
|
||||
"crawler_config": crawler_config.dump()
|
||||
}
|
||||
|
||||
# Make direct API call
|
||||
async with httpx.AsyncClient() as client:
|
||||
response = await client.post(
|
||||
"http://localhost:8000/crawl",
|
||||
json=request_data,
|
||||
timeout=300
|
||||
)
|
||||
assert response.status_code == 200
|
||||
result = response.json()
|
||||
print("Basic crawl result:", result["success"])
|
||||
|
||||
# Test 2: Structured extraction with JSON CSS
|
||||
schema = {
|
||||
"baseSelector": "article.post",
|
||||
"fields": [
|
||||
{"name": "title", "selector": "h1", "type": "text"},
|
||||
{"name": "content", "selector": ".content", "type": "html"}
|
||||
]
|
||||
}
|
||||
|
||||
crawler_config = CrawlerRunConfig(
|
||||
cache_mode=CacheMode.BYPASS,
|
||||
extraction_strategy=JsonCssExtractionStrategy(schema=schema)
|
||||
)
|
||||
|
||||
request_data["crawler_config"] = crawler_config.dump()
|
||||
|
||||
async with httpx.AsyncClient() as client:
|
||||
response = await client.post(
|
||||
"http://localhost:8000/crawl",
|
||||
json=request_data
|
||||
)
|
||||
assert response.status_code == 200
|
||||
result = response.json()
|
||||
print("Structured extraction result:", result["success"])
|
||||
|
||||
# Test 3: Get schema
|
||||
# async with httpx.AsyncClient() as client:
|
||||
# response = await client.get("http://localhost:8000/schema")
|
||||
# assert response.status_code == 200
|
||||
# schemas = response.json()
|
||||
# print("Retrieved schemas for:", list(schemas.keys()))
|
||||
|
||||
async def test_with_client():
|
||||
"""Test using the Crawl4AI Docker client SDK"""
|
||||
print("\n=== Testing Client SDK ===")
|
||||
|
||||
async with Crawl4aiDockerClient(verbose=True) as client:
|
||||
# Test 1: Basic crawl
|
||||
browser_config = BrowserConfig(headless=True)
|
||||
crawler_config = CrawlerRunConfig(
|
||||
cache_mode=CacheMode.BYPASS,
|
||||
markdown_generator=DefaultMarkdownGenerator(
|
||||
content_filter=PruningContentFilter(
|
||||
threshold=0.48,
|
||||
threshold_type="fixed"
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
result = await client.crawl(
|
||||
urls=["https://example.com"],
|
||||
browser_config=browser_config,
|
||||
crawler_config=crawler_config
|
||||
)
|
||||
print("Client SDK basic crawl:", result.success)
|
||||
|
||||
# Test 2: LLM extraction with streaming
|
||||
crawler_config = CrawlerRunConfig(
|
||||
cache_mode=CacheMode.BYPASS,
|
||||
markdown_generator=DefaultMarkdownGenerator(
|
||||
content_filter=LLMContentFilter(
|
||||
provider="openai/gpt-4o",
|
||||
instruction="Extract key technical concepts"
|
||||
)
|
||||
),
|
||||
stream=True
|
||||
)
|
||||
|
||||
async for result in await client.crawl(
|
||||
urls=["https://example.com"],
|
||||
browser_config=browser_config,
|
||||
crawler_config=crawler_config
|
||||
):
|
||||
print(f"Streaming result for: {result.url}")
|
||||
|
||||
# # Test 3: Get schema
|
||||
# schemas = await client.get_schema()
|
||||
# print("Retrieved client schemas for:", list(schemas.keys()))
|
||||
|
||||
async def main():
|
||||
"""Run all tests"""
|
||||
# Test direct API
|
||||
print("Testing direct API calls...")
|
||||
await test_direct_api()
|
||||
|
||||
# Test client SDK
|
||||
print("\nTesting client SDK...")
|
||||
await test_with_client()
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
```
|
||||
|
||||
## Server Configuration
|
||||
|
||||
The server's behavior can be customized through the `config.yml` file. Let's explore how to configure your Crawl4AI server for optimal performance and security.
|
||||
|
||||
### Understanding config.yml
|
||||
|
||||
The configuration file is located at `deploy/docker/config.yml`. You can either modify this file before building the image or mount a custom configuration when running the container.
|
||||
|
||||
Here's a detailed breakdown of the configuration options:
|
||||
|
||||
```yaml
|
||||
# Application Configuration
|
||||
app:
|
||||
title: "Crawl4AI API" # Server title in OpenAPI docs
|
||||
version: "1.0.0" # API version
|
||||
host: "0.0.0.0" # Listen on all interfaces
|
||||
port: 8000 # Server port
|
||||
reload: True # Enable hot reloading (development only)
|
||||
timeout_keep_alive: 300 # Keep-alive timeout in seconds
|
||||
|
||||
# Rate Limiting Configuration
|
||||
rate_limiting:
|
||||
enabled: True # Enable/disable rate limiting
|
||||
default_limit: "100/minute" # Rate limit format: "number/timeunit"
|
||||
trusted_proxies: [] # List of trusted proxy IPs
|
||||
storage_uri: "memory://" # Use "redis://localhost:6379" for production
|
||||
|
||||
# Security Configuration
|
||||
security:
|
||||
enabled: false # Master toggle for security features
|
||||
https_redirect: True # Force HTTPS
|
||||
trusted_hosts: ["*"] # Allowed hosts (use specific domains in production)
|
||||
headers: # Security headers
|
||||
x_content_type_options: "nosniff"
|
||||
x_frame_options: "DENY"
|
||||
content_security_policy: "default-src 'self'"
|
||||
strict_transport_security: "max-age=63072000; includeSubDomains"
|
||||
|
||||
# Crawler Configuration
|
||||
crawler:
|
||||
memory_threshold_percent: 95.0 # Memory usage threshold
|
||||
rate_limiter:
|
||||
base_delay: [1.0, 2.0] # Min and max delay between requests
|
||||
timeouts:
|
||||
stream_init: 30.0 # Stream initialization timeout
|
||||
batch_process: 300.0 # Batch processing timeout
|
||||
|
||||
# Logging Configuration
|
||||
logging:
|
||||
level: "INFO" # Log level (DEBUG, INFO, WARNING, ERROR)
|
||||
format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
||||
|
||||
# Observability Configuration
|
||||
observability:
|
||||
prometheus:
|
||||
enabled: True # Enable Prometheus metrics
|
||||
endpoint: "/metrics" # Metrics endpoint
|
||||
health_check:
|
||||
endpoint: "/health" # Health check endpoint
|
||||
```
|
||||
|
||||
### Configuration Tips and Best Practices
|
||||
|
||||
1. **Production Settings** 🏭
|
||||
```yaml
|
||||
app:
|
||||
reload: False # Disable reload in production
|
||||
timeout_keep_alive: 120 # Lower timeout for better resource management
|
||||
|
||||
rate_limiting:
|
||||
storage_uri: "redis://redis:6379" # Use Redis for distributed rate limiting
|
||||
default_limit: "50/minute" # More conservative rate limit
|
||||
|
||||
security:
|
||||
enabled: true # Enable all security features
|
||||
trusted_hosts: ["your-domain.com"] # Restrict to your domain
|
||||
```
|
||||
|
||||
---
|
||||
2. **Development Settings** 🛠️
```yaml
app:
  reload: True                # Enable hot reloading
  timeout_keep_alive: 300     # Longer timeout for debugging

logging:
  level: "DEBUG"              # More verbose logging
```
|
||||
|
||||
3. **High-Traffic Settings** 🚦
|
||||
```yaml
|
||||
crawler:
|
||||
memory_threshold_percent: 85.0 # More conservative memory limit
|
||||
rate_limiter:
|
||||
base_delay: [2.0, 4.0] # More aggressive rate limiting
|
||||
```
|
||||
|
||||
### Customizing Your Configuration
|
||||
|
||||
#### Method 1: Pre-build Configuration
|
||||
```bash
|
||||
# Copy and modify config before building
|
||||
cp deploy/docker/config.yml custom-config.yml
|
||||
vim custom-config.yml
|
||||
|
||||
# Build with custom config
|
||||
docker build -t crawl4ai-server:prod \
|
||||
--build-arg CONFIG_PATH=custom-config.yml .
|
||||
```
|
||||
|
||||
#### Method 2: Runtime Configuration
|
||||
```bash
|
||||
# Mount custom config at runtime
|
||||
docker run -d -p 8000:8000 \
|
||||
-v $(pwd)/custom-config.yml:/app/config.yml \
|
||||
crawl4ai-server:prod
|
||||
```
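
Either way, a quick request to the health endpoint (configured under `observability.health_check`) confirms the container came up with your configuration. A minimal sketch using `requests`, assuming the default `8000:8000` port mapping:

```python
# Confirm the server started and is serving with the mounted configuration.
# Assumes the container maps port 8000 and `requests` is installed locally.
import requests

resp = requests.get("http://localhost:8000/health", timeout=10)
resp.raise_for_status()
print(resp.json())  # e.g. {"status": "ok", "timestamp": ...}
```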
|
||||
|
||||
### Configuration Recommendations
|
||||
|
||||
1. **Security First** 🔒
|
||||
- Always enable security in production
|
||||
- Use specific trusted_hosts instead of wildcards
|
||||
- Set up proper rate limiting to protect your server
|
||||
- Consider your environment before enabling HTTPS redirect
|
||||
|
||||
2. **Resource Management** 💻
|
||||
- Adjust memory_threshold_percent based on available RAM
|
||||
- Set timeouts according to your content size and network conditions
|
||||
- Use Redis for rate limiting in multi-container setups
|
||||
|
||||
3. **Monitoring** 📊
|
||||
   - Enable Prometheus if you need metrics (see the sketch after this list)
|
||||
- Set DEBUG logging in development, INFO in production
|
||||
- Regular health check monitoring is crucial
|
||||
|
||||
4. **Performance Tuning** ⚡
|
||||
- Start with conservative rate limiter delays
|
||||
- Increase batch_process timeout for large content
|
||||
- Adjust stream_init timeout based on initial response times
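
For the monitoring recommendation above, a quick way to see what the Prometheus integration exposes is to pull `/metrics` and skim the request counters. A minimal sketch using `requests`, assuming the default port mapping and the instrumentator's default metric names:

```python
# Fetch the Prometheus exposition text and print the HTTP request counters.
# Metric names are the prometheus-fastapi-instrumentator defaults (an assumption).
import requests

metrics = requests.get("http://localhost:8000/metrics", timeout=10).text
for line in metrics.splitlines():
    if line.startswith("http_requests_total"):
        print(line)
```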
|
||||
|
||||
### Configuration Migration
|
||||
|
||||
When upgrading Crawl4AI, follow these steps:
|
||||
|
||||
1. Back up your current config:
|
||||
```bash
|
||||
cp /app/config.yml /app/config.yml.backup
|
||||
```
|
||||
|
||||
2. Use version control:
|
||||
```bash
|
||||
git add config.yml
|
||||
git commit -m "Save current server configuration"
|
||||
```
|
||||
|
||||
3. Test in staging first:
|
||||
```bash
|
||||
# Run on a different port for staging
docker run -d -p 8001:8000 \
|
||||
-v $(pwd)/new-config.yml:/app/config.yml \
|
||||
crawl4ai-server:prod
|
||||
```
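
4. Compare the backup with the new file so you know exactly which sections changed (a minimal PyYAML sketch; file names follow the steps above):
```python
# Print top-level sections that differ between the backed-up and the new config.
import yaml

with open("config.yml.backup") as f_old, open("new-config.yml") as f_new:
    old, new = yaml.safe_load(f_old), yaml.safe_load(f_new)

for section in sorted(set(old) | set(new)):
    if old.get(section) != new.get(section):
        print(f"section '{section}' differs")
```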
|
||||
|
||||
### Common Configuration Scenarios
|
||||
|
||||
1. **Basic Development Setup**
|
||||
```yaml
|
||||
security:
|
||||
enabled: false
|
||||
logging:
|
||||
level: "DEBUG"
|
||||
```
|
||||
|
||||
2. **Production API Server**
|
||||
```yaml
|
||||
security:
|
||||
enabled: true
|
||||
trusted_hosts: ["api.yourdomain.com"]
|
||||
rate_limiting:
|
||||
enabled: true
|
||||
default_limit: "50/minute"
|
||||
```
|
||||
|
||||
3. **High-Performance Crawler**
|
||||
```yaml
|
||||
crawler:
|
||||
memory_threshold_percent: 90.0
|
||||
timeouts:
|
||||
batch_process: 600.0
|
||||
```

## Security Best Practices

- Never commit `.llm.env` to version control.
- Use Docker secrets in production (Swarm/K8s).
- Rotate keys regularly.
|
||||
|
||||
## Getting Help
|
||||
|
||||
We're here to help you succeed with Crawl4AI! Here's how to get support:
|
||||
|
||||
- 📖 Check our [full documentation](https://docs.crawl4ai.com)
|
||||
- 🐛 Found a bug? [Open an issue](https://github.com/unclecode/crawl4ai/issues)
|
||||
- 💬 Join our [Discord community](https://discord.gg/crawl4ai)
|
||||
- ⭐ Star us on GitHub to show support!
|
||||
|
||||
## Summary
|
||||
|
||||
In this guide, we've covered everything you need to get started with Crawl4AI's Docker deployment:
|
||||
- Building and running the Docker container
|
||||
- Configuring the environment
|
||||
- Making API requests with proper typing
|
||||
- Using the Python SDK
|
||||
- Monitoring your deployment
|
||||
|
||||
Remember, the examples in the `examples` folder are your friends - they show real-world usage patterns that you can adapt for your needs.
|
||||
|
||||
Keep exploring, and don't hesitate to reach out if you need help! We're building something amazing together. 🚀
|
||||
|
||||
Happy crawling! 🕷️
|
||||
305
deploy/docker/api.py
Normal file
@@ -0,0 +1,305 @@
|
||||
import os
|
||||
import json
|
||||
import logging
|
||||
from typing import Optional, AsyncGenerator
|
||||
from urllib.parse import unquote
|
||||
from fastapi import HTTPException, Request, status
|
||||
from fastapi.background import BackgroundTasks
|
||||
from fastapi.responses import JSONResponse
|
||||
from redis import asyncio as aioredis
|
||||
|
||||
from crawl4ai import (
|
||||
AsyncWebCrawler,
|
||||
CrawlerRunConfig,
|
||||
LLMExtractionStrategy,
|
||||
CacheMode
|
||||
)
|
||||
from crawl4ai.content_filter_strategy import (
|
||||
PruningContentFilter,
|
||||
BM25ContentFilter,
|
||||
LLMContentFilter
|
||||
)
|
||||
from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator
|
||||
from crawl4ai.content_scraping_strategy import LXMLWebScrapingStrategy
|
||||
|
||||
from utils import (
|
||||
TaskStatus,
|
||||
FilterType,
|
||||
get_base_url,
|
||||
is_task_id,
|
||||
should_cleanup_task,
|
||||
decode_redis_hash
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
async def process_llm_extraction(
|
||||
redis: aioredis.Redis,
|
||||
config: dict,
|
||||
task_id: str,
|
||||
url: str,
|
||||
instruction: str,
|
||||
schema: Optional[str] = None,
|
||||
cache: str = "0"
|
||||
) -> None:
|
||||
"""Process LLM extraction in background."""
|
||||
try:
|
||||
llm_strategy = LLMExtractionStrategy(
|
||||
provider=config["llm"]["provider"],
|
||||
api_token=os.environ.get(config["llm"].get("api_key_env", None), ""),
|
||||
instruction=instruction,
|
||||
schema=json.loads(schema) if schema else None,
|
||||
)
|
||||
|
||||
cache_mode = CacheMode.ENABLED if cache == "1" else CacheMode.BYPASS
|
||||
|
||||
async with AsyncWebCrawler() as crawler:
|
||||
result = await crawler.arun(
|
||||
url=url,
|
||||
config=CrawlerRunConfig(
|
||||
extraction_strategy=llm_strategy,
|
||||
scraping_strategy=LXMLWebScrapingStrategy(),
|
||||
cache_mode=cache_mode
|
||||
)
|
||||
)
|
||||
|
||||
if not result.success:
|
||||
await redis.hset(f"task:{task_id}", mapping={
|
||||
"status": TaskStatus.FAILED,
|
||||
"error": result.error_message
|
||||
})
|
||||
return
|
||||
|
||||
content = json.loads(result.extracted_content)
|
||||
await redis.hset(f"task:{task_id}", mapping={
|
||||
"status": TaskStatus.COMPLETED,
|
||||
"result": json.dumps(content)
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"LLM extraction error: {str(e)}", exc_info=True)
|
||||
await redis.hset(f"task:{task_id}", mapping={
|
||||
"status": TaskStatus.FAILED,
|
||||
"error": str(e)
|
||||
})
|
||||
|
||||
async def handle_markdown_request(
|
||||
url: str,
|
||||
filter_type: FilterType,
|
||||
query: Optional[str] = None,
|
||||
cache: str = "0",
|
||||
config: Optional[dict] = None
|
||||
) -> str:
|
||||
"""Handle markdown generation requests."""
|
||||
try:
|
||||
decoded_url = unquote(url)
|
||||
if not decoded_url.startswith(('http://', 'https://')):
|
||||
decoded_url = 'https://' + decoded_url
|
||||
|
||||
if filter_type == FilterType.RAW:
|
||||
md_generator = DefaultMarkdownGenerator()
|
||||
else:
|
||||
content_filter = {
|
||||
FilterType.FIT: PruningContentFilter(),
|
||||
FilterType.BM25: BM25ContentFilter(user_query=query or ""),
|
||||
FilterType.LLM: LLMContentFilter(
|
||||
provider=config["llm"]["provider"],
|
||||
api_token=os.environ.get(config["llm"].get("api_key_env", None), ""),
|
||||
instruction=query or "Extract main content"
|
||||
)
|
||||
}[filter_type]
|
||||
md_generator = DefaultMarkdownGenerator(content_filter=content_filter)
|
||||
|
||||
cache_mode = CacheMode.ENABLED if cache == "1" else CacheMode.BYPASS
|
||||
|
||||
async with AsyncWebCrawler() as crawler:
|
||||
result = await crawler.arun(
|
||||
url=decoded_url,
|
||||
config=CrawlerRunConfig(
|
||||
markdown_generator=md_generator,
|
||||
scraping_strategy=LXMLWebScrapingStrategy(),
|
||||
cache_mode=cache_mode
|
||||
)
|
||||
)
|
||||
|
||||
if not result.success:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=result.error_message
|
||||
)
|
||||
|
||||
return (result.markdown_v2.raw_markdown
|
||||
if filter_type == FilterType.RAW
|
||||
else result.markdown_v2.fit_markdown)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Markdown error: {str(e)}", exc_info=True)
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=str(e)
|
||||
)
|
||||
|
||||
async def handle_llm_request(
|
||||
redis: aioredis.Redis,
|
||||
background_tasks: BackgroundTasks,
|
||||
request: Request,
|
||||
input_path: str,
|
||||
query: Optional[str] = None,
|
||||
schema: Optional[str] = None,
|
||||
cache: str = "0",
|
||||
config: Optional[dict] = None
|
||||
) -> JSONResponse:
|
||||
"""Handle LLM extraction requests."""
|
||||
base_url = get_base_url(request)
|
||||
|
||||
try:
|
||||
if is_task_id(input_path):
|
||||
return await handle_task_status(
|
||||
redis, input_path, base_url
|
||||
)
|
||||
|
||||
if not query:
|
||||
return JSONResponse({
|
||||
"message": "Please provide an instruction",
|
||||
"_links": {
|
||||
"example": {
|
||||
"href": f"{base_url}/llm/{input_path}?q=Extract+main+content",
|
||||
"title": "Try this example"
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
return await create_new_task(
|
||||
redis,
|
||||
background_tasks,
|
||||
input_path,
|
||||
query,
|
||||
schema,
|
||||
cache,
|
||||
base_url,
|
||||
config
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"LLM endpoint error: {str(e)}", exc_info=True)
|
||||
return JSONResponse({
|
||||
"error": str(e),
|
||||
"_links": {
|
||||
"retry": {"href": str(request.url)}
|
||||
}
|
||||
}, status_code=status.HTTP_500_INTERNAL_SERVER_ERROR)
|
||||
|
||||
async def handle_task_status(
|
||||
redis: aioredis.Redis,
|
||||
task_id: str,
|
||||
base_url: str
|
||||
) -> JSONResponse:
|
||||
"""Handle task status check requests."""
|
||||
task = await redis.hgetall(f"task:{task_id}")
|
||||
if not task:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Task not found"
|
||||
)
|
||||
|
||||
task = decode_redis_hash(task)
|
||||
response = create_task_response(task, task_id, base_url)
|
||||
|
||||
if task["status"] in [TaskStatus.COMPLETED, TaskStatus.FAILED]:
|
||||
if should_cleanup_task(task["created_at"]):
|
||||
await redis.delete(f"task:{task_id}")
|
||||
|
||||
return JSONResponse(response)
|
||||
|
||||
async def create_new_task(
|
||||
redis: aioredis.Redis,
|
||||
background_tasks: BackgroundTasks,
|
||||
input_path: str,
|
||||
query: str,
|
||||
schema: Optional[str],
|
||||
cache: str,
|
||||
base_url: str,
|
||||
config: dict
|
||||
) -> JSONResponse:
|
||||
"""Create and initialize a new task."""
|
||||
decoded_url = unquote(input_path)
|
||||
if not decoded_url.startswith(('http://', 'https://')):
|
||||
decoded_url = 'https://' + decoded_url
|
||||
|
||||
from datetime import datetime
|
||||
task_id = f"llm_{int(datetime.now().timestamp())}_{id(background_tasks)}"
|
||||
|
||||
await redis.hset(f"task:{task_id}", mapping={
|
||||
"status": TaskStatus.PROCESSING,
|
||||
"created_at": datetime.now().isoformat(),
|
||||
"url": decoded_url
|
||||
})
|
||||
|
||||
background_tasks.add_task(
|
||||
process_llm_extraction,
|
||||
redis,
|
||||
config,
|
||||
task_id,
|
||||
decoded_url,
|
||||
query,
|
||||
schema,
|
||||
cache
|
||||
)
|
||||
|
||||
return JSONResponse({
|
||||
"task_id": task_id,
|
||||
"status": TaskStatus.PROCESSING,
|
||||
"url": decoded_url,
|
||||
"_links": {
|
||||
"self": {"href": f"{base_url}/llm/{task_id}"},
|
||||
"status": {"href": f"{base_url}/llm/{task_id}"}
|
||||
}
|
||||
})
|
||||
|
||||
def create_task_response(task: dict, task_id: str, base_url: str) -> dict:
|
||||
"""Create response for task status check."""
|
||||
response = {
|
||||
"task_id": task_id,
|
||||
"status": task["status"],
|
||||
"created_at": task["created_at"],
|
||||
"url": task["url"],
|
||||
"_links": {
|
||||
"self": {"href": f"{base_url}/llm/{task_id}"},
|
||||
"refresh": {"href": f"{base_url}/llm/{task_id}"}
|
||||
}
|
||||
}
|
||||
|
||||
if task["status"] == TaskStatus.COMPLETED:
|
||||
response["result"] = json.loads(task["result"])
|
||||
elif task["status"] == TaskStatus.FAILED:
|
||||
response["error"] = task["error"]
|
||||
|
||||
return response
|
||||
|
||||
async def stream_results(crawler: AsyncWebCrawler, results_gen: AsyncGenerator) -> AsyncGenerator[bytes, None]:
|
||||
"""Stream results with heartbeats and completion markers."""
|
||||
import asyncio
|
||||
import json
|
||||
from utils import datetime_handler
|
||||
|
||||
try:
|
||||
async for result in results_gen:
|
||||
try:
|
||||
result_dict = result.model_dump()
|
||||
logger.info(f"Streaming result for {result_dict.get('url', 'unknown')}")
|
||||
data = json.dumps(result_dict, default=datetime_handler) + "\n"
|
||||
yield data.encode('utf-8')
|
||||
except Exception as e:
|
||||
logger.error(f"Serialization error: {e}")
|
||||
error_response = {"error": str(e), "url": getattr(result, 'url', 'unknown')}
|
||||
yield (json.dumps(error_response) + "\n").encode('utf-8')
|
||||
|
||||
yield json.dumps({"status": "completed"}).encode('utf-8')
|
||||
|
||||
except asyncio.CancelledError:
|
||||
logger.warning("Client disconnected during streaming")
|
||||
finally:
|
||||
try:
|
||||
await crawler.close()
|
||||
except Exception as e:
|
||||
logger.error(f"Crawler cleanup error: {e}")
|
||||
69
deploy/docker/config.yml
Normal file
@@ -0,0 +1,69 @@
|
||||
# Application Configuration
|
||||
app:
|
||||
title: "Crawl4AI API"
|
||||
version: "1.0.0"
|
||||
host: "0.0.0.0"
|
||||
port: 8000
|
||||
reload: True
|
||||
timeout_keep_alive: 300
|
||||
|
||||
# Default LLM Configuration
|
||||
llm:
|
||||
provider: "openai/gpt-4o-mini"
|
||||
api_key_env: "OPENAI_API_KEY"
|
||||
|
||||
# Redis Configuration
|
||||
redis:
|
||||
host: "localhost"
|
||||
port: 6379
|
||||
db: 0
|
||||
password: ""
|
||||
ssl: False
|
||||
ssl_cert_reqs: None
|
||||
ssl_ca_certs: None
|
||||
ssl_certfile: None
|
||||
ssl_keyfile: None
|
||||
|
||||
# Rate Limiting Configuration
|
||||
rate_limiting:
|
||||
enabled: True
|
||||
default_limit: "1000/minute"
|
||||
trusted_proxies: []
|
||||
storage_uri: "memory://" # Use "redis://localhost:6379" for production
|
||||
|
||||
# Security Configuration
|
||||
security:
|
||||
enabled: false
|
||||
https_redirect: True
|
||||
trusted_hosts: ["*"]
|
||||
headers:
|
||||
x_content_type_options: "nosniff"
|
||||
x_frame_options: "DENY"
|
||||
content_security_policy: "default-src 'self'"
|
||||
strict_transport_security: "max-age=63072000; includeSubDomains"
|
||||
|
||||
# Crawler Configuration
|
||||
crawler:
|
||||
memory_threshold_percent: 95.0
|
||||
rate_limiter:
|
||||
base_delay: [1.0, 2.0]
|
||||
timeouts:
|
||||
stream_init: 30.0 # Timeout for stream initialization
|
||||
batch_process: 300.0 # Timeout for batch processing
|
||||
|
||||
# Logging Configuration
|
||||
logging:
|
||||
level: "INFO"
|
||||
format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
||||
|
||||
# Observability Configuration
|
||||
observability:
|
||||
prometheus:
|
||||
enabled: True
|
||||
endpoint: "/metrics"
|
||||
health_check:
|
||||
endpoint: "/health"
|
||||
@@ -2,3 +2,6 @@ crawl4ai
|
||||
fastapi
|
||||
uvicorn
|
||||
gunicorn>=23.0.0
|
||||
slowapi>=0.1.9
|
||||
prometheus-fastapi-instrumentator>=7.0.2
|
||||
redis>=5.2.1
|
||||
@@ -1,120 +1,237 @@
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from typing import List, Optional
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.realpath(__file__)))
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from fastapi.responses import StreamingResponse
|
||||
import json
|
||||
import asyncio
|
||||
from typing import AsyncGenerator
|
||||
from crawl4ai import (
|
||||
BrowserConfig,
|
||||
CrawlerRunConfig,
|
||||
AsyncWebCrawler,
|
||||
MemoryAdaptiveDispatcher,
|
||||
RateLimiter,
)
|
||||
|
||||
from redis import asyncio as aioredis
|
||||
from fastapi import FastAPI, HTTPException, Request, status
|
||||
from fastapi.responses import StreamingResponse, RedirectResponse
|
||||
from fastapi.middleware.httpsredirect import HTTPSRedirectMiddleware
|
||||
from fastapi.middleware.trustedhost import TrustedHostMiddleware
|
||||
from pydantic import BaseModel, Field
|
||||
from slowapi import Limiter
|
||||
from slowapi.util import get_remote_address
|
||||
from prometheus_fastapi_instrumentator import Instrumentator
|
||||
from fastapi.responses import PlainTextResponse
|
||||
from fastapi.responses import JSONResponse
|
||||
from fastapi.background import BackgroundTasks
|
||||
from typing import Dict
|
||||
import os
|
||||
|
||||
from utils import (
|
||||
FilterType,
|
||||
load_config,
|
||||
setup_logging
|
||||
)
|
||||
from api import (
|
||||
handle_markdown_request,
|
||||
handle_llm_request
|
||||
)
|
||||
|
||||
from typing import List, Optional
|
||||
from pydantic import BaseModel
|
||||
# Load configuration and setup
|
||||
config = load_config()
|
||||
setup_logging(config)
|
||||
|
||||
# Initialize Redis
|
||||
redis = aioredis.from_url(config["redis"].get("uri", "redis://localhost"))
|
||||
|
||||
# Initialize rate limiter
|
||||
limiter = Limiter(
|
||||
key_func=get_remote_address,
|
||||
default_limits=[config["rate_limiting"]["default_limit"]],
|
||||
storage_uri=config["rate_limiting"]["storage_uri"]
|
||||
)
|
||||
|
||||
app = FastAPI(
|
||||
title=config["app"]["title"],
|
||||
version=config["app"]["version"]
|
||||
)
|
||||
|
||||
# Configure middleware
|
||||
if config["security"]["enabled"]:
|
||||
if config["security"]["https_redirect"]:
|
||||
app.add_middleware(HTTPSRedirectMiddleware)
|
||||
if config["security"]["trusted_hosts"] and config["security"]["trusted_hosts"] != ["*"]:
|
||||
app.add_middleware(
|
||||
TrustedHostMiddleware,
|
||||
allowed_hosts=config["security"]["trusted_hosts"]
|
||||
)
|
||||
|
||||
# Prometheus instrumentation
|
||||
if config["observability"]["prometheus"]["enabled"]:
|
||||
Instrumentator().instrument(app).expose(app)
|
||||
|
||||
class CrawlRequest(BaseModel):
|
||||
urls: List[str]
|
||||
browser_config: Optional[dict] = None
|
||||
crawler_config: Optional[dict] = None
|
||||
|
||||
class CrawlResponse(BaseModel):
|
||||
success: bool
|
||||
results: List[dict]
|
||||
|
||||
class Config:
|
||||
arbitrary_types_allowed = True
|
||||
|
||||
app = FastAPI(title="Crawl4AI API")
|
||||
|
||||
async def stream_results(crawler: AsyncWebCrawler, results_gen: AsyncGenerator) -> AsyncGenerator[bytes, None]:
|
||||
"""Stream results and manage crawler lifecycle"""
|
||||
def datetime_handler(obj):
|
||||
"""Custom handler for datetime objects during JSON serialization"""
|
||||
if hasattr(obj, 'isoformat'):
|
||||
return obj.isoformat()
|
||||
raise TypeError(f"Object of type {type(obj)} is not JSON serializable")
|
||||
|
||||
try:
|
||||
async for result in results_gen:
|
||||
try:
|
||||
# Use dump method for serialization
|
||||
result_dict = result.model_dump()
|
||||
print(f"Streaming result for URL: {result_dict['url']}, Success: {result_dict['success']}")
|
||||
# Use custom JSON encoder with datetime handler
|
||||
yield (json.dumps(result_dict, default=datetime_handler) + "\n").encode('utf-8')
|
||||
except Exception as e:
|
||||
print(f"Error serializing result: {e}")
|
||||
error_response = {
|
||||
"error": str(e),
|
||||
"url": getattr(result, 'url', 'unknown')
|
||||
urls: List[str] = Field(
|
||||
min_length=1,
|
||||
max_length=100,
|
||||
json_schema_extra={
|
||||
"items": {"type": "string", "maxLength": 2000, "pattern": "\\S"}
|
||||
}
|
||||
yield (json.dumps(error_response, default=datetime_handler) + "\n").encode('utf-8')
|
||||
except asyncio.CancelledError:
|
||||
print("Client disconnected, cleaning up...")
|
||||
finally:
|
||||
try:
|
||||
await crawler.close()
|
||||
except Exception as e:
|
||||
print(f"Error closing crawler: {e}")
|
||||
|
||||
@app.post("/crawl")
|
||||
async def crawl(request: CrawlRequest):
|
||||
# Load configs using our new utilities
|
||||
browser_config = BrowserConfig.load(request.browser_config)
|
||||
crawler_config = CrawlerRunConfig.load(request.crawler_config)
|
||||
|
||||
dispatcher = MemoryAdaptiveDispatcher(
|
||||
memory_threshold_percent=95.0,
|
||||
rate_limiter=RateLimiter(base_delay=(1.0, 2.0)),
|
||||
)
|
||||
browser_config: Optional[Dict] = Field(
|
||||
default_factory=dict,
|
||||
example={"headless": True, "viewport": {"width": 1200}}
|
||||
)
|
||||
crawler_config: Optional[Dict] = Field(
|
||||
default_factory=dict,
|
||||
example={"stream": True, "cache_mode": "aggressive"}
|
||||
)
|
||||
|
||||
try:
|
||||
if crawler_config.stream:
|
||||
crawler = AsyncWebCrawler(config=browser_config)
|
||||
await crawler.start()
|
||||
@app.middleware("http")
|
||||
async def add_security_headers(request: Request, call_next):
|
||||
response = await call_next(request)
|
||||
if config["security"]["enabled"]:
|
||||
response.headers.update(config["security"]["headers"])
|
||||
return response
|
||||
|
||||
results_gen = await crawler.arun_many(
|
||||
urls=request.urls,
|
||||
config=crawler_config,
|
||||
dispatcher=dispatcher
|
||||
)
|
||||
@app.get("/md/{url:path}")
|
||||
@limiter.limit(config["rate_limiting"]["default_limit"])
|
||||
async def get_markdown(
|
||||
request: Request,
|
||||
url: str,
|
||||
f: FilterType = FilterType.FIT,
|
||||
q: Optional[str] = None,
|
||||
c: Optional[str] = "0"
|
||||
):
|
||||
"""Get markdown from URL with optional filtering."""
|
||||
result = await handle_markdown_request(url, f, q, c, config)
|
||||
return PlainTextResponse(result)
|
||||
|
||||
return StreamingResponse(
|
||||
stream_results(crawler, results_gen),
|
||||
media_type='application/x-ndjson'
|
||||
@app.get("/llm/{input:path}")
|
||||
@limiter.limit(config["rate_limiting"]["default_limit"])
|
||||
async def llm_endpoint(
|
||||
request: Request,
|
||||
background_tasks: BackgroundTasks,
|
||||
input: str,
|
||||
q: Optional[str] = None,
|
||||
s: Optional[str] = None,
|
||||
c: Optional[str] = "0"
|
||||
):
|
||||
"""Handle LLM extraction requests."""
|
||||
return await handle_llm_request(
|
||||
redis, background_tasks, request, input, q, s, c, config
|
||||
)
|
||||
else:
|
||||
async with AsyncWebCrawler(config=browser_config) as crawler:
|
||||
results = await crawler.arun_many(
|
||||
urls=request.urls,
|
||||
config=crawler_config,
|
||||
dispatcher=dispatcher
|
||||
)
|
||||
# Use dump method for each result
|
||||
results_dict = [result.model_dump() for result in results]
|
||||
return CrawlResponse(success=True, results=results_dict)
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.get("/schema")
|
||||
async def get_schema():
|
||||
"""Return config schemas for client validation"""
|
||||
"""Endpoint for client-side validation schema."""
|
||||
from crawl4ai import BrowserConfig, CrawlerRunConfig
|
||||
return {
|
||||
"browser": BrowserConfig.model_json_schema(),
|
||||
"crawler": CrawlerRunConfig.model_json_schema()
|
||||
}
|
||||
|
||||
@app.get("/health")
|
||||
@app.get(config["observability"]["health_check"]["endpoint"])
|
||||
async def health():
|
||||
return {"status": "ok"}
|
||||
"""Health check endpoint."""
|
||||
return {"status": "ok", "timestamp": time.time()}
|
||||
|
||||
@app.get(config["observability"]["prometheus"]["endpoint"])
|
||||
async def metrics():
|
||||
"""Prometheus metrics endpoint."""
|
||||
return RedirectResponse(url=config["observability"]["prometheus"]["endpoint"])
|
||||
|
||||
@app.post("/crawl")
|
||||
@limiter.limit(config["rate_limiting"]["default_limit"])
|
||||
async def crawl(request: Request, crawl_request: CrawlRequest):
|
||||
"""Handle crawl requests."""
|
||||
from crawl4ai import (
|
||||
AsyncWebCrawler,
|
||||
BrowserConfig,
|
||||
CrawlerRunConfig,
|
||||
MemoryAdaptiveDispatcher,
|
||||
RateLimiter
|
||||
)
|
||||
import asyncio
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
crawler = None
|
||||
|
||||
try:
|
||||
if not crawl_request.urls:
|
||||
logger.error("Empty URL list received")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail="At least one URL required"
|
||||
)
|
||||
|
||||
browser_config = BrowserConfig.load(crawl_request.browser_config)
|
||||
crawler_config = CrawlerRunConfig.load(crawl_request.crawler_config)
|
||||
|
||||
dispatcher = MemoryAdaptiveDispatcher(
|
||||
memory_threshold_percent=config["crawler"]["memory_threshold_percent"],
|
||||
rate_limiter=RateLimiter(
|
||||
base_delay=tuple(config["crawler"]["rate_limiter"]["base_delay"])
|
||||
)
|
||||
)
|
||||
|
||||
if crawler_config.stream:
|
||||
crawler = AsyncWebCrawler(config=browser_config)
|
||||
await crawler.start()
|
||||
|
||||
results_gen = await asyncio.wait_for(
|
||||
crawler.arun_many(
|
||||
urls=crawl_request.urls,
|
||||
config=crawler_config,
|
||||
dispatcher=dispatcher
|
||||
),
|
||||
timeout=config["crawler"]["timeouts"]["stream_init"]
|
||||
)
|
||||
|
||||
from api import stream_results
|
||||
return StreamingResponse(
|
||||
stream_results(crawler, results_gen),
|
||||
media_type='application/x-ndjson',
|
||||
headers={
|
||||
'Cache-Control': 'no-cache',
|
||||
'Connection': 'keep-alive',
|
||||
'X-Stream-Status': 'active'
|
||||
}
|
||||
)
|
||||
else:
|
||||
async with AsyncWebCrawler(config=browser_config) as crawler:
|
||||
results = await asyncio.wait_for(
|
||||
crawler.arun_many(
|
||||
urls=crawl_request.urls,
|
||||
config=crawler_config,
|
||||
dispatcher=dispatcher
|
||||
),
|
||||
timeout=config["crawler"]["timeouts"]["batch_process"]
|
||||
)
|
||||
return JSONResponse({
|
||||
"success": True,
|
||||
"results": [result.model_dump() for result in results]
|
||||
})
|
||||
|
||||
except asyncio.TimeoutError as e:
|
||||
logger.error(f"Operation timed out: {str(e)}")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_504_GATEWAY_TIMEOUT,
|
||||
detail="Processing timeout"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Server error: {str(e)}", exc_info=True)
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail="Internal server error"
|
||||
)
|
||||
finally:
|
||||
if crawler:
|
||||
try:
|
||||
await crawler.close()
|
||||
except Exception as e:
|
||||
logger.error(f"Final crawler cleanup error: {e}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
import uvicorn
|
||||
uvicorn.run("server:app", host="0.0.0.0", port=8000, reload=True)
|
||||
uvicorn.run(
|
||||
"server:app",
|
||||
host=config["app"]["host"],
|
||||
port=config["app"]["port"],
|
||||
reload=config["app"]["reload"],
|
||||
timeout_keep_alive=config["app"]["timeout_keep_alive"]
|
||||
)
|
||||
54
deploy/docker/utils.py
Normal file
@@ -0,0 +1,54 @@
|
||||
import logging
|
||||
import yaml
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from fastapi import Request
|
||||
from typing import Dict, Optional
|
||||
|
||||
class TaskStatus(str, Enum):
|
||||
PROCESSING = "processing"
|
||||
FAILED = "failed"
|
||||
COMPLETED = "completed"
|
||||
|
||||
class FilterType(str, Enum):
|
||||
RAW = "raw"
|
||||
FIT = "fit"
|
||||
BM25 = "bm25"
|
||||
LLM = "llm"
|
||||
|
||||
def load_config() -> Dict:
|
||||
"""Load and return application configuration."""
|
||||
config_path = Path(__file__).parent / "config.yml"
|
||||
with open(config_path, "r") as config_file:
|
||||
return yaml.safe_load(config_file)
|
||||
|
||||
def setup_logging(config: Dict) -> None:
|
||||
"""Configure application logging."""
|
||||
logging.basicConfig(
|
||||
level=config["logging"]["level"],
|
||||
format=config["logging"]["format"]
|
||||
)
|
||||
|
||||
def get_base_url(request: Request) -> str:
|
||||
"""Get base URL including scheme and host."""
|
||||
return f"{request.url.scheme}://{request.url.netloc}"
|
||||
|
||||
def is_task_id(value: str) -> bool:
|
||||
"""Check if the value matches task ID pattern."""
|
||||
return value.startswith("llm_") and "_" in value
|
||||
|
||||
def datetime_handler(obj: any) -> Optional[str]:
|
||||
"""Handle datetime serialization for JSON."""
|
||||
if hasattr(obj, 'isoformat'):
|
||||
return obj.isoformat()
|
||||
raise TypeError(f"Object of type {type(obj)} is not JSON serializable")
|
||||
|
||||
def should_cleanup_task(created_at: str) -> bool:
|
||||
"""Check if task should be cleaned up based on creation time."""
|
||||
created = datetime.fromisoformat(created_at)
|
||||
return (datetime.now() - created).total_seconds() > 3600
|
||||
|
||||
def decode_redis_hash(hash_data: Dict[bytes, bytes]) -> Dict[str, str]:
|
||||
"""Decode Redis hash data from bytes to strings."""
|
||||
return {k.decode('utf-8'): v.decode('utf-8') for k, v in hash_data.items()}
|
||||
@@ -46,6 +46,7 @@ async def test_llm_filter():
|
||||
provider="openai/gpt-4o",
|
||||
api_token=os.getenv('OPENAI_API_KEY'),
|
||||
chunk_token_threshold=2 ** 12 * 2, # 2048 * 2
|
||||
ignore_cache = True,
|
||||
instruction="""
|
||||
Extract the main educational content while preserving its original wording and substance completely. Your task is to:
|
||||
|
||||
@@ -68,7 +69,7 @@ async def test_llm_filter():
|
||||
)
|
||||
|
||||
# Apply filtering
|
||||
filtered_content = filter.filter_content(html, ignore_cache = True)
|
||||
filtered_content = filter.filter_content(html)
|
||||
|
||||
# Show results
|
||||
print("\nFiltered Content Length:", len(filtered_content))
|
||||
|
||||