Merge branch 'next' into 2025-MAR-ALPHA-1

2025-04-17 10:50:02 +05:30
parent dcc265458c 94d486579c
commit eed7f88f29
38 changed files with 5574 additions and 878 deletions
--- a/47
+++ b/47
@@ -24,7 +24,7 @@ ARG TARGETARCH

 LABEL maintainer="unclecode"
 LABEL description="🔥🕷️ Crawl4AI: Open-source LLM Friendly Web Crawler & scraper"
-LABEL version="1.0"    
+LABEL version="1.0"

 RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
@@ -38,6 +38,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
    libjpeg-dev \
    redis-server \
    supervisor \
+    && apt-get clean \ 
    && rm -rf /var/lib/apt/lists/*

 RUN apt-get update && apt-get install -y --no-install-recommends \
@@ -62,11 +63,13 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
    libcairo2 \
    libasound2 \
    libatspi2.0-0 \
+    && apt-get clean \ 
    && rm -rf /var/lib/apt/lists/*

 RUN if [ "$ENABLE_GPU" = "true" ] && [ "$TARGETARCH" = "amd64" ] ; then \
    apt-get update && apt-get install -y --no-install-recommends \
    nvidia-cuda-toolkit \
+    && apt-get clean \ 
    && rm -rf /var/lib/apt/lists/* ; \
 else \
    echo "Skipping NVIDIA CUDA Toolkit installation (unsupported platform or GPU disabled)"; \
@@ -76,16 +79,24 @@ RUN if [ "$TARGETARCH" = "arm64" ]; then \
    echo "🦾 Installing ARM-specific optimizations"; \
    apt-get update && apt-get install -y --no-install-recommends \
    libopenblas-dev \
+    && apt-get clean \ 
    && rm -rf /var/lib/apt/lists/*; \
 elif [ "$TARGETARCH" = "amd64" ]; then \
    echo "🖥️ Installing AMD64-specific optimizations"; \
    apt-get update && apt-get install -y --no-install-recommends \
    libomp-dev \
+    && apt-get clean \ 
    && rm -rf /var/lib/apt/lists/*; \
 else \
    echo "Skipping platform-specific optimizations (unsupported platform)"; \
 fi

+# Create a non-root user and group
+RUN groupadd -r appuser && useradd --no-log-init -r -g appuser appuser
+
+# Create and set permissions for appuser home directory
+RUN mkdir -p /home/appuser && chown -R appuser:appuser /home/appuser
+
 WORKDIR ${APP_HOME}

 RUN echo '#!/bin/bash\n\
@@ -103,6 +114,7 @@ fi' > /tmp/install.sh && chmod +x /tmp/install.sh

 COPY . /tmp/project/

+# Copy supervisor config first (might need root later, but okay for now)
 COPY deploy/docker/supervisord.conf .

 COPY deploy/docker/requirements.txt .
@@ -131,16 +143,31 @@ RUN if [ "$INSTALL_TYPE" = "all" ] ; then \
    else \
        pip install "/tmp/project" ; \
    fi
-    
+
 RUN pip install --no-cache-dir --upgrade pip && \
    /tmp/install.sh && \
    python -c "import crawl4ai; print('✅ crawl4ai is ready to rock!')" && \
    python -c "from playwright.sync_api import sync_playwright; print('✅ Playwright is feeling dramatic!')"
-    
-RUN playwright install --with-deps chromium

+RUN crawl4ai-setup
+
+RUN playwright install --with-deps
+
+RUN mkdir -p /home/appuser/.cache/ms-playwright \
+    && cp -r /root/.cache/ms-playwright/chromium-* /home/appuser/.cache/ms-playwright/ \
+    && chown -R appuser:appuser /home/appuser/.cache/ms-playwright
+
+RUN crawl4ai-doctor
+
+# Copy application code
 COPY deploy/docker/* ${APP_HOME}/

+# Change ownership of the application directory to the non-root user
+RUN chown -R appuser:appuser ${APP_HOME}
+
+# Give appuser ownership of the Redis data and log directories, if used
+RUN mkdir -p /var/lib/redis /var/log/redis && chown -R appuser:appuser /var/lib/redis /var/log/redis
+
 HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD bash -c '\
    MEM=$(free -m | awk "/^Mem:/{print \$2}"); \
@@ -149,8 +176,14 @@ HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
        exit 1; \
    fi && \
    redis-cli ping > /dev/null && \
-    curl -f http://localhost:8000/health || exit 1'
+    curl -f http://localhost:11235/health || exit 1'

 EXPOSE 6379
-CMD ["supervisord", "-c", "supervisord.conf"]
-    
+# Switch to the non-root user before starting the application
+USER appuser
+
+# Set the PYTHON_ENV environment variable to production
+ENV PYTHON_ENV=production 
+
+# Start the application using supervisord
+CMD ["supervisord", "-c", "supervisord.conf"]
--- a/crawl4ai/init.py
+++ b/crawl4ai/init.py
@@ -2,7 +2,7 @@
 import warnings

 from .async_webcrawler import AsyncWebCrawler, CacheMode
-from .async_configs import BrowserConfig, CrawlerRunConfig, HTTPCrawlerConfig, LLMConfig
+from .async_configs import BrowserConfig, CrawlerRunConfig, HTTPCrawlerConfig, LLMConfig, ProxyConfig

 from .content_scraping_strategy import (
    ContentScrapingStrategy,
@@ -121,6 +121,7 @@ __all__ = [
    "Crawl4aiDockerClient",
    "ProxyRotationStrategy",
    "RoundRobinProxyStrategy",
+    "ProxyConfig"
 ]


--- a/crawl4ai/async_configs.py
+++ b/crawl4ai/async_configs.py
@@ -5,6 +5,7 @@ from .config import (
    MIN_WORD_THRESHOLD,
    IMAGE_DESCRIPTION_MIN_WORD_THRESHOLD,
    PROVIDER_MODELS,
+    PROVIDER_MODELS_PREFIXES,
    SCREENSHOT_HEIGHT_TRESHOLD,
    PAGE_TIMEOUT,
    IMAGE_SCORE_THRESHOLD,
@@ -27,11 +28,8 @@ import inspect
 from typing import Any, Dict, Optional
 from enum import Enum

-from .proxy_strategy import ProxyConfig
-try:
-    from .browser.models import DockerConfig
-except ImportError:
-    DockerConfig = None
+# from .proxy_strategy import ProxyConfig
+


 def to_serializable_dict(obj: Any, ignore_default_value : bool = False) -> Dict:
@@ -122,23 +120,25 @@ def from_serializable_dict(data: Any) -> Any:
    # Handle typed data
    if isinstance(data, dict) and "type" in data:
        # Handle plain dictionaries
-        if data["type"] == "dict":
+        if data["type"] == "dict" and "value" in data:
            return {k: from_serializable_dict(v) for k, v in data["value"].items()}

        # Import from crawl4ai for class instances
        import crawl4ai

-        cls = getattr(crawl4ai, data["type"])
+        if hasattr(crawl4ai, data["type"]):
+            cls = getattr(crawl4ai, data["type"])

-        # Handle Enum
-        if issubclass(cls, Enum):
-            return cls(data["params"])
+            # Handle Enum
+            if issubclass(cls, Enum):
+                return cls(data["params"])

-        # Handle class instances
-        constructor_args = {
-            k: from_serializable_dict(v) for k, v in data["params"].items()
-        }
-        return cls(**constructor_args)
+            if "params" in data:
+                # Handle class instances
+                constructor_args = {
+                    k: from_serializable_dict(v) for k, v in data["params"].items()
+                }
+                return cls(**constructor_args)

    # Handle lists
    if isinstance(data, list):
@@ -159,6 +159,117 @@ def is_empty_value(value: Any) -> bool:
        return True
    return False

+class ProxyConfig:
+    def __init__(
+        self,
+        server: str,
+        username: Optional[str] = None,
+        password: Optional[str] = None,
+        ip: Optional[str] = None,
+    ):
+        """Configuration class for a single proxy.
+        
+        Args:
+            server: Proxy server URL (e.g., "http://127.0.0.1:8080")
+            username: Optional username for proxy authentication
+            password: Optional password for proxy authentication
+            ip: Optional IP address for verification purposes
+        """
+        self.server = server
+        self.username = username
+        self.password = password
+        
+        # Extract IP from server if not explicitly provided
+        self.ip = ip or self._extract_ip_from_server()
+    
+    def _extract_ip_from_server(self) -> Optional[str]:
+        """Extract IP address from server URL."""
+        try:
+            # Simple extraction assuming http://ip:port format
+            if "://" in self.server:
+                parts = self.server.split("://")[1].split(":")
+                return parts[0]
+            else:
+                parts = self.server.split(":")
+                return parts[0]
+        except Exception:
+            return None
+    
+    @staticmethod
+    def from_string(proxy_str: str) -> "ProxyConfig":
+        """Create a ProxyConfig from a string in the format 'ip:port:username:password'."""
+        parts = proxy_str.split(":")
+        if len(parts) == 4:  # ip:port:username:password
+            ip, port, username, password = parts
+            return ProxyConfig(
+                server=f"http://{ip}:{port}",
+                username=username,
+                password=password,
+                ip=ip
+            )
+        elif len(parts) == 2:  # ip:port only
+            ip, port = parts
+            return ProxyConfig(
+                server=f"http://{ip}:{port}",
+                ip=ip
+            )
+        else:
+            raise ValueError(f"Invalid proxy string format: {proxy_str}")
+    
+    @staticmethod
+    def from_dict(proxy_dict: Dict) -> "ProxyConfig":
+        """Create a ProxyConfig from a dictionary."""
+        return ProxyConfig(
+            server=proxy_dict.get("server"),
+            username=proxy_dict.get("username"),
+            password=proxy_dict.get("password"),
+            ip=proxy_dict.get("ip")
+        )
+    
+    @staticmethod
+    def from_env(env_var: str = "PROXIES") -> List["ProxyConfig"]:
+        """Load proxies from environment variable.
+        
+        Args:
+            env_var: Name of environment variable containing comma-separated proxy strings
+            
+        Returns:
+            List of ProxyConfig objects
+        """
+        proxies = []
+        try:
+            proxy_list = os.getenv(env_var, "").split(",")
+            for proxy in proxy_list:
+                if not proxy:
+                    continue
+                proxies.append(ProxyConfig.from_string(proxy))
+        except Exception as e:
+            print(f"Error loading proxies from environment: {e}")
+        return proxies
+    
+    def to_dict(self) -> Dict:
+        """Convert to dictionary representation."""
+        return {
+            "server": self.server,
+            "username": self.username,
+            "password": self.password,
+            "ip": self.ip
+        }
+    
+    def clone(self, **kwargs) -> "ProxyConfig":
+        """Create a copy of this configuration with updated values.
+
+        Args:
+            **kwargs: Key-value pairs of configuration options to update
+
+        Returns:
+            ProxyConfig: A new instance with the specified updates
+        """
+        config_dict = self.to_dict()
+        config_dict.update(kwargs)
+        return ProxyConfig.from_dict(config_dict)
+
+

 class BrowserConfig:
    """
@@ -195,8 +306,6 @@ class BrowserConfig:
                             Default: None.
        proxy_config (ProxyConfig or dict or None): Detailed proxy configuration, e.g. {"server": "...", "username": "..."}.
                                     If None, no additional proxy config. Default: None.
-        docker_config (DockerConfig or dict or None): Configuration for Docker-based browser automation.
-                                     Contains settings for Docker container operation. Default: None.
        viewport_width (int): Default viewport width for pages. Default: 1080.
        viewport_height (int): Default viewport height for pages. Default: 600.
        viewport (dict): Default viewport dimensions for pages. If set, overrides viewport_width and viewport_height.
@@ -242,7 +351,6 @@ class BrowserConfig:
        channel: str = "chromium",
        proxy: str = None,
        proxy_config: Union[ProxyConfig, dict, None] = None,
-        docker_config: Union[DockerConfig, dict, None] = None,
        viewport_width: int = 1080,
        viewport_height: int = 600,
        viewport: dict = None,
@@ -283,15 +391,7 @@ class BrowserConfig:
            self.chrome_channel = ""
        self.proxy = proxy
        self.proxy_config = proxy_config
-        
-        # Handle docker configuration
-        if isinstance(docker_config, dict) and DockerConfig is not None:
-            self.docker_config = DockerConfig.from_kwargs(docker_config)
-        else:
-            self.docker_config = docker_config

-        if self.docker_config:
-            self.user_data_dir = self.docker_config.user_data_dir

        self.viewport_width = viewport_width
        self.viewport_height = viewport_height
@@ -362,7 +462,6 @@ class BrowserConfig:
            channel=kwargs.get("channel", "chromium"),
            proxy=kwargs.get("proxy"),
            proxy_config=kwargs.get("proxy_config", None),
-            docker_config=kwargs.get("docker_config", None),
            viewport_width=kwargs.get("viewport_width", 1080),
            viewport_height=kwargs.get("viewport_height", 600),
            accept_downloads=kwargs.get("accept_downloads", False),
@@ -419,13 +518,7 @@ class BrowserConfig:
            "debugging_port": self.debugging_port,
            "host": self.host,
        }
-        
-        # Include docker_config if it exists
-        if hasattr(self, "docker_config") and self.docker_config is not None:
-            if hasattr(self.docker_config, "to_dict"):
-                result["docker_config"] = self.docker_config.to_dict()
-            else:
-                result["docker_config"] = self.docker_config
+
                
        return result

@@ -1178,9 +1271,18 @@ class LLMConfig:
        elif api_token and api_token.startswith("env:"):
            self.api_token = os.getenv(api_token[4:])
        else:
-            self.api_token = PROVIDER_MODELS.get(provider, "no-token") or os.getenv(
-                DEFAULT_PROVIDER_API_KEY
-            )
+            # Check whether the given provider starts with any key in PROVIDER_MODELS_PREFIXES.
+            # If not, fall back to DEFAULT_PROVIDER and read its API key from the environment.
+            prefixes = PROVIDER_MODELS_PREFIXES.keys()
+            if any(provider.startswith(prefix) for prefix in prefixes):
+                selected_prefix = next(
+                    (prefix for prefix in prefixes if provider.startswith(prefix)),
+                    None,
+                )
+                self.api_token = PROVIDER_MODELS_PREFIXES.get(selected_prefix)                    
+            else:
+                self.provider = DEFAULT_PROVIDER
+                self.api_token = os.getenv(DEFAULT_PROVIDER_API_KEY)
        self.base_url = base_url
        self.temprature = temprature
        self.max_tokens = max_tokens
--- a/crawl4ai/async_webcrawler.py
+++ b/crawl4ai/async_webcrawler.py
@@ -36,7 +36,7 @@ from .markdown_generation_strategy import (
 )
 from .deep_crawling import DeepCrawlDecorator
 from .async_logger import AsyncLogger, AsyncLoggerBase
-from .async_configs import BrowserConfig, CrawlerRunConfig
+from .async_configs import BrowserConfig, CrawlerRunConfig, ProxyConfig
 from .async_dispatcher import *  # noqa: F403
 from .async_dispatcher import BaseDispatcher, MemoryAdaptiveDispatcher, RateLimiter

@@ -291,12 +291,12 @@ class AsyncWebCrawler:

                # Update proxy configuration from rotation strategy if available
                if config and config.proxy_rotation_strategy:
-                    next_proxy = await config.proxy_rotation_strategy.get_next_proxy()
+                    next_proxy : ProxyConfig = await config.proxy_rotation_strategy.get_next_proxy()
                    if next_proxy:
                        self.logger.info(
                            message="Switch proxy: {proxy}",
                            tag="PROXY",
-                            params={"proxy": next_proxy.server},
+                            params={"proxy": next_proxy.server} 
                        )
                        config.proxy_config = next_proxy
                        # config = config.clone(proxy_config=next_proxy)
--- a/crawl4ai/browser_manager.py
+++ b/crawl4ai/browser_manager.py
@@ -94,6 +94,7 @@ class ManagedBrowser:
        host: str = "localhost",
        debugging_port: int = 9222,
        cdp_url: Optional[str] = None, 
+        browser_config: Optional[BrowserConfig] = None,
    ):
        """
        Initialize the ManagedBrowser instance.
@@ -109,17 +110,19 @@ class ManagedBrowser:
            host (str): Host for debugging the browser. Default: "localhost".
            debugging_port (int): Port for debugging the browser. Default: 9222.
            cdp_url (str or None): CDP URL to connect to the browser. Default: None.
+            browser_config (BrowserConfig): Configuration object containing all browser settings. Default: None.
        """
-        self.browser_type = browser_type
-        self.user_data_dir = user_data_dir
-        self.headless = headless
+        self.browser_type = browser_config.browser_type
+        self.user_data_dir = browser_config.user_data_dir
+        self.headless = browser_config.headless
        self.browser_process = None
        self.temp_dir = None
-        self.debugging_port = debugging_port
-        self.host = host
+        self.debugging_port = browser_config.debugging_port
+        self.host = browser_config.host
        self.logger = logger
        self.shutting_down = False
-        self.cdp_url = cdp_url
+        self.cdp_url = browser_config.cdp_url
+        self.browser_config = browser_config

    async def start(self) -> str:
        """
@@ -142,6 +145,9 @@ class ManagedBrowser:
        # Get browser path and args based on OS and browser type
        # browser_path = self._get_browser_path()
        args = await self._get_browser_args()
+        
+        if self.browser_config.extra_args:
+            args.extend(self.browser_config.extra_args)

        # Start browser process
        try:
@@ -477,6 +483,7 @@ class BrowserManager:
                logger=self.logger,
                debugging_port=self.config.debugging_port,
                cdp_url=self.config.cdp_url,
+                browser_config=self.config,
            )

    async def start(self):
@@ -491,10 +498,12 @@ class BrowserManager:

        Note: This method should be called in a separate task to avoid blocking the main event loop.
        """
-        if self.playwright is None:
-            from playwright.async_api import async_playwright
+        if self.playwright is not None:
+            await self.close()
+            
+        from playwright.async_api import async_playwright

-            self.playwright = await async_playwright().start()
+        self.playwright = await async_playwright().start()

        if self.config.cdp_url or self.config.use_managed_browser:
            self.config.use_managed_browser = True
--- a/crawl4ai/config.py
+++ b/crawl4ai/config.py
@@ -29,6 +29,14 @@ PROVIDER_MODELS = {
    'gemini/gemini-2.0-flash-lite-preview-02-05': os.getenv("GEMINI_API_KEY"),
    "deepseek/deepseek-chat": os.getenv("DEEPSEEK_API_KEY"),
 }
+PROVIDER_MODELS_PREFIXES = {
+    "ollama": "no-token-needed",  # Ollama models do not need an API token
+    "groq": os.getenv("GROQ_API_KEY"),
+    "openai": os.getenv("OPENAI_API_KEY"),
+    "anthropic": os.getenv("ANTHROPIC_API_KEY"),
+    "gemini": os.getenv("GEMINI_API_KEY"),
+    "deepseek": os.getenv("DEEPSEEK_API_KEY"),
+}

 # Chunk token threshold
 CHUNK_TOKEN_THRESHOLD = 2**11  # 2048 tokens
--- a/crawl4ai/extraction_strategy.py
+++ b/crawl4ai/extraction_strategy.py
@@ -7,7 +7,9 @@ import time

 from .prompts import PROMPT_EXTRACT_BLOCKS, PROMPT_EXTRACT_BLOCKS_WITH_INSTRUCTION, PROMPT_EXTRACT_SCHEMA_WITH_INSTRUCTION, JSON_SCHEMA_BUILDER_XPATH, PROMPT_EXTRACT_INFERRED_SCHEMA
 from .config import (
-    DEFAULT_PROVIDER, CHUNK_TOKEN_THRESHOLD,
+    DEFAULT_PROVIDER,
+    DEFAULT_PROVIDER_API_KEY,
+    CHUNK_TOKEN_THRESHOLD,
    OVERLAP_RATE,
    WORD_TOKEN_RATE,
 )
@@ -542,6 +544,11 @@ class LLMExtractionStrategy(ExtractionStrategy):
        """
        super().__init__( input_format=input_format, **kwargs)
        self.llm_config = llm_config
+        if not self.llm_config:
+            self.llm_config = create_llm_config(
+                provider=DEFAULT_PROVIDER,
+                api_token=os.environ.get(DEFAULT_PROVIDER_API_KEY),
+            )
        self.instruction = instruction
        self.extract_type = extraction_type
        self.schema = schema
--- a/crawl4ai/install.py
+++ b/crawl4ai/install.py
@@ -40,10 +40,25 @@ def setup_home_directory():
            f.write("")

 def post_install():
-    """Run all post-installation tasks"""
+    """
+    Run all post-installation tasks.
+    Checks CRAWL4AI_MODE environment variable. If set to 'api',
+    skips Playwright browser installation.
+    """
    logger.info("Running post-installation setup...", tag="INIT")
    setup_home_directory()
-    install_playwright()
+
+    # Check environment variable to conditionally skip Playwright install
+    run_mode = os.getenv('CRAWL4AI_MODE')
+    if run_mode == 'api':
+        logger.warning(
+            "CRAWL4AI_MODE=api detected. Skipping Playwright browser installation.",
+            tag="SETUP"
+        )
+    else:
+        # Proceed with installation only if mode is not 'api'
+        install_playwright()
+
    run_migration()
    # TODO: Will be added in the future
    # setup_builtin_browser()
--- a/crawl4ai/proxy_strategy.py
+++ b/crawl4ai/proxy_strategy.py
@@ -4,6 +4,9 @@ from itertools import cycle
 import os


+########### ATTENTION PEOPLE OF EARTH ###########
+# I have moved this config to async_configs.py and kept it here in case someone is still importing it; however,
+# be a dear and use `from crawl4ai import ProxyConfig` instead :)
 class ProxyConfig:
    def __init__(
        self,
@@ -119,12 +122,12 @@ class ProxyRotationStrategy(ABC):
    """Base abstract class for proxy rotation strategies"""
    
    @abstractmethod
-    async def get_next_proxy(self) -> Optional[Dict]:
+    async def get_next_proxy(self) -> Optional[ProxyConfig]:
        """Get next proxy configuration from the strategy"""
        pass

    @abstractmethod
-    def add_proxies(self, proxies: List[Dict]):
+    def add_proxies(self, proxies: List[ProxyConfig]):
        """Add proxy configurations to the strategy"""
        pass

--- a/crawl4ai/ssl_certificate.py
+++ b/crawl4ai/ssl_certificate.py
@@ -9,83 +9,44 @@ from urllib.parse import urlparse
 import OpenSSL.crypto
 from pathlib import Path

-
-class SSLCertificate:
+# === Inherit from dict ===
+class SSLCertificate(dict):
    """
-    A class representing an SSL certificate with methods to export in various formats.
+    A class representing an SSL certificate, behaving like a dictionary
+    for direct JSON serialization. It stores the certificate information internally
+    and provides methods for export and property access.

-    Attributes:
-        cert_info (Dict[str, Any]): The certificate information.
-
-        Methods:
-            from_url(url: str, timeout: int = 10) -> Optional['SSLCertificate']: Create SSLCertificate instance from a URL.
-            from_file(file_path: str) -> Optional['SSLCertificate']: Create SSLCertificate instance from a file.
-            from_binary(binary_data: bytes) -> Optional['SSLCertificate']: Create SSLCertificate instance from binary data.
-            export_as_pem() -> str: Export the certificate as PEM format.
-            export_as_der() -> bytes: Export the certificate as DER format.
-            export_as_json() -> Dict[str, Any]: Export the certificate as JSON format.
-            export_as_text() -> str: Export the certificate as text format.
+    Inherits from dict, so instances are directly JSON serializable.
    """

+    # Use __slots__ for potential memory optimization if desired, though less common when inheriting dict
+    # __slots__ = ("_cert_info",) # If using slots, be careful with dict inheritance interaction
+
    def __init__(self, cert_info: Dict[str, Any]):
-        self._cert_info = self._decode_cert_data(cert_info)
-
-    @staticmethod
-    def from_url(url: str, timeout: int = 10) -> Optional["SSLCertificate"]:
        """
-        Create SSLCertificate instance from a URL.
+        Initializes the SSLCertificate object.

        Args:
-            url (str): URL of the website.
-            timeout (int): Timeout for the connection (default: 10).
-
-        Returns:
-            Optional[SSLCertificate]: SSLCertificate instance if successful, None otherwise.
+            cert_info (Dict[str, Any]): The raw certificate dictionary.
        """
-        try:
-            hostname = urlparse(url).netloc
-            if ":" in hostname:
-                hostname = hostname.split(":")[0]
+        # 1. Decode the data (handle bytes -> str)
+        decoded_info = self._decode_cert_data(cert_info)

-            context = ssl.create_default_context()
-            with socket.create_connection((hostname, 443), timeout=timeout) as sock:
-                with context.wrap_socket(sock, server_hostname=hostname) as ssock:
-                    cert_binary = ssock.getpeercert(binary_form=True)
-                    x509 = OpenSSL.crypto.load_certificate(
-                        OpenSSL.crypto.FILETYPE_ASN1, cert_binary
-                    )
+        # 2. (Optional, currently disabled) Store the decoded info in a separate attribute
+        # self._cert_info = decoded_info # You can keep this if methods rely on it

-                    cert_info = {
-                        "subject": dict(x509.get_subject().get_components()),
-                        "issuer": dict(x509.get_issuer().get_components()),
-                        "version": x509.get_version(),
-                        "serial_number": hex(x509.get_serial_number()),
-                        "not_before": x509.get_notBefore(),
-                        "not_after": x509.get_notAfter(),
-                        "fingerprint": x509.digest("sha256").hex(),
-                        "signature_algorithm": x509.get_signature_algorithm(),
-                        "raw_cert": base64.b64encode(cert_binary),
-                    }
-
-                    # Add extensions
-                    extensions = []
-                    for i in range(x509.get_extension_count()):
-                        ext = x509.get_extension(i)
-                        extensions.append(
-                            {"name": ext.get_short_name(), "value": str(ext)}
-                        )
-                    cert_info["extensions"] = extensions
-
-                    return SSLCertificate(cert_info)
-
-        except Exception:
-            return None
+        # 3. Initialize the dictionary part of the object with the decoded data
+        super().__init__(decoded_info)

    @staticmethod
    def _decode_cert_data(data: Any) -> Any:
        """Helper method to decode bytes in certificate data."""
        if isinstance(data, bytes):
-            return data.decode("utf-8")
+            try:
+                # Try UTF-8 first, fallback to latin-1 for arbitrary bytes
+                return data.decode("utf-8")
+            except UnicodeDecodeError:
+                return data.decode("latin-1") # Or handle as needed, maybe hex representation
        elif isinstance(data, dict):
            return {
                (
@@ -97,36 +58,119 @@ class SSLCertificate:
            return [SSLCertificate._decode_cert_data(item) for item in data]
        return data

+    @staticmethod
+    def from_url(url: str, timeout: int = 10) -> Optional["SSLCertificate"]:
+        """
+        Create SSLCertificate instance from a URL. Fetches cert info and initializes.
+        (Fetching logic remains the same)
+        """
+        cert_info_raw = None # Variable to hold the fetched dict
+        try:
+            hostname = urlparse(url).netloc
+            if ":" in hostname:
+                hostname = hostname.split(":")[0]
+
+            context = ssl.create_default_context()
+            # To fetch potentially problematic certificates, check_hostname could be set to False and
+            # verify_mode to CERT_NONE here; both are left commented out, so the default context settings apply.
+            # context.check_hostname = False
+            # context.verify_mode = ssl.CERT_NONE
+
+            with socket.create_connection((hostname, 443), timeout=timeout) as sock:
+                with context.wrap_socket(sock, server_hostname=hostname) as ssock:
+                    cert_binary = ssock.getpeercert(binary_form=True)
+                    if not cert_binary:
+                         print(f"Warning: No certificate returned for {hostname}")
+                         return None
+
+                    x509 = OpenSSL.crypto.load_certificate(
+                        OpenSSL.crypto.FILETYPE_ASN1, cert_binary
+                    )
+
+                    # Create the dictionary directly
+                    cert_info_raw = {
+                        "subject": dict(x509.get_subject().get_components()),
+                        "issuer": dict(x509.get_issuer().get_components()),
+                        "version": x509.get_version(),
+                        "serial_number": hex(x509.get_serial_number()),
+                        "not_before": x509.get_notBefore(), # Keep as bytes initially, _decode handles it
+                        "not_after": x509.get_notAfter(),   # Keep as bytes initially
+                        "fingerprint": x509.digest("sha256").hex(), # hex() is already string
+                        "signature_algorithm": x509.get_signature_algorithm(), # Keep as bytes
+                        "raw_cert": base64.b64encode(cert_binary), # Base64 is bytes, _decode handles it
+                    }
+
+                    # Add extensions
+                    extensions = []
+                    for i in range(x509.get_extension_count()):
+                        ext = x509.get_extension(i)
+                        # get_short_name() returns bytes, str(ext) handles value conversion
+                        extensions.append(
+                            {"name": ext.get_short_name(), "value": str(ext)}
+                        )
+                    cert_info_raw["extensions"] = extensions
+
+        except ssl.SSLCertVerificationError as e:
+             print(f"SSL Verification Error for {url}: {e}")
+             # Decide if you want to proceed or return None based on your needs
+             # You might try fetching without verification here if needed, but be cautious.
+             return None
+        except socket.gaierror:
+            print(f"Could not resolve hostname: {hostname}")
+            return None
+        except socket.timeout:
+            print(f"Connection timed out for {url}")
+            return None
+        except Exception as e:
+            print(f"Error fetching/processing certificate for {url}: {e}")
+            # Log the full error details if needed: logging.exception("Cert fetch error")
+            return None
+
+        # If successful, create the SSLCertificate instance from the dictionary
+        if cert_info_raw:
+             return SSLCertificate(cert_info_raw)
+        else:
+             return None
+
+
+    # --- Properties read the dictionary items directly via self.get() ---
+    @property
+    def issuer(self) -> Dict[str, str]:
+        return self.get("issuer", {}) # Use self.get for safety
+
+    @property
+    def subject(self) -> Dict[str, str]:
+        return self.get("subject", {})
+
+    @property
+    def valid_from(self) -> str:
+        return self.get("not_before", "")
+
+    @property
+    def valid_until(self) -> str:
+        return self.get("not_after", "")
+
+    @property
+    def fingerprint(self) -> str:
+        return self.get("fingerprint", "")
+
+    # --- Export methods can use `self` directly as it is the dict ---
    def to_json(self, filepath: Optional[str] = None) -> Optional[str]:
-        """
-        Export certificate as JSON.
-
-        Args:
-            filepath (Optional[str]): Path to save the JSON file (default: None).
-
-        Returns:
-            Optional[str]: JSON string if successful, None otherwise.
-        """
-        json_str = json.dumps(self._cert_info, indent=2, ensure_ascii=False)
+        """Export certificate as JSON."""
+        # `self` is already the dictionary we want to serialize
+        json_str = json.dumps(self, indent=2, ensure_ascii=False)
        if filepath:
            Path(filepath).write_text(json_str, encoding="utf-8")
            return None
        return json_str

    def to_pem(self, filepath: Optional[str] = None) -> Optional[str]:
-        """
-        Export certificate as PEM.
-
-        Args:
-            filepath (Optional[str]): Path to save the PEM file (default: None).
-
-        Returns:
-            Optional[str]: PEM string if successful, None otherwise.
-        """
+        """Export certificate as PEM."""
        try:
+            # Decode the raw_cert (which should be string due to _decode)
+            raw_cert_bytes = base64.b64decode(self.get("raw_cert", ""))
            x509 = OpenSSL.crypto.load_certificate(
-                OpenSSL.crypto.FILETYPE_ASN1,
-                base64.b64decode(self._cert_info["raw_cert"]),
+                OpenSSL.crypto.FILETYPE_ASN1, raw_cert_bytes
            )
            pem_data = OpenSSL.crypto.dump_certificate(
                OpenSSL.crypto.FILETYPE_PEM, x509
@@ -136,49 +180,25 @@ class SSLCertificate:
                Path(filepath).write_text(pem_data, encoding="utf-8")
                return None
            return pem_data
-        except Exception:
-            return None
+        except Exception as e:
+             print(f"Error converting to PEM: {e}")
+             return None

    def to_der(self, filepath: Optional[str] = None) -> Optional[bytes]:
-        """
-        Export certificate as DER.
-
-        Args:
-            filepath (Optional[str]): Path to save the DER file (default: None).
-
-        Returns:
-            Optional[bytes]: DER bytes if successful, None otherwise.
-        """
+        """Export certificate as DER."""
        try:
-            der_data = base64.b64decode(self._cert_info["raw_cert"])
+            # Decode the raw_cert (which should be string due to _decode)
+            der_data = base64.b64decode(self.get("raw_cert", ""))
            if filepath:
                Path(filepath).write_bytes(der_data)
                return None
            return der_data
-        except Exception:
-            return None
+        except Exception as e:
+             print(f"Error converting to DER: {e}")
+             return None

-    @property
-    def issuer(self) -> Dict[str, str]:
-        """Get certificate issuer information."""
-        return self._cert_info.get("issuer", {})
-
-    @property
-    def subject(self) -> Dict[str, str]:
-        """Get certificate subject information."""
-        return self._cert_info.get("subject", {})
-
-    @property
-    def valid_from(self) -> str:
-        """Get certificate validity start date."""
-        return self._cert_info.get("not_before", "")
-
-    @property
-    def valid_until(self) -> str:
-        """Get certificate validity end date."""
-        return self._cert_info.get("not_after", "")
-
-    @property
-    def fingerprint(self) -> str:
-        """Get certificate fingerprint."""
-        return self._cert_info.get("fingerprint", "")
+    # Optional: Add __repr__ for better debugging
+    def __repr__(self) -> str:
+        subject_cn = self.subject.get('CN', 'N/A')
+        issuer_cn = self.issuer.get('CN', 'N/A')
+        return f"<SSLCertificate Subject='{subject_cn}' Issuer='{issuer_cn}'>"
--- a/deploy/docker/README-new.md
+++ b/deploy/docker/README-new.md
@@ -0,0 +1,644 @@
+# Crawl4AI Docker Guide 🐳
+
+## Table of Contents
+- [Prerequisites](#prerequisites)
+- [Installation](#installation)
+  - [Option 1: Using Docker Compose (Recommended)](#option-1-using-docker-compose-recommended)
+  - [Option 2: Manual Local Build & Run](#option-2-manual-local-build--run)
+  - [Option 3: Using Pre-built Docker Hub Images](#option-3-using-pre-built-docker-hub-images)
+- [Dockerfile Parameters](#dockerfile-parameters)
+- [Using the API](#using-the-api)
+  - [Understanding Request Schema](#understanding-request-schema)
+  - [REST API Examples](#rest-api-examples)
+  - [Python SDK](#python-sdk)
+- [Metrics & Monitoring](#metrics--monitoring)
+- [Deployment Scenarios](#deployment-scenarios)
+- [Complete Examples](#complete-examples)
+- [Server Configuration](#server-configuration)
+  - [Understanding config.yml](#understanding-configyml)
+  - [JWT Authentication](#jwt-authentication)
+  - [Configuration Tips and Best Practices](#configuration-tips-and-best-practices)
+  - [Customizing Your Configuration](#customizing-your-configuration)
+  - [Configuration Recommendations](#configuration-recommendations)
+- [Getting Help](#getting-help)
+
+## Prerequisites
+
+Before we dive in, make sure you have:
+- Docker installed and running (version 20.10.0 or higher), including `docker compose` (usually bundled with Docker Desktop).
+- `git` for cloning the repository.
+- At least 4GB of RAM available for the container (more recommended for heavy use).
+- Python 3.10+ (if using the Python SDK).
+- Node.js 16+ (if using the Node.js examples).
+
+> 💡 **Pro tip**: Run `docker info` to check your Docker installation and available resources.
+
+## Installation
+
+We offer several ways to get the Crawl4AI server running. Docker Compose is the easiest way to manage local builds and runs.
+
+### Option 1: Using Docker Compose (Recommended)
+
+Docker Compose simplifies building and running the service, especially for local development and testing across different platforms.
+
+#### 1. Clone Repository
+
+```bash
+git clone https://github.com/unclecode/crawl4ai.git
+cd crawl4ai
+```
+
+#### 2. Environment Setup (API Keys)
+
+If you plan to use LLMs, copy the example environment file and add your API keys. This file should be in the **project root directory**.
+
+```bash
+# Make sure you are in the 'crawl4ai' root directory
+cp deploy/docker/.llm.env.example .llm.env
+
+# Now edit .llm.env and add your API keys
+# Example content:
+# OPENAI_API_KEY=sk-your-key
+# ANTHROPIC_API_KEY=your-anthropic-key
+# ...
+```
+> 🔑 **Note**: Keep your API keys secure! Never commit `.llm.env` to version control.
+
+#### 3. Build and Run with Compose
+
+The `docker-compose.yml` file in the project root defines services for different scenarios using **profiles**.
+
+*   **Build and Run Locally (AMD64):**
+    ```bash
+    # Builds the image locally using Dockerfile and runs it
+    docker compose --profile local-amd64 up --build -d
+    ```
+
+*   **Build and Run Locally (ARM64):**
+    ```bash
+    # Builds the image locally using Dockerfile and runs it
+    docker compose --profile local-arm64 up --build -d
+    ```
+
+*   **Run Pre-built Image from Docker Hub (AMD64):**
+    ```bash
+    # Pulls and runs the specified AMD64 image from Docker Hub
+    # (Set VERSION env var for specific tags, e.g., VERSION=0.5.1-d1)
+    docker compose --profile hub-amd64 up -d
+    ```
+
+*   **Run Pre-built Image from Docker Hub (ARM64):**
+    ```bash
+    # Pulls and runs the specified ARM64 image from Docker Hub
+    docker compose --profile hub-arm64 up -d
+    ```
+
+> The server will be available at `http://localhost:11235`.
+
+#### 4. Stopping Compose Services
+
+```bash
+# Stop the service(s) associated with a profile (e.g., local-amd64)
+docker compose --profile local-amd64 down
+```
+
+### Option 2: Manual Local Build & Run
+
+Use this option if you prefer to build and run the image manually instead of through Docker Compose.
+
+#### 1. Clone Repository & Setup Environment
+
+Follow steps 1 and 2 from the Docker Compose section above (clone repo, `cd crawl4ai`, create `.llm.env` in the root).
+
+#### 2. Build the Image (Multi-Arch)
+
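+Use `docker buildx` to build the image. This example builds for multiple platforms and loads the result into the local Docker daemon. Note that `--load` combined with more than one `--platform` value requires Docker's containerd image store; with the classic image store, pass only the platform that matches your host (e.g., `--platform linux/amd64`).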
+
+```bash
+# Make sure you are in the 'crawl4ai' root directory
+docker buildx build --platform linux/amd64,linux/arm64 -t crawl4ai-local:latest --load .
+```
+
+#### 3. Run the Container
+
+*   **Basic run (no LLM support):**
+    ```bash
+    # Replace --platform if your host is ARM64
+    docker run -d \
+      -p 11235:11235 \
+      --name crawl4ai-standalone \
+      --shm-size=1g \
+      --platform linux/amd64 \
+      crawl4ai-local:latest
+    ```
+
+*   **With LLM support:**
+    ```bash
+    # Make sure .llm.env is in the current directory (project root)
+    # Replace --platform if your host is ARM64
+    docker run -d \
+      -p 11235:11235 \
+      --name crawl4ai-standalone \
+      --env-file .llm.env \
+      --shm-size=1g \
+      --platform linux/amd64 \
+      crawl4ai-local:latest
+    ```
+
+> The server will be available at `http://localhost:11235`.
+
+#### 4. Stopping the Manual Container
+
+```bash
+docker stop crawl4ai-standalone && docker rm crawl4ai-standalone
+```
+
+### Option 3: Using Pre-built Docker Hub Images
+
+Pull and run images directly from Docker Hub without building locally.
+
+#### 1. Pull the Image
+
+We use a versioning scheme like `LIBRARY_VERSION-dREVISION` (e.g., `0.5.1-d1`). The `latest` tag points to the most recent stable release. Images are built with multi-arch manifests, so Docker usually pulls the correct version for your system automatically.
+
+```bash
+# Pull a specific version (recommended for stability)
+docker pull unclecode/crawl4ai:0.5.1-d1
+
+# Or pull the latest stable version
+docker pull unclecode/crawl4ai:latest
+```
+
+#### 2. Setup Environment (API Keys)
+
+If using LLMs, create the `.llm.env` file in a directory of your choice, similar to Step 2 in the Compose section.
+
+#### 3. Run the Container
+
+*   **Basic run:**
+    ```bash
+    docker run -d \
+      -p 11235:11235 \
+      --name crawl4ai-hub \
+      --shm-size=1g \
+      unclecode/crawl4ai:0.5.1-d1 # Or use :latest
+    ```
+
+*   **With LLM support:**
+    ```bash
+    # Make sure .llm.env is in the current directory you are running docker from
+    docker run -d \
+      -p 11235:11235 \
+      --name crawl4ai-hub \
+      --env-file .llm.env \
+      --shm-size=1g \
+      unclecode/crawl4ai:0.5.1-d1 # Or use :latest
+    ```
+
+> The server will be available at `http://localhost:11235`.
+
+#### 4. Stopping the Hub Container
+
+```bash
+docker stop crawl4ai-hub && docker rm crawl4ai-hub
+```
+
+#### Docker Hub Versioning Explained
+
+*   **Image Name:** `unclecode/crawl4ai`
+*   **Tag Format:** `LIBRARY_VERSION-dREVISION`
+    *   `LIBRARY_VERSION`: The Semantic Version of the core `crawl4ai` Python library included (e.g., `0.5.1`).
+    *   `dREVISION`: An incrementing number (starting at `d1`) for Docker build changes made *without* changing the library version (e.g., base image updates, dependency fixes). Resets to `d1` for each new `LIBRARY_VERSION`.
+*   **Example:** `unclecode/crawl4ai:0.5.1-d1`
+*   **`latest` Tag:** Points to the most recent stable `LIBRARY_VERSION-dREVISION`.
+*   **Multi-Arch:** Images support `linux/amd64` and `linux/arm64`. Docker automatically selects the correct architecture.
+
+---
+
+*(Rest of the document remains largely the same, but with key updates below)*
+
+---
+
+## Dockerfile Parameters
+
+You can customize the image build process using build arguments (`--build-arg`). These are typically used via `docker buildx build` or within the `docker-compose.yml` file.
+
+```bash
+# Example: Build with 'all' features using buildx
+docker buildx build \
+  --platform linux/amd64,linux/arm64 \
+  --build-arg INSTALL_TYPE=all \
+  -t yourname/crawl4ai-all:latest \
+  --load \
+  . # Build from root context
+```
+
+### Build Arguments Explained
+
+| Argument     | Description                              | Default   | Options                            |
+| :----------- | :--------------------------------------- | :-------- | :--------------------------------- |
+| INSTALL_TYPE | Feature set                              | `default` | `default`, `all`, `torch`, `transformer` |
+| ENABLE_GPU   | GPU support (CUDA for AMD64)           | `false`   | `true`, `false`                    |
+| APP_HOME     | Install path inside container (advanced) | `/app`    | any valid path                   |
+| USE_LOCAL    | Install library from local source        | `true`    | `true`, `false`                    |
+| GITHUB_REPO  | Git repo to clone if USE_LOCAL=false   | *(see Dockerfile)* | any git URL                  |
+| GITHUB_BRANCH| Git branch to clone if USE_LOCAL=false   | `main`    | any branch name                  |
+
+*(Note: PYTHON_VERSION is fixed by the `FROM` instruction in the Dockerfile)*
+
+### Build Best Practices
+
+1.  **Choose the Right Install Type**
+    *   `default`: Basic installation, smallest image size. Suitable for most standard web scraping and markdown generation.
+    *   `all`: Full features including `torch` and `transformers` for advanced extraction strategies (e.g., CosineStrategy, certain LLM filters). Significantly larger image. Ensure you need these extras.
+2.  **Platform Considerations**
+    *   Use `buildx` for building multi-architecture images, especially for pushing to registries.
+    *   Use `docker compose` profiles (`local-amd64`, `local-arm64`) for easy platform-specific local builds.
+3.  **Performance Optimization**
+    *   The image automatically includes platform-specific optimizations (OpenMP for AMD64, OpenBLAS for ARM64).
+
+---
+
+## Using the API
+
+Communicate with the running Docker server via its REST API (defaulting to `http://localhost:11235`). You can use the Python SDK or make direct HTTP requests.
+
+### Python SDK
+
+Install the SDK: `pip install crawl4ai`
+
+```python
+import asyncio
+from crawl4ai.docker_client import Crawl4aiDockerClient
+from crawl4ai import BrowserConfig, CrawlerRunConfig, CacheMode # Assuming you have crawl4ai installed
+
+async def main():
+    # Point to the correct server port
+    async with Crawl4aiDockerClient(base_url="http://localhost:11235", verbose=True) as client:
+        # If JWT is enabled on the server, authenticate first:
+        # await client.authenticate("user@example.com") # See Server Configuration section
+
+        # Example Non-streaming crawl
+        print("--- Running Non-Streaming Crawl ---")
+        results = await client.crawl(
+            ["https://httpbin.org/html"],
+            browser_config=BrowserConfig(headless=True), # Use library classes for config aid
+            crawler_config=CrawlerRunConfig(cache_mode=CacheMode.BYPASS)
+        )
+        if results: # client.crawl returns None on failure
+          print(f"Non-streaming results success: {results.success}")
+          if results.success:
+              for result in results: # Iterate through the CrawlResultContainer
+                  print(f"URL: {result.url}, Success: {result.success}")
+        else:
+            print("Non-streaming crawl failed.")
+
+
+        # Example Streaming crawl
+        print("\n--- Running Streaming Crawl ---")
+        stream_config = CrawlerRunConfig(stream=True, cache_mode=CacheMode.BYPASS)
+        try:
+            async for result in await client.crawl( # client.crawl returns an async generator for streaming
+                ["https://httpbin.org/html", "https://httpbin.org/links/5/0"],
+                browser_config=BrowserConfig(headless=True),
+                crawler_config=stream_config
+            ):
+                print(f"Streamed result: URL: {result.url}, Success: {result.success}")
+        except Exception as e:
+            print(f"Streaming crawl failed: {e}")
+
+
+        # Example Get schema
+        print("\n--- Getting Schema ---")
+        schema = await client.get_schema()
+        print(f"Schema received: {bool(schema)}") # Print whether schema was received
+
+if __name__ == "__main__":
+    asyncio.run(main())
+```
+
+*(SDK parameters like timeout, verify_ssl etc. remain the same)*
+
+### Second Approach: Direct API Calls
+
+Crucially, when sending configurations directly via JSON, they **must** follow the `{"type": "ClassName", "params": {...}}` structure for any non-primitive value (like config objects or strategies). Dictionaries must be wrapped as `{"type": "dict", "value": {...}}`.
+
+*(Keep the detailed explanation of Configuration Structure, Basic Pattern, Simple vs Complex, Strategy Pattern, Complex Nested Example, Quick Grammar Overview, Important Rules, Pro Tip)*
+
+#### More Examples *(Ensure Schema example uses type/value wrapper)*
+
+**Advanced Crawler Configuration**
+*(Keep example, ensure cache_mode uses valid enum value like "bypass")*
+
+**Extraction Strategy**
+```json
+{
+    "crawler_config": {
+        "type": "CrawlerRunConfig",
+        "params": {
+            "extraction_strategy": {
+                "type": "JsonCssExtractionStrategy",
+                "params": {
+                    "schema": {
+                        "type": "dict",
+                        "value": {
+                           "baseSelector": "article.post",
+                           "fields": [
+                               {"name": "title", "selector": "h1", "type": "text"},
+                               {"name": "content", "selector": ".content", "type": "html"}
+                           ]
+                         }
+                    }
+                }
+            }
+        }
+    }
+}
+```
+
+**LLM Extraction Strategy** *(Keep example, ensure schema uses type/value wrapper)*
+*(Keep Deep Crawler Example)*
+
+### REST API Examples
+
+All examples below target the server on port `11235`.
+
+#### Simple Crawl
+
+```python
+import requests
+
+# Configuration objects converted to the required JSON structure
+browser_config_payload = {
+    "type": "BrowserConfig",
+    "params": {"headless": True}
+}
+crawler_config_payload = {
+    "type": "CrawlerRunConfig",
+    "params": {"stream": False, "cache_mode": "bypass"} # Use string value of enum
+}
+
+crawl_payload = {
+    "urls": ["https://httpbin.org/html"],
+    "browser_config": browser_config_payload,
+    "crawler_config": crawler_config_payload
+}
+response = requests.post(
+    "http://localhost:11235/crawl", # Updated port
+    # headers={"Authorization": f"Bearer {token}"},  # If JWT is enabled
+    json=crawl_payload
+)
+print(f"Status Code: {response.status_code}")
+if response.ok:
+    print(response.json())
+else:
+    print(f"Error: {response.text}")
+
+```
+
+#### Streaming Results
+
+```python
+import json
+import httpx # Use httpx for async streaming example
+
+async def test_stream_crawl(token: str = None): # Made token optional
+    """Test the /crawl/stream endpoint with multiple URLs."""
+    url = "http://localhost:11235/crawl/stream" # Updated port
+    payload = {
+        "urls": [
+            "https://httpbin.org/html",
+            "https://httpbin.org/links/5/0",
+        ],
+        "browser_config": {
+            "type": "BrowserConfig",
+            "params": {"headless": True, "viewport": {"type": "dict", "value": {"width": 1200, "height": 800}}} # Viewport needs type:dict
+        },
+        "crawler_config": {
+            "type": "CrawlerRunConfig",
+            "params": {"stream": True, "cache_mode": "bypass"}
+        }
+    }
+
+    headers = {}
+    # if token:
+    #    headers = {"Authorization": f"Bearer {token}"} # If JWT is enabled
+
+    try:
+        async with httpx.AsyncClient() as client:
+            async with client.stream("POST", url, json=payload, headers=headers, timeout=120.0) as response:
+                print(f"Status: {response.status_code} (Expected: 200)")
+                response.raise_for_status() # Raise exception for bad status codes
+
+                # Read streaming response line-by-line (NDJSON)
+                async for line in response.aiter_lines():
+                    if line:
+                        try:
+                            data = json.loads(line)
+                            # Check for completion marker
+                            if data.get("status") == "completed":
+                                print("Stream completed.")
+                                break
+                            print(f"Streamed Result: {json.dumps(data, indent=2)}")
+                        except json.JSONDecodeError:
+                            print(f"Warning: Could not decode JSON line: {line}")
+
+    except httpx.HTTPStatusError as e:
+         print(f"HTTP error occurred: {e.response.status_code} - {e.response.text}")
+    except Exception as e:
+        print(f"Error in streaming crawl test: {str(e)}")
+
+# To run this example:
+# import asyncio
+# asyncio.run(test_stream_crawl())
+```
+
+---
+
+## Metrics & Monitoring
+
+Keep an eye on your crawler with these endpoints:
+
+- `/health` - Quick health check
+- `/metrics` - Detailed Prometheus metrics
+- `/schema` - Full API schema
+
+Example health check:
+```bash
+curl http://localhost:11235/health
+```
+
+---
+
+*(Deployment Scenarios and Complete Examples sections remain the same, maybe update links if examples moved)*
+
+---
+
+## Server Configuration
+
+The server's behavior can be customized through the `config.yml` file.
+
+### Understanding config.yml
+
+The configuration file is loaded from `/app/config.yml` inside the container. By default, the file from `deploy/docker/config.yml` in the repository is copied there during the build.
+
+Here's a detailed breakdown of the configuration options (using defaults from `deploy/docker/config.yml`):
+
+```yaml
+# Application Configuration
+app:
+  title: "Crawl4AI API"
+  version: "1.0.0" # Consider setting this to match library version, e.g., "0.5.1"
+  host: "0.0.0.0"
+  port: 8020 # NOTE: This port is used ONLY when running server.py directly. Gunicorn overrides this (see supervisord.conf).
+  reload: False # Default set to False - suitable for production
+  timeout_keep_alive: 300
+
+# Default LLM Configuration
+llm:
+  provider: "openai/gpt-4o-mini"
+  api_key_env: "OPENAI_API_KEY"
+  # api_key: sk-...  # If you pass the API key directly then api_key_env will be ignored
+
+# Redis Configuration (Used by internal Redis server managed by supervisord)
+redis:
+  host: "localhost"
+  port: 6379
+  db: 0
+  password: ""
+  # ... other redis options ...
+
+# Rate Limiting Configuration
+rate_limiting:
+  enabled: True
+  default_limit: "1000/minute"
+  trusted_proxies: []
+  storage_uri: "memory://"  # Use "redis://localhost:6379" if you need persistent/shared limits
+
+# Security Configuration
+security:
+  enabled: false # Master toggle for security features
+  jwt_enabled: false # Enable JWT authentication (requires security.enabled=true)
+  https_redirect: false # Force HTTPS (requires security.enabled=true)
+  trusted_hosts: ["*"] # Allowed hosts (use specific domains in production)
+  headers: # Security headers (applied if security.enabled=true)
+    x_content_type_options: "nosniff"
+    x_frame_options: "DENY"
+    content_security_policy: "default-src 'self'"
+    strict_transport_security: "max-age=63072000; includeSubDomains"
+
+# Crawler Configuration
+crawler:
+  memory_threshold_percent: 95.0
+  rate_limiter:
+    base_delay: [1.0, 2.0] # Min/max delay between requests in seconds for dispatcher
+  timeouts:
+    stream_init: 30.0  # Timeout for stream initialization
+    batch_process: 300.0 # Timeout for non-streaming /crawl processing
+
+# Logging Configuration
+logging:
+  level: "INFO"
+  format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+
+# Observability Configuration
+observability:
+  prometheus:
+    enabled: True
+    endpoint: "/metrics"
+  health_check:
+    endpoint: "/health"
+```
+
+*(JWT Authentication section remains the same, just note the default port is now 11235 for requests)*
+
+*(Configuration Tips and Best Practices remain the same)*
+
+### Customizing Your Configuration
+
+You can override the default `config.yml`.
+
+#### Method 1: Modify Before Build
+
+1.  Edit the `deploy/docker/config.yml` file in your local repository clone.
+2.  Build the image using `docker buildx` or `docker compose --profile local-... up --build`. The modified file will be copied into the image.
+
+#### Method 2: Runtime Mount (Recommended for Custom Deploys)
+
+1.  Create your custom configuration file, e.g., `my-custom-config.yml` locally. Ensure it contains all necessary sections.
+2.  Mount it when running the container:
+
+    *   **Using `docker run`:**
+        ```bash
+        # Assumes my-custom-config.yml is in the current directory
+        docker run -d -p 11235:11235 \
+          --name crawl4ai-custom-config \
+          --env-file .llm.env \
+          --shm-size=1g \
+          -v $(pwd)/my-custom-config.yml:/app/config.yml \
+          unclecode/crawl4ai:latest # Or your specific tag
+        ```
+
+    *   **Using `docker-compose.yml`:** Add a `volumes` section to the service definition:
+        ```yaml
+        services:
+          crawl4ai-hub-amd64: # Or your chosen service
+            image: unclecode/crawl4ai:latest
+            profiles: ["hub-amd64"]
+            <<: *base-config
+            volumes:
+              # Mount local custom config over the default one in the container
+              - ./my-custom-config.yml:/app/config.yml
+              # Keep the shared memory volume from base-config
+              - /dev/shm:/dev/shm
+        ```
+        *(Note: Ensure `my-custom-config.yml` is in the same directory as `docker-compose.yml`)*
+
+> 💡 When mounting, your custom file *completely replaces* the default one. Ensure it's a valid and complete configuration.
+
+### Configuration Recommendations
+
+1. **Security First** 🔒
+   - Always enable security in production
+   - Use specific trusted_hosts instead of wildcards
+   - Set up proper rate limiting to protect your server
+   - Consider your environment before enabling HTTPS redirect
+
+2. **Resource Management** 💻
+   - Adjust memory_threshold_percent based on available RAM
+   - Set timeouts according to your content size and network conditions
+   - Use Redis for rate limiting in multi-container setups
+
+3. **Monitoring** 📊
+   - Enable Prometheus if you need metrics
+   - Set DEBUG logging in development, INFO in production
+   - Regular health check monitoring is crucial
+
+4. **Performance Tuning** ⚡
+   - Start with conservative rate limiter delays
+   - Increase batch_process timeout for large content
+   - Adjust stream_init timeout based on initial response times
+
+## Getting Help
+
+We're here to help you succeed with Crawl4AI! Here's how to get support:
+
+- 📖 Check our [full documentation](https://docs.crawl4ai.com)
+- 🐛 Found a bug? [Open an issue](https://github.com/unclecode/crawl4ai/issues)
+- 💬 Join our [Discord community](https://discord.gg/crawl4ai)
+- ⭐ Star us on GitHub to show support!
+
+## Summary
+
+In this guide, we've covered everything you need to get started with Crawl4AI's Docker deployment:
+- Building and running the Docker container
+- Configuring the environment
+- Making API requests with proper typing
+- Using the Python SDK
+- Monitoring your deployment
+
+Remember, the examples in the `examples` folder are your friends - they show real-world usage patterns that you can adapt for your needs.
+
+Keep exploring, and don't hesitate to reach out if you need help! We're building something amazing together. 🚀
+
+Happy crawling! 🕷️
--- a/deploy/docker/api.py
+++ b/deploy/docker/api.py
@@ -391,21 +391,25 @@ async def handle_crawl_request(
            )
        )

-        async with AsyncWebCrawler(config=browser_config) as crawler:
-            results = []
-            func = getattr(crawler, "arun" if len(urls) == 1 else "arun_many")
-            partial_func = partial(func, 
-                                   urls[0] if len(urls) == 1 else urls, 
-                                   config=crawler_config, 
-                                   dispatcher=dispatcher)
-            results = await partial_func()
-            return {
-                "success": True,
-                "results": [result.model_dump() for result in results]
-            }
+        crawler: AsyncWebCrawler = AsyncWebCrawler(config=browser_config)
+        await crawler.start()
+        results = []
+        func = getattr(crawler, "arun" if len(urls) == 1 else "arun_many")
+        partial_func = partial(func, 
+                                urls[0] if len(urls) == 1 else urls, 
+                                config=crawler_config, 
+                                dispatcher=dispatcher)
+        results = await partial_func()
+        await crawler.close()
+        return {
+            "success": True,
+            "results": [result.model_dump() for result in results]
+        }

    except Exception as e:
        logger.error(f"Crawl error: {str(e)}", exc_info=True)
+        if 'crawler' in locals():
+            await crawler.close()
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e)
--- a/deploy/docker/config.yml
+++ b/deploy/docker/config.yml
@@ -4,7 +4,7 @@ app:
  version: "1.0.0"
  host: "0.0.0.0"
  port: 8020
-  reload: True
+  reload: False
  timeout_keep_alive: 300

 # Default LLM Configuration
--- a/deploy/docker/requirements.txt
+++ b/deploy/docker/requirements.txt
@@ -1,4 +1,3 @@
-crawl4ai
 fastapi
 uvicorn
 gunicorn>=23.0.0
--- a/deploy/docker/supervisord.conf
+++ b/deploy/docker/supervisord.conf
@@ -1,12 +1,28 @@
 [supervisord]
-nodaemon=true
+nodaemon=true                   ; Run supervisord in the foreground
+logfile=/dev/null               ; No supervisord log file; with nodaemon=true its log output still goes to stdout
+logfile_maxbytes=0

 [program:redis]
-command=redis-server
+command=/usr/bin/redis-server --loglevel notice ; Path to redis-server as installed by apt (Debian-based image)
+user=appuser                    ; Run redis as our non-root user
 autorestart=true
 priority=10
+stdout_logfile=/dev/stdout      ; Redirect redis stdout to container stdout
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/stderr      ; Redirect redis stderr to container stderr
+stderr_logfile_maxbytes=0

 [program:gunicorn]
-command=gunicorn --bind 0.0.0.0:8000 --workers 4 --threads 2 --timeout 300 --graceful-timeout 60 --keep-alive 65 --log-level debug --worker-class uvicorn.workers.UvicornWorker --max-requests 1000 --max-requests-jitter 50 server:app
+command=/usr/local/bin/gunicorn --bind 0.0.0.0:11235 --workers 2 --threads 2 --timeout 120 --graceful-timeout 30 --keep-alive 60 --log-level info --worker-class uvicorn.workers.UvicornWorker server:app
+directory=/app                  ; Working directory for the app
+user=appuser                    ; Run gunicorn as our non-root user
 autorestart=true
-priority=20
+priority=20
+environment=PYTHONUNBUFFERED=1  ; Ensure Python output is sent straight to logs
+stdout_logfile=/dev/stdout      ; Redirect gunicorn stdout to container stdout
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/stderr      ; Redirect gunicorn stderr to container stderr
+stderr_logfile_maxbytes=0
+
+# Optional: Add filebeat or other logging agents here if needed
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,15 +1,30 @@
-# Base configuration (not a service, just a reusable config block)
+# docker-compose.yml
+
+# Base configuration anchor for reusability
 x-base-config: &base-config
  ports:
+    # Map host port 11235 to container port 11235 (where Gunicorn will listen)
    - "11235:11235"
-    - "8000:8000"
-    - "9222:9222"
-    - "8080:8080"
+    # - "8080:8080" # Uncomment if needed
+
+  # Load API keys primarily from .llm.env file
+  # Create .llm.env in the project root by copying deploy/docker/.llm.env.example
+  env_file:
+    - .llm.env
+
+  # Define environment variables, allowing overrides from host environment
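+  # Syntax ${VAR:-} uses host env var 'VAR' if set, otherwise an empty string. NOTE(review): 'environment' entries take precedence over 'env_file', so an unset host var may blank the value from .llm.env - verify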
  environment:
-    - CRAWL4AI_API_TOKEN=${CRAWL4AI_API_TOKEN:-}
    - OPENAI_API_KEY=${OPENAI_API_KEY:-}
-    - CLAUDE_API_KEY=${CLAUDE_API_KEY:-}
+    - DEEPSEEK_API_KEY=${DEEPSEEK_API_KEY:-}
+    - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
+    - GROQ_API_KEY=${GROQ_API_KEY:-}
+    - TOGETHER_API_KEY=${TOGETHER_API_KEY:-}
+    - MISTRAL_API_KEY=${MISTRAL_API_KEY:-}
+    - GEMINI_API_TOKEN=${GEMINI_API_TOKEN:-}
+
  volumes:
+    # Mount /dev/shm for Chromium/Playwright performance
    - /dev/shm:/dev/shm
  deploy:
    resources:
@@ -19,47 +34,47 @@ x-base-config: &base-config
        memory: 1G
  restart: unless-stopped
  healthcheck:
+    # IMPORTANT: Ensure Gunicorn binds to 11235 in supervisord.conf
    test: ["CMD", "curl", "-f", "http://localhost:11235/health"]
    interval: 30s
    timeout: 10s
    retries: 3
-    start_period: 40s
+    start_period: 40s # Give the server time to start
+  # Run the container as the non-root user defined in the Dockerfile
+  user: "appuser"

 services:
-  # Local build services for different platforms
-  crawl4ai-amd64:
+  # --- Local Build Services ---
+  crawl4ai-local-amd64:
    build:
-      context: .
-      dockerfile: Dockerfile
+      context: . # Build context is the root directory
+      dockerfile: Dockerfile # Dockerfile is in the root directory
      args:
-        PYTHON_VERSION: "3.10"
-        INSTALL_TYPE: ${INSTALL_TYPE:-basic}
-        ENABLE_GPU: false
-      platforms:
-        - linux/amd64
+        INSTALL_TYPE: ${INSTALL_TYPE:-default}
+        ENABLE_GPU: ${ENABLE_GPU:-false}
+        # PYTHON_VERSION arg is omitted as it's fixed by 'FROM python:3.10-slim' in Dockerfile
+    platform: linux/amd64
    profiles: ["local-amd64"]
-    <<: *base-config  # extends yerine doğrudan yapılandırmayı dahil ettik
+    <<: *base-config # Inherit base configuration

-  crawl4ai-arm64:
+  crawl4ai-local-arm64:
    build:
-      context: .
-      dockerfile: Dockerfile
+      context: . # Build context is the root directory
+      dockerfile: Dockerfile # Dockerfile is in the root directory
      args:
-        PYTHON_VERSION: "3.10"
-        INSTALL_TYPE: ${INSTALL_TYPE:-basic}
-        ENABLE_GPU: false
-      platforms:
-        - linux/arm64
+        INSTALL_TYPE: ${INSTALL_TYPE:-default}
+        ENABLE_GPU: ${ENABLE_GPU:-false}
+    platform: linux/arm64
    profiles: ["local-arm64"]
    <<: *base-config

-  # Hub services for different platforms and versions
+  # --- Docker Hub Image Services ---
  crawl4ai-hub-amd64:
-    image: unclecode/crawl4ai:${VERSION:-basic}-amd64
+    image: unclecode/crawl4ai:${VERSION:-latest}-amd64
    profiles: ["hub-amd64"]
    <<: *base-config

  crawl4ai-hub-arm64:
-    image: unclecode/crawl4ai:${VERSION:-basic}-arm64
+    image: unclecode/crawl4ai:${VERSION:-latest}-arm64
    profiles: ["hub-arm64"]
    <<: *base-config
--- a/docs/examples/network_console_capture_example.py
+++ b/docs/examples/network_console_capture_example.py
@@ -357,8 +357,7 @@ async def demo_performance_analysis():
    async with AsyncWebCrawler() as crawler:
        config = CrawlerRunConfig(
            capture_network_requests=True,
-            wait_until="networkidle",
-            page_timeout=60000  # 60 seconds
+            page_timeout=60 * 2 * 1000  # 120 seconds
        )
        
        result = await crawler.arun(
@@ -406,6 +405,13 @@ async def demo_performance_analysis():
                            "url": url,
                            "duration_ms": duration
                        })
+                    if isinstance(timing, dict) and "requestStart" in timing and "responseStart" in timing and "startTime" in timing:
+                        # Convert to milliseconds
+                        duration = (timing["responseStart"] - timing["requestStart"]) * 1000
+                        resource_timings[resource_type].append({
+                            "url": url,
+                            "duration_ms": duration
+                        })
                
                # Calculate statistics for each resource type
                print("\nPerformance by resource type:")
@@ -455,14 +461,14 @@ async def main():
    os.makedirs(os.path.join(__cur_dir__, "tmp"), exist_ok=True)
    
    # Run basic examples
-    await demo_basic_network_capture()
+    # await demo_basic_network_capture()
    await demo_basic_console_capture()
-    await demo_combined_capture()
+    # await demo_combined_capture()
    
    # Run advanced examples
-    await analyze_spa_network_traffic()
-    await demo_security_analysis()
-    await demo_performance_analysis()
+    # await analyze_spa_network_traffic()
+    # await demo_security_analysis()
+    # await demo_performance_analysis()
    
    print("\n=== Examples Complete ===")
    print(f"Check the tmp directory for output files: {os.path.join(__cur_dir__, 'tmp')}")
--- a/docs/examples/quickstart_examples_set_1.py
+++ b/docs/examples/quickstart_examples_set_1.py
@@ -4,7 +4,7 @@ import json
 import base64
 from pathlib import Path
 from typing import List
-from crawl4ai.proxy_strategy import ProxyConfig
+from crawl4ai import ProxyConfig

 from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, CacheMode, CrawlResult
 from crawl4ai import RoundRobinProxyStrategy
--- a/docs/examples/tutorial_v0.5.py
+++ b/docs/examples/tutorial_v0.5.py
@@ -13,7 +13,7 @@ from crawl4ai.deep_crawling import (
 )
 from crawl4ai.deep_crawling.scorers import KeywordRelevanceScorer
 from crawl4ai.async_crawler_strategy import AsyncHTTPCrawlerStrategy
-from crawl4ai.proxy_strategy import ProxyConfig
+from crawl4ai import ProxyConfig
 from crawl4ai import RoundRobinProxyStrategy
 from crawl4ai.content_filter_strategy import LLMContentFilter
 from crawl4ai import DefaultMarkdownGenerator
--- a/docs/md_v2/ask_ai/ask-ai.css
+++ b/docs/md_v2/ask_ai/ask-ai.css
@@ -0,0 +1,444 @@
+/* ==== File: docs/ask_ai/ask-ai.css ==== */
+
+/* --- Basic Reset & Font --- */
+body {
+    /* Attempt to inherit variables from parent window (iframe context) */
+    /* Fallback values if variables are not inherited */
+    --fallback-bg: #070708;
+    --fallback-font: #e8e9ed;
+    --fallback-secondary: #a3abba;
+    --fallback-primary: #50ffff;
+    --fallback-primary-dimmed: #09b5a5;
+    --fallback-border: #1d1d20;
+    --fallback-code-bg: #1e1e1e;
+    --fallback-invert-font: #222225;
+    --font-stack: dm, Monaco, Courier New, monospace, serif;
+
+    font-family: var(--font-stack, "Courier New", monospace); /* Use theme font stack */
+    background-color: var(--background-color, var(--fallback-bg));
+    color: var(--font-color, var(--fallback-font));
+    margin: 0;
+    padding: 0;
+    font-size: 14px; /* Match global font size */
+    line-height: 1.5em; /* Match global line height */
+    height: 100vh; /* Ensure body takes full height */
+    overflow: hidden; /* Prevent body scrollbars, panels handle scroll */
+    display: flex; /* Use flex for the main container */
+}
+
+a {
+    color: var(--secondary-color, var(--fallback-secondary));
+    text-decoration: none;
+    transition: color 0.2s;
+}
+a:hover {
+    color: var(--primary-color, var(--fallback-primary));
+}
+
+/* --- Main Container Layout --- */
+.ai-assistant-container {
+    display: flex;
+    width: 100%;
+    height: 100%;
+    background-color: var(--background-color, var(--fallback-bg));
+}
+
+/* --- Sidebar Styling --- */
+.sidebar {
+    flex-shrink: 0; /* Prevent sidebars from shrinking */
+    height: 100%;
+    display: flex;
+    flex-direction: column;
+    /* background-color: var(--code-bg-color, var(--fallback-code-bg)); */
+    overflow-y: hidden; /* Header fixed, list scrolls */
+}
+
+.left-sidebar {
+    flex-basis: 240px; /* Width of history panel */
+    border-right: 1px solid var(--progress-bar-background, var(--fallback-border));
+}
+
+.right-sidebar {
+    flex-basis: 280px; /* Width of citations panel */
+    border-left: 1px solid var(--progress-bar-background, var(--fallback-border));
+}
+
+.sidebar header {
+    padding: 0.6em 1em;
+    border-bottom: 1px solid var(--progress-bar-background, var(--fallback-border));
+    flex-shrink: 0;
+    display: flex;
+    justify-content: space-between;
+    align-items: center;
+}
+
+.sidebar header h3 {
+    margin: 0;
+    font-size: 1.1em;
+    color: var(--font-color, var(--fallback-font));
+}
+
+.sidebar ul {
+    /* Scrollable list area; a single padding declaration replaces the overridden `padding: 0` */
+    list-style: none;
+    margin: 0;
+    padding: 0.5em 0;
+    overflow-y: auto; /* Enable scrolling for the list */
+    flex-grow: 1; /* Allow list to take remaining space */
+}
+
+.sidebar ul li {
+    padding: 0.3em 1em;
+}
+.sidebar ul li.no-citations,
+.sidebar ul li.no-history {
+    color: var(--secondary-color, var(--fallback-secondary));
+    font-style: italic;
+    font-size: 0.9em;
+    padding-left: 1em;
+}
+
+.sidebar ul li a {
+    color: var(--secondary-color, var(--fallback-secondary));
+    text-decoration: none;
+    display: block;
+    padding: 0.2em 0.5em;
+    border-radius: 3px;
+    transition: background-color 0.2s, color 0.2s;
+}
+
+.sidebar ul li a:hover {
+    color: var(--primary-color, var(--fallback-primary));
+    background-color: rgba(80, 255, 255, 0.08); /* Use primary color with alpha */
+}
+/* Style for active history item */
+#history-list li.active a {
+    color: var(--primary-dimmed-color, var(--fallback-primary-dimmed));
+    font-weight: bold;
+    background-color: rgba(80, 255, 255, 0.12);
+}
+
+/* --- Chat Panel Styling --- */
+#chat-panel {
+    flex-grow: 1; /* Take remaining space */
+    display: flex;
+    flex-direction: column;
+    height: 100%;
+    overflow: hidden; /* Prevent overflow, internal elements handle scroll */
+}
+
+#chat-messages {
+    flex-grow: 1;
+    overflow-y: auto; /* Scrollable chat history */
+    padding: 1em 1.5em;
+    border-bottom: 1px solid var(--progress-bar-background, var(--fallback-border));
+}
+
+.message {
+    margin-bottom: 1em;
+    padding: 0.8em 1.2em;
+    border-radius: 8px;
+    max-width: 90%; /* Slightly wider */
+    line-height: 1.6;
+    /* Apply pre-wrap for better handling of spaces/newlines AND wrapping */
+    white-space: pre-wrap;
+    word-wrap: break-word; /* Ensure long words break */
+}
+
+.user-message {
+    background-color: var(--progress-bar-background, var(--fallback-border)); /* User message background */
+    color: var(--font-color, var(--fallback-font));
+    margin-left: auto; /* Align user messages to the right */
+    text-align: left;
+}
+
+.ai-message {
+    background-color: var(--code-bg-color, var(--fallback-code-bg)); /* AI message background */
+    color: var(--font-color, var(--fallback-font));
+    margin-right: auto; /* Align AI messages to the left */
+    border: 1px solid var(--progress-bar-background, var(--fallback-border));
+}
+.ai-message.welcome-message {
+    border: none;
+    background-color: transparent;
+    max-width: 100%;
+    text-align: center;
+    color: var(--secondary-color, var(--fallback-secondary));
+    white-space: normal;
+}
+
+/* Styles for code within messages */
+.ai-message code {
+    background-color: var(--invert-font-color, var(--fallback-invert-font)) !important; /* Use light bg for code */
+    /* color: var(--background-color, var(--fallback-bg)) !important; Dark text */
+    padding: 0.1em 0.4em;
+    border-radius: 4px;
+    font-size: 0.9em;
+}
+.ai-message pre {
+    background-color: var(--invert-font-color, var(--fallback-invert-font)) !important;
+    color: var(--background-color, var(--fallback-bg)) !important;
+    padding: 1em;
+    border-radius: 5px;
+    overflow-x: auto;
+    margin: 0.8em 0;
+    white-space: pre;
+}
+.ai-message pre code {
+    background-color: transparent !important;
+    padding: 0;
+    font-size: inherit;
+}
+
+/* Override white-space for specific elements generated by Markdown */
+.ai-message p,
+.ai-message ul,
+.ai-message ol,
+.ai-message blockquote {
+    white-space: normal; /* Allow standard wrapping for block elements */
+}
+
+/* --- Markdown Element Styling within Messages --- */
+.message p {
+    margin-top: 0;
+    margin-bottom: 0.5em;
+}
+.message p:last-child {
+    margin-bottom: 0;
+}
+.message ul,
+.message ol {
+    margin: 0.5em 0 0.5em 1.5em;
+    padding: 0;
+}
+.message li {
+    margin-bottom: 0.2em;
+}
+
+/* Code block styling (adjusts previous rules slightly) */
+.message code {
+    /* Inline code */
+    background-color: var(--invert-font-color, var(--fallback-invert-font)) !important;
+    color: var(--font-color, var(--fallback-font)); /* Fallback keeps code text readable when the theme variable is not defined */
+    padding: 0.1em 0.4em;
+    border-radius: 4px;
+    font-size: 0.9em;
+    /* Ensure inline code breaks nicely */
+    word-break: break-all;
+    white-space: normal; /* Allow inline code to wrap if needed */
+}
+.message pre {
+    /* Code block container */
+    background-color: var(--invert-font-color, var(--fallback-invert-font)) !important;
+    color: var(--background-color, var(--fallback-bg)) !important;
+    padding: 1em;
+    border-radius: 5px;
+    overflow-x: auto;
+    margin: 0.8em 0;
+    font-size: 0.9em; /* Slightly smaller code blocks */
+}
+.message pre code {
+    /* Code within code block */
+    background-color: transparent !important;
+    padding: 0;
+    font-size: inherit;
+    word-break: normal; /* Don't break words in code blocks */
+    white-space: pre; /* Preserve whitespace strictly in code blocks */
+}
+
+/* Thinking indicator */
+.message-thinking {
+    display: inline-block;
+    width: 5px;
+    height: 5px;
+    background-color: var(--primary-color, var(--fallback-primary));
+    border-radius: 50%;
+    margin-left: 8px;
+    vertical-align: middle;
+    animation: thinking 1s infinite ease-in-out;
+}
+@keyframes thinking {
+    0%,
+    100% {
+        opacity: 0.5;
+        transform: scale(0.8);
+    }
+    50% {
+        opacity: 1;
+        transform: scale(1.2);
+    }
+}
+
+/* --- Thinking Indicator (Blinking Cursor Style) --- */
+.thinking-indicator-cursor {
+    display: inline-block;
+    width: 10px; /* Width of the cursor */
+    height: 1.1em; /* Slightly taller than the font size */
+    background-color: var(--primary-color, var(--fallback-primary));
+    margin-left: 5px;
+    vertical-align: text-bottom; /* Align the cursor's bottom edge with the bottom of the surrounding text */
+    animation: blink-cursor 1s step-end infinite;
+}
+
+@keyframes blink-cursor {
+    from,
+    to {
+        background-color: transparent;
+    }
+    50% {
+        background-color: var(--primary-color, var(--fallback-primary));
+    }
+}
+
+#chat-input-area {
+    flex-shrink: 0; /* Prevent input area from shrinking */
+    padding: 1em 1.5em;
+    display: flex;
+    align-items: flex-end; /* Align items to bottom */
+    gap: 10px;
+    background-color: var(--code-bg-color, var(--fallback-code-bg)); /* Match sidebars */
+}
+
+#chat-input-area textarea {
+    flex-grow: 1;
+    padding: 0.8em 1em;
+    border: 1px solid var(--progress-bar-background, var(--fallback-border));
+    background-color: var(--background-color, var(--fallback-bg));
+    color: var(--font-color, var(--fallback-font));
+    border-radius: 5px;
+    resize: none; /* Disable manual resize */
+    font-family: inherit;
+    font-size: 1em;
+    line-height: 1.4;
+    max-height: 150px; /* Limit excessive height */
+    overflow-y: auto;
+    /* rows: 2; */
+}
+
+#chat-input-area button {
+    /* Basic button styling - maybe inherit from main theme? */
+    padding: 0.6em 1.2em;
+    border: 1px solid var(--primary-dimmed-color, var(--fallback-primary-dimmed));
+    background-color: var(--primary-dimmed-color, var(--fallback-primary-dimmed));
+    color: var(--background-color, var(--fallback-bg));
+    border-radius: 5px;
+    cursor: pointer;
+    font-size: 0.9em;
+    transition: background-color 0.2s, border-color 0.2s;
+    height: min-content; /* Align with bottom of textarea */
+}
+
+#chat-input-area button:hover {
+    background-color: var(--primary-color, var(--fallback-primary));
+    border-color: var(--primary-color, var(--fallback-primary));
+}
+#chat-input-area button:disabled {
+    opacity: 0.6;
+    cursor: not-allowed;
+}
+
+.loading-indicator {
+    font-size: 0.9em;
+    color: var(--secondary-color, var(--fallback-secondary));
+    margin-right: 10px;
+    align-self: center;
+}
+
+/* --- Buttons --- */
+/* Inherit some button styles if possible */
+.btn.btn-sm {
+    color: var(--font-color, var(--fallback-font));
+    padding: 0.2em 0.5em;
+    font-size: 0.8em;
+    border: 1px solid var(--secondary-color, var(--fallback-secondary));
+    background: none;
+    border-radius: 3px;
+    cursor: pointer;
+}
+.btn.btn-sm:hover {
+    border-color: var(--font-color, var(--fallback-font));
+    background-color: var(--progress-bar-background, var(--fallback-border));
+}
+
+/* --- Basic Responsiveness --- */
+@media screen and (max-width: 900px) {
+    .left-sidebar {
+        flex-basis: 200px; /* Shrink history */
+    }
+    .right-sidebar {
+        flex-basis: 240px; /* Shrink citations */
+    }
+}
+
+@media screen and (max-width: 768px) {
+    /* Stack layout on mobile? Or hide sidebars? Hiding for now */
+    .sidebar {
+        display: none; /* Hide sidebars on small screens */
+    }
+    /* Could add toggle buttons later */
+}
+
+
+/* ==== File: docs/ask_ai/ask-ai.css (Updates V4 - Delete Button) ==== */
+
+
+.sidebar ul li {
+    /* Use flexbox to align link and delete button */
+    display: flex;
+    justify-content: space-between;
+    align-items: center;
+    padding: 0; /* Remove padding from li, add to link/button */
+    margin: 0.1em 0; /* Small vertical margin */
+}
+
+.sidebar ul li a {
+    /* Link takes most space */
+    flex-grow: 1;
+    padding: 0.3em 0.5em 0.3em 1em; /* Adjust padding */
+    /* Make ellipsis work for long titles */
+    white-space: nowrap;
+    overflow: hidden;
+    text-overflow: ellipsis;
+    /* Keep existing link styles */
+    color: var(--secondary-color, var(--fallback-secondary));
+    text-decoration: none;
+    display: block;
+    border-radius: 3px;
+    transition: background-color 0.2s, color 0.2s;
+}
+.sidebar ul li a:hover {
+    color: var(--primary-color, var(--fallback-primary));
+    background-color: rgba(80, 255, 255, 0.08);
+}
+
+/* Style for active history item's link */
+#history-list li.active a {
+    color: var(--primary-dimmed-color, var(--fallback-primary-dimmed));
+    font-weight: bold;
+    background-color: rgba(80, 255, 255, 0.12);
+}
+
+/* --- Delete Chat Button --- */
+.delete-chat-btn {
+    flex-shrink: 0; /* Don't shrink */
+    background: none;
+    border: none;
+    color: var(--secondary-color, var(--fallback-secondary));
+    cursor: pointer;
+    padding: 0.4em 0.8em; /* Padding around icon */
+    font-size: 0.9em;
+    opacity: 0.5; /* Dimmed by default */
+    transition: opacity 0.2s, color 0.2s;
+    margin-left: 5px; /* Space between link and button */
+    border-radius: 3px;
+}
+
+.sidebar ul li:hover .delete-chat-btn,
+.delete-chat-btn:hover {
+    opacity: 1; /* Show fully on hover */
+    color: var(--error-color, #ff3c74); /* Use error color on hover */
+}
+.delete-chat-btn:focus {
+    outline: 1px dashed var(--error-color, #ff3c74); /* Accessibility */
+     opacity: 1;
+}
--- a/docs/md_v2/ask_ai/ask-ai.js
+++ b/docs/md_v2/ask_ai/ask-ai.js
@@ -0,0 +1,603 @@
+// ==== File: docs/ask_ai/ask-ai.js (Marked, Streaming, History) ====
+
+document.addEventListener("DOMContentLoaded", () => {
+    console.log("AI Assistant JS V2 Loaded");
+
+    // --- DOM Element Selectors ---
+    const historyList = document.getElementById("history-list");
+    const newChatButton = document.getElementById("new-chat-button");
+    const chatMessages = document.getElementById("chat-messages");
+    const chatInput = document.getElementById("chat-input");
+    const sendButton = document.getElementById("send-button");
+    const citationsList = document.getElementById("citations-list");
+
+    // --- Constants ---
+    const CHAT_INDEX_KEY = "aiAssistantChatIndex_v1";
+    const CHAT_PREFIX = "aiAssistantChat_v1_";
+
+    // --- State ---
+    let currentChatId = null;
+    let conversationHistory = []; // Holds message objects { sender: 'user'/'ai', text: '...' }
+    let isThinking = false;
+    let streamInterval = null; // To control the streaming interval
+
+    // --- Event Listeners ---
+    sendButton.addEventListener("click", handleSendMessage);
+    chatInput.addEventListener("keydown", handleInputKeydown);
+    newChatButton.addEventListener("click", handleNewChat);
+    chatInput.addEventListener("input", autoGrowTextarea);
+
+    // --- Initialization ---
+    loadChatHistoryIndex(); // Load history list on startup
+    const initialQuery = checkForInitialQuery(window.parent.location); // Check for query param
+    if (!initialQuery) {
+        loadInitialChat(); // Load normally if no query
+    }
+
+    // --- Core Functions ---
+
+    /**
+     * Send the chat input's text: store and display it as a user message, then stream a
+     * reply into a new AI bubble. Does nothing for blank input or while a reply is pending.
+     */
+    function handleSendMessage() {
+        const userMessageText = chatInput.value.trim();
+        if (!userMessageText || isThinking) return;
+
+        setThinking(true); // Start thinking state
+
+        // Add user message to state and UI
+        const userMessage = { sender: "user", text: userMessageText };
+        conversationHistory.push(userMessage);
+        addMessageToChat(userMessage, false); // false: no thinking indicator for the user bubble
+
+        chatInput.value = "";
+        autoGrowTextarea(); // Reset textarea height
+
+        // Prepare for AI response (create empty div)
+        const aiMessageDiv = addMessageToChat({ sender: "ai", text: "" }, true); // Add empty div with thinking indicator
+
+        // TODO: Generate fingerprint/JWT here
+
+        // TODO: Send `conversationHistory` + JWT to backend API
+        // Replace placeholder below with actual API call
+        // The backend should ideally return a stream of text tokens
+
+        // --- Placeholder Streaming Simulation ---
+        const simulatedFullResponse = `Okay, Here’s a minimal Python script that creates an AsyncWebCrawler, fetches a webpage, and prints the first 300 characters of its Markdown output:
+
+\`\`\`python
+import asyncio
+from crawl4ai import AsyncWebCrawler
+
+async def main():
+    async with AsyncWebCrawler() as crawler:
+        result = await crawler.arun("https://example.com")
+        print(result.markdown[:300])  # Print first 300 chars
+
+if __name__ == "__main__":
+    asyncio.run(main())
+\`\`\`
+
+A code snippet: \`crawler.run()\`. Check the [quickstart](/core/quickstart).`;
+
+        // Simulate receiving the response stream
+        streamSimulatedResponse(aiMessageDiv, simulatedFullResponse);
+
+        // (Disabled) Citations could also be pushed separately shortly after the stream starts,
+        // e.g. setTimeout(() => addCitations([{ title: "Simulated Doc 1", url: "#sim1" }]), 500);
+        // at present streamSimulatedResponse extracts them from the finished reply instead.
+    }
+
+    function handleInputKeydown(event) {
+        if (event.key === "Enter" && !event.shiftKey) {
+            event.preventDefault();
+            handleSendMessage();
+        }
+    }
+
+    function addMessageToChat(message, addThinkingIndicator = false) {
+        const messageDiv = document.createElement("div");
+        messageDiv.classList.add("message", `${message.sender}-message`);
+
+        // Parse markdown and set HTML
+        messageDiv.innerHTML = message.text ? marked.parse(message.text) : "";
+
+        if (message.sender === "ai") {
+            // Apply Syntax Highlighting AFTER setting innerHTML
+            messageDiv.querySelectorAll("pre code:not(.hljs)").forEach((block) => {
+                if (typeof hljs !== "undefined") {
+                    // Check if already highlighted to prevent double-highlighting issues
+                    if (!block.classList.contains("hljs")) {
+                        hljs.highlightElement(block);
+                    }
+                } else {
+                    console.warn("highlight.js (hljs) not found for syntax highlighting.");
+                }
+            });
+
+            // Add thinking indicator if needed (and not already present)
+            if (addThinkingIndicator && !message.text && !messageDiv.querySelector(".thinking-indicator-cursor")) {
+                const thinkingDiv = document.createElement("div");
+                thinkingDiv.className = "thinking-indicator-cursor";
+                messageDiv.appendChild(thinkingDiv);
+            }
+        } else {
+            // User messages remain plain text
+            // messageDiv.textContent = message.text;
+        }
+
+        // wrap each pre in a div.terminal
+        messageDiv.querySelectorAll("pre").forEach((block) => {
+            const wrapper = document.createElement("div");
+            wrapper.className = "terminal";
+            block.parentNode.insertBefore(wrapper, block);
+            wrapper.appendChild(block);
+        });
+
+        chatMessages.appendChild(messageDiv);
+        // Scroll only if user is near the bottom? (More advanced)
+        // Simple scroll for now:
+        scrollToBottom();
+        return messageDiv; // Return the created element
+    }
+
+    /**
+     * Simulate a streamed AI reply: reveal `fullText` token by token inside `messageDiv`,
+     * then finalize it (highlight, wrap <pre>, store in history with citations, save, unlock).
+     */
+    function streamSimulatedResponse(messageDiv, fullText) {
+        const thinkingIndicator = messageDiv.querySelector(".thinking-indicator-cursor");
+        if (thinkingIndicator) thinkingIndicator.remove();
+
+        const tokens = fullText.split(/(\s+)/); // capture group keeps the whitespace runs as tokens
+        let currentText = "";
+        let tokenIndex = 0;
+        // Clear previous interval just in case
+        if (streamInterval) clearInterval(streamInterval);
+
+        streamInterval = setInterval(() => {
+            const cursorSpan = '<span class="thinking-indicator-cursor"></span>'; // Cursor for streaming
+            if (tokenIndex < tokens.length) {
+                currentText += tokens[tokenIndex];
+                // Render intermediate markdown + cursor (NOTE(review): inside an open code fence the span is presumably shown as literal text — confirm)
+                messageDiv.innerHTML = marked.parse(currentText + cursorSpan);
+                // Syntax highlighting is skipped on intermediate ticks and applied once on completion.
+                scrollToBottom(); // Keep scrolling as content streams
+                tokenIndex++;
+            } else {
+                // Streaming finished
+                clearInterval(streamInterval);
+                streamInterval = null;
+
+                // Final render without cursor
+                messageDiv.innerHTML = marked.parse(currentText);
+
+                // === Final Syntax Highlighting ===
+                messageDiv.querySelectorAll("pre code:not(.hljs)").forEach((block) => {
+                    if (typeof hljs !== "undefined" && !block.classList.contains("hljs")) {
+                        hljs.highlightElement(block);
+                    }
+                });
+
+                // === Extract Citations ===
+                const citations = extractMarkdownLinks(currentText);
+
+                // Wrap each pre in a div.terminal
+                messageDiv.querySelectorAll("pre").forEach((block) => {
+                    const wrapper = document.createElement("div");
+                    wrapper.className = "terminal";
+                    block.parentNode.insertBefore(wrapper, block);
+                    wrapper.appendChild(block);
+                });
+
+                const aiMessage = { sender: "ai", text: currentText, citations: citations };
+                conversationHistory.push(aiMessage);
+                updateCitationsDisplay();
+                saveCurrentChat();
+                setThinking(false);
+            }
+        }, 50); // Adjust speed
+    }
+
+    // === NEW Function to Extract Links ===
+    function extractMarkdownLinks(markdownText) {
+        const regex = /\[([^\]]+)\]\(([^)]+)\)/g; // [text](url)
+        const citations = [];
+        let match;
+        while ((match = regex.exec(markdownText)) !== null) {
+            // Avoid adding self-links from within the citations list if AI includes them
+            if (!match[2].startsWith("#citation-")) {
+                citations.push({
+                    title: match[1].trim(),
+                    url: match[2].trim(),
+                });
+            }
+        }
+        // Optional: Deduplicate links based on URL
+        const uniqueCitations = citations.filter(
+            (citation, index, self) => index === self.findIndex((c) => c.url === citation.url)
+        );
+        return uniqueCitations;
+    }
+
+    // === REVISED Function to Display Citations ===
+    function updateCitationsDisplay() {
+        let lastCitations = null;
+        // Find the most recent AI message with citations
+        for (let i = conversationHistory.length - 1; i >= 0; i--) {
+            if (
+                conversationHistory[i].sender === "ai" &&
+                conversationHistory[i].citations &&
+                conversationHistory[i].citations.length > 0
+            ) {
+                lastCitations = conversationHistory[i].citations;
+                break; // Found the latest citations
+            }
+        }
+
+        citationsList.innerHTML = ""; // Clear previous
+        if (!lastCitations) {
+            citationsList.innerHTML = '<li class="no-citations">No citations available.</li>';
+            return;
+        }
+
+        lastCitations.forEach((citation, index) => {
+            const li = document.createElement("li");
+            const a = document.createElement("a");
+            // Generate a unique ID for potential internal linking if needed
+            // a.id = `citation-${index}`;
+            a.href = citation.url || "#";
+            a.textContent = citation.title;
+            a.target = "_top"; // Open in main window
+            li.appendChild(a);
+            citationsList.appendChild(li);
+        });
+    }
+
+    function addCitations(citations) {
+        citationsList.innerHTML = ""; // Clear
+        if (!citations || citations.length === 0) {
+            citationsList.innerHTML = '<li class="no-citations">No citations available.</li>';
+            return;
+        }
+        citations.forEach((citation) => {
+            const li = document.createElement("li");
+            const a = document.createElement("a");
+            a.href = citation.url || "#";
+            a.textContent = citation.title;
+            a.target = "_top"; // Open in main window
+            li.appendChild(a);
+            citationsList.appendChild(li);
+        });
+    }
+
+    function setThinking(thinking) {
+        isThinking = thinking;
+        sendButton.disabled = thinking;
+        chatInput.disabled = thinking;
+        chatInput.placeholder = thinking ? "AI is responding..." : "Ask about Crawl4AI...";
+        // Stop any existing stream if we start thinking again (e.g., rapid resend)
+        if (thinking && streamInterval) {
+            clearInterval(streamInterval);
+            streamInterval = null;
+        }
+    }
+
+    function autoGrowTextarea() { // Resize the chat input to fit its current content
+        chatInput.style.height = "auto"; // reset first so scrollHeight reflects the content, not the old height
+        chatInput.style.height = `${chatInput.scrollHeight}px`; // then set the height to the content height
+    }
+
+    function scrollToBottom() { // Scroll the message list to its end
+        chatMessages.scrollTop = chatMessages.scrollHeight; // scrollTop is clamped, so this lands on the last message
+    }
+
+    // --- Query Parameter Handling ---
+    function checkForInitialQuery(locationToCheck) {
+        // <-- Receive location object
+        if (!locationToCheck) {
+            console.warn("Ask AI: Could not access parent window location.");
+            return false;
+        }
+        const urlParams = new URLSearchParams(locationToCheck.search); // <-- Use passed location's search string
+        const encodedQuery = urlParams.get("qq"); // <-- Use 'qq'
+
+        if (encodedQuery) {
+            console.log("Initial query found (qq):", encodedQuery);
+            try {
+                const decodedText = decodeURIComponent(escape(atob(encodedQuery)));
+                console.log("Decoded query:", decodedText);
+
+                // Start new chat immediately
+                handleNewChat(true);
+
+                // Delay setting input and sending message slightly
+                setTimeout(() => {
+                    chatInput.value = decodedText;
+                    autoGrowTextarea();
+                    handleSendMessage();
+
+                    // Clean the PARENT window's URL
+                    try {
+                        const cleanUrl = locationToCheck.pathname;
+                        // Use parent's history object
+                        window.parent.history.replaceState({}, window.parent.document.title, cleanUrl);
+                    } catch (e) {
+                        console.warn("Ask AI: Could not clean parent URL using replaceState.", e);
+                        // This might fail due to cross-origin restrictions if served differently,
+                        // but should work fine with mkdocs serve on the same origin.
+                    }
+                }, 100);
+
+                return true; // Query processed
+            } catch (e) {
+                console.error("Error decoding initial query (qq):", e);
+                // Clean the PARENT window's URL even on error
+                try {
+                    const cleanUrl = locationToCheck.pathname;
+                    window.parent.history.replaceState({}, window.parent.document.title, cleanUrl);
+                } catch (cleanError) {
+                    console.warn("Ask AI: Could not clean parent URL after decode error.", cleanError);
+                }
+                return false;
+            }
+        }
+        return false; // No 'qq' query found
+    }
+
+    // --- History Management ---
+
+    function handleNewChat(isFromQuery = false) {
+        if (isThinking) return; // Don't allow new chat while responding
+
+        // Only save if NOT triggered immediately by a query parameter load
+        if (!isFromQuery) {
+            saveCurrentChat();
+        }
+
+        currentChatId = `chat_${Date.now()}`;
+        conversationHistory = []; // Clear message history state
+        chatMessages.innerHTML = ""; // Start with clean slate for query
+        if (!isFromQuery) {
+            // Show welcome only if manually started
+            chatMessages.innerHTML =
+                '<div class="message ai-message welcome-message">Started a new chat! Ask me anything about Crawl4AI.</div>';
+        }
+        addCitations([]); // Clear citations
+        updateCitationsDisplay(); // Clear UI
+
+        // Add to index and save
+        let index = loadChatIndex();
+        // Generate a generic title initially, update later
+        const newTitle = isFromQuery ? "Chat from Selection" : `Chat ${new Date().toLocaleString()}`;
+        // index.unshift({ id: currentChatId, title: `Chat ${new Date().toLocaleString()}` }); // Add to start
+        index.unshift({ id: currentChatId, title: newTitle });
+        saveChatIndex(index);
+
+        renderHistoryList(index); // Update UI
+        setActiveHistoryItem(currentChatId);
+        saveCurrentChat(); // Save the empty new chat state
+    }
+
+    function loadChat(chatId) {
+        if (isThinking || chatId === currentChatId) return;
+
+        // Check if chat data actually exists before proceeding
+        const storedChat = localStorage.getItem(CHAT_PREFIX + chatId);
+        if (storedChat === null) {
+            console.warn(`Attempted to load non-existent chat: ${chatId}. Removing from index.`);
+            deleteChatData(chatId); // Clean up index
+            loadChatHistoryIndex(); // Reload history list
+            loadInitialChat(); // Load next available chat
+            return;
+        }
+
+        console.log(`Loading chat: ${chatId}`);
+        saveCurrentChat(); // Save current before switching
+
+        try {
+            conversationHistory = JSON.parse(storedChat);
+            currentChatId = chatId;
+            renderChatMessages(conversationHistory);
+            updateCitationsDisplay();
+            setActiveHistoryItem(chatId);
+        } catch (e) {
+            console.error("Error loading chat:", chatId, e);
+            alert("Failed to load chat data.");
+            conversationHistory = [];
+            renderChatMessages(conversationHistory);
+            updateCitationsDisplay();
+        }
+    }
+
+    function saveCurrentChat() {
+        if (currentChatId && conversationHistory.length > 0) {
+            try {
+                localStorage.setItem(CHAT_PREFIX + currentChatId, JSON.stringify(conversationHistory));
+                console.log(`Chat ${currentChatId} saved.`);
+
+                // Update title in index (e.g., use first user message)
+                let index = loadChatIndex();
+                const currentItem = index.find((item) => item.id === currentChatId);
+                if (
+                    currentItem &&
+                    conversationHistory[0]?.sender === "user" &&
+                    !currentItem.title.startsWith("Chat about:")
+                ) {
+                    currentItem.title = `Chat about: ${conversationHistory[0].text.substring(0, 30)}...`;
+                    saveChatIndex(index);
+                    // Re-render history list if title changed - small optimization needed here maybe
+                    renderHistoryList(index);
+                    setActiveHistoryItem(currentChatId); // Re-set active after re-render
+                }
+            } catch (e) {
+                console.error("Error saving chat:", currentChatId, e);
+                // Handle potential storage full errors
+                if (e.name === "QuotaExceededError") {
+                    alert("Local storage is full. Cannot save chat history.");
+                    // Consider implementing history pruning logic here
+                }
+            }
+        } else if (currentChatId) {
+            // Save empty state for newly created chats if needed, or remove?
+            localStorage.setItem(CHAT_PREFIX + currentChatId, JSON.stringify([]));
+        }
+    }
+
+    function loadChatIndex() {
+        try {
+            const storedIndex = localStorage.getItem(CHAT_INDEX_KEY);
+            return storedIndex ? JSON.parse(storedIndex) : [];
+        } catch (e) {
+            console.error("Error loading chat index:", e);
+            return []; // Return empty array on error
+        }
+    }
+
+    function saveChatIndex(indexArray) {
+        try {
+            localStorage.setItem(CHAT_INDEX_KEY, JSON.stringify(indexArray));
+        } catch (e) {
+            console.error("Error saving chat index:", e);
+        }
+    }
+
+    function renderHistoryList(indexArray) {
+        historyList.innerHTML = ""; // Clear existing
+        if (!indexArray || indexArray.length === 0) {
+            historyList.innerHTML = '<li class="no-history">No past chats found.</li>';
+            return;
+        }
+        indexArray.forEach((item) => {
+            const li = document.createElement("li");
+            li.dataset.chatId = item.id; // Add ID to li for easier selection
+
+            const a = document.createElement("a");
+            a.href = "#";
+            a.dataset.chatId = item.id;
+            a.textContent = item.title || `Chat ${item.id.split("_")[1] || item.id}`;
+            a.title = a.textContent; // Tooltip for potentially long titles
+            a.addEventListener("click", (e) => {
+                e.preventDefault();
+                loadChat(item.id);
+            });
+
+            // === Add Delete Button ===
+            const deleteBtn = document.createElement("button");
+            deleteBtn.className = "delete-chat-btn";
+            deleteBtn.innerHTML = "✕"; // Trash can emoji/icon (or use text/SVG/FontAwesome)
+            deleteBtn.title = "Delete Chat";
+            deleteBtn.dataset.chatId = item.id; // Store ID on button too
+            deleteBtn.addEventListener("click", handleDeleteChat);
+
+            li.appendChild(a);
+            li.appendChild(deleteBtn); // Append button to the list item
+            historyList.appendChild(li);
+        });
+    }
+
+    function renderChatMessages(messages) {
+        chatMessages.innerHTML = ""; // Clear existing messages
+        messages.forEach((message) => {
+            // Ensure highlighting is applied when loading from history
+            addMessageToChat(message, false);
+        });
+        if (messages.length === 0) {
+            chatMessages.innerHTML =
+                '<div class="message ai-message welcome-message">Chat history loaded. Ask a question!</div>';
+        }
+        // Scroll to bottom after loading messages
+        scrollToBottom();
+    }
+
+    function setActiveHistoryItem(chatId) {
+        document.querySelectorAll("#history-list li").forEach((li) => li.classList.remove("active"));
+        // Select the LI element directly now
+        const activeLi = document.querySelector(`#history-list li[data-chat-id="${chatId}"]`);
+        if (activeLi) {
+            activeLi.classList.add("active");
+        }
+    }
+
+    function loadInitialChat() {
+        const index = loadChatIndex();
+        if (index.length > 0) {
+            loadChat(index[0].id);
+        } else {
+            // Check if handleNewChat wasn't already called by query handler
+            if (!currentChatId) {
+                handleNewChat();
+            }
+        }
+    }
+
+    function loadChatHistoryIndex() {
+        const index = loadChatIndex();
+        renderHistoryList(index);
+        if (currentChatId) setActiveHistoryItem(currentChatId);
+    }
+
+    // === NEW Function to Handle Delete Click ===
+    function handleDeleteChat(event) {
+        event.stopPropagation(); // Prevent triggering loadChat on the link behind it
+        const button = event.currentTarget;
+        const chatIdToDelete = button.dataset.chatId;
+
+        if (!chatIdToDelete) return;
+
+        // Confirmation dialog
+        if (
+            window.confirm(
+                `Are you sure you want to delete this chat session?\n"${
+                    button.previousElementSibling?.textContent || "Chat " + chatIdToDelete
+                }"`
+            )
+        ) {
+            console.log(`Deleting chat: ${chatIdToDelete}`);
+
+            // Perform deletion
+            const updatedIndex = deleteChatData(chatIdToDelete);
+
+            // If the deleted chat was the currently active one, load another chat
+            if (currentChatId === chatIdToDelete) {
+                currentChatId = null; // Reset current ID
+                conversationHistory = []; // Clear state
+                if (updatedIndex.length > 0) {
+                    // Load the new top chat (most recent remaining)
+                    loadChat(updatedIndex[0].id);
+                } else {
+                    // No chats left, start a new one
+                    handleNewChat();
+                }
+            } else {
+                // If a different chat was deleted, just re-render the list
+                renderHistoryList(updatedIndex);
+                // Re-apply active state in case IDs shifted (though they shouldn't)
+                setActiveHistoryItem(currentChatId);
+            }
+        }
+    }
+
+    // === NEW Function to Delete Chat Data ===
+    function deleteChatData(chatId) {
+        // Remove chat data
+        localStorage.removeItem(CHAT_PREFIX + chatId);
+
+        // Update index
+        let index = loadChatIndex();
+        index = index.filter((item) => item.id !== chatId);
+        saveChatIndex(index);
+
+        console.log(`Chat ${chatId} data and index entry removed.`);
+        return index; // Return the updated index
+    }
+
+    // --- Virtual Scrolling Placeholder ---
+    // NOTE: Virtual scrolling is complex. For now, we do direct rendering.
+    // If performance becomes an issue with very long chats/history,
+    // investigate libraries like 'simple-virtual-scroll' or 'virtual-scroller'.
+    // You would replace parts of `renderChatMessages` and `renderHistoryList`
+    // to work with the chosen library's API (providing data and item renderers).
+    console.warn("Virtual scrolling not implemented. Performance may degrade with very long chat histories.");
+});
--- a/docs/md_v2/ask_ai/index.html
+++ b/docs/md_v2/ask_ai/index.html
@@ -0,0 +1,64 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Crawl4AI Assistant</title>
+    <!-- Link main styles first for variable access -->
+    <link rel="stylesheet" href="../assets/layout.css">
+    <link rel="stylesheet" href="../assets/styles.css">
+    <!-- Link specific AI styles -->
+    <link rel="stylesheet" href="../assets/highlight.css">
+    <link rel="stylesheet" href="ask-ai.css">
+</head>
+<body>
+    <div class="ai-assistant-container">
+
+        <!-- Left Sidebar: Conversation History -->
+        <aside id="history-panel" class="sidebar left-sidebar">
+            <header>
+                <h3>History</h3>
+                <button id="new-chat-button" class="btn btn-sm">New Chat</button>
+            </header>
+            <ul id="history-list">
+                <!-- History items populated by JS -->
+            </ul>
+        </aside>
+
+        <!-- Main Area: Chat Interface -->
+        <main id="chat-panel">
+            <div id="chat-messages">
+                <!-- Chat messages populated by JS -->
+                 <div class="message ai-message welcome-message">
+                    Welcome to the Crawl4AI Assistant! How can I help you today?
+                 </div>
+            </div>
+            <div id="chat-input-area">
+                <!-- Loading indicator for general waiting (optional) -->
+                <!-- <div class="loading-indicator" style="display: none;">Thinking...</div> -->
+                <textarea id="chat-input" placeholder="Ask about Crawl4AI..." rows="2"></textarea> 
+                <button id="send-button">Send</button>
+            </div>
+        </main>
+
+        <!-- Right Sidebar: Citations / Context -->
+        <aside id="citations-panel" class="sidebar right-sidebar">
+            <header>
+                <h3>Citations</h3>
+            </header>
+            <ul id="citations-list">
+                <!-- Citations populated by JS -->
+                <li class="no-citations">No citations for this response yet.</li>
+            </ul>
+        </aside>
+
+    </div>
+
+    <!-- Third-party libraries: Marked.js (loaded from CDN) and the local highlight.min.js -->
+    <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
+    <script src="../assets/highlight.min.js"></script> 
+
+    <!-- Your AI Assistant Logic -->
+    <script src="ask-ai.js"></script>
+</body>
+</html>
--- a/docs/md_v2/assets/copy_code.js
+++ b/docs/md_v2/assets/copy_code.js
@@ -0,0 +1,62 @@
+// ==== File: docs/md_v2/assets/copy_code.js ====
+
+document.addEventListener('DOMContentLoaded', () => {
+    // Target specifically code blocks within the main content area
+    const codeBlocks = document.querySelectorAll('#terminal-mkdocs-main-content pre > code');
+
+    codeBlocks.forEach((codeElement) => {
+        const preElement = codeElement.parentElement; // The <pre> tag
+
+        // Ensure the <pre> tag can contain a positioned button
+        if (window.getComputedStyle(preElement).position === 'static') {
+            preElement.style.position = 'relative';
+        }
+
+        // Create the button
+        const copyButton = document.createElement('button');
+        copyButton.className = 'copy-code-button';
+        copyButton.type = 'button';
+        copyButton.setAttribute('aria-label', 'Copy code to clipboard');
+        copyButton.title = 'Copy code to clipboard';
+        copyButton.innerHTML = 'Copy'; // Or use an icon like an SVG or FontAwesome class
+
+        // Append the button to the <pre> element
+        preElement.appendChild(copyButton);
+
+        // Add click event listener
+        copyButton.addEventListener('click', () => {
+            copyCodeToClipboard(codeElement, copyButton);
+        });
+    });
+
+    async function copyCodeToClipboard(codeElement, button) {
+        // Use innerText to get the rendered text content, preserving line breaks
+        const textToCopy = codeElement.innerText;
+
+        try {
+            await navigator.clipboard.writeText(textToCopy);
+
+            // Visual feedback
+            button.innerHTML = 'Copied!';
+            button.classList.add('copied');
+            button.disabled = true; // Temporarily disable
+
+            // Revert button state after a short delay
+            setTimeout(() => {
+                button.innerHTML = 'Copy';
+                button.classList.remove('copied');
+                button.disabled = false;
+            }, 2000); // Show "Copied!" for 2 seconds
+
+        } catch (err) {
+            console.error('Failed to copy code: ', err);
+            // Optional: Provide error feedback on the button
+            button.innerHTML = 'Error';
+            setTimeout(() => {
+                button.innerHTML = 'Copy';
+            }, 2000);
+        }
+    }
+
+    console.log("Copy Code Button script loaded.");
+});
--- a/docs/md_v2/assets/floating_ask_ai_button.js
+++ b/docs/md_v2/assets/floating_ask_ai_button.js
@@ -0,0 +1,39 @@
+// ==== File: docs/md_v2/assets/floating_ask_ai_button.js ====
+
+document.addEventListener('DOMContentLoaded', () => {
+    const askAiPagePath = '/core/ask-ai/'; // IMPORTANT: Adjust this path if needed!
+    const currentPath = window.location.pathname;
+
+    // Determine the base URL for constructing the link correctly,
+    // especially if deployed in a sub-directory.
+    // This assumes a simple structure; adjust if needed.
+    const baseUrl = window.location.origin + (currentPath.startsWith('/core/') ? '../..' : '');
+
+
+    // Check if the current page IS the Ask AI page
+    // Use includes() for flexibility (handles trailing slash or .html)
+    if (currentPath.includes(askAiPagePath.replace(/\/$/, ''))) { // Remove trailing slash for includes check
+        console.log("Floating Ask AI Button: Not adding button on the Ask AI page itself.");
+        return; // Don't add the button on the target page
+    }
+
+    // --- Create the button ---
+    const fabLink = document.createElement('a');
+    fabLink.className = 'floating-ask-ai-button';
+    fabLink.href = askAiPagePath; // Construct the correct URL
+    fabLink.title = 'Ask Crawl4AI Assistant';
+    fabLink.setAttribute('aria-label', 'Ask Crawl4AI Assistant');
+
+    // Add content (using SVG icon for better visuals)
+    fabLink.innerHTML = `
+        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="24" height="24" fill="currentColor">
+            <path d="M20 2H4c-1.1 0-2 .9-2 2v12c0 1.1.9 2 2 2h14l4 4V4c0-1.1-.9-2-2-2zm-2 12H6v-2h12v2zm0-3H6V9h12v2zm0-3H6V6h12v2z"/>
+        </svg>
+        <span>Ask AI</span>
+    `;
+
+    // Append to body
+    document.body.appendChild(fabLink);
+
+    console.log("Floating Ask AI Button added.");
+});
--- a/docs/md_v2/assets/github_stats.js
+++ b/docs/md_v2/assets/github_stats.js
@@ -0,0 +1,119 @@
+// ==== File: assets/github_stats.js ====
+
+document.addEventListener('DOMContentLoaded', async () => {
+    // --- Configuration ---
+    const targetHeaderSelector = '.terminal .container:first-child'; // Selector for your header container
+    const insertBeforeSelector = '.terminal-nav'; // Selector for the element to insert the badge BEFORE (e.g., the main nav)
+                                                  // Or set to null to append at the end of the header.
+
+    // --- Find elements ---
+    const headerContainer = document.querySelector(targetHeaderSelector);
+    if (!headerContainer) {
+        console.warn('GitHub Stats: Header container not found with selector:', targetHeaderSelector);
+        return;
+    }
+
+    const repoLinkElement = headerContainer.querySelector('a[href*="github.com/"]'); // Find the existing GitHub link
+    let repoUrl = 'https://github.com/unclecode/crawl4ai';
+    // if (repoLinkElement) {
+    //     repoUrl = repoLinkElement.href;
+    // } else {
+    //     // Fallback: Try finding from config (requires template injection - harder)
+    //     // Or hardcode if necessary, but reading from the link is better.
+    //      console.warn('GitHub Stats: GitHub repo link not found in header.');
+    //      // Try to get repo_url from mkdocs config if available globally (less likely)
+    //      // repoUrl = window.mkdocs_config?.repo_url; // Requires setting this variable
+    //      // if (!repoUrl) return; // Exit if still no URL
+    //      return; // Exit for now if link isn't found
+    // }
+
+
+    // --- Extract Repo Owner/Name ---
+    let owner = '';
+    let repo = '';
+    try {
+        const url = new URL(repoUrl);
+        const pathParts = url.pathname.split('/').filter(part => part.length > 0);
+        if (pathParts.length >= 2) {
+            owner = pathParts[0];
+            repo = pathParts[1];
+        }
+    } catch (e) {
+        console.error('GitHub Stats: Could not parse repository URL:', repoUrl, e);
+        return;
+    }
+
+    if (!owner || !repo) {
+        console.warn('GitHub Stats: Could not extract owner/repo from URL:', repoUrl);
+        return;
+    }
+
+    // --- Get Version (Attempt to extract from site title) ---
+    let version = '';
+    const siteTitleElement = headerContainer.querySelector('.terminal-title, .site-title'); // Adjust selector based on theme's title element
+    // Example title: "Crawl4AI Documentation (v0.5.x)"
+    if (siteTitleElement) {
+         const match = siteTitleElement.textContent.match(/\((v?[^)]+)\)/); // Look for text in parentheses starting with 'v' (optional)
+         if (match && match[1]) {
+             version = match[1].trim();
+         }
+    }
+     if (!version) {
+        console.info('GitHub Stats: Could not extract version from title. You might need to adjust the selector or regex.');
+        // You could fallback to config.extra.version if injected into JS
+        // version = window.mkdocs_config?.extra?.version || 'N/A';
+     }
+
+
+    // --- Fetch GitHub API Data ---
+    let stars = '...';
+    let forks = '...';
+    try {
+        const apiUrl = `https://api.github.com/repos/${owner}/${repo}`;
+        const response = await fetch(apiUrl);
+
+        if (response.ok) {
+            const data = await response.json();
+            // Format large numbers (optional)
+            stars = data.stargazers_count > 1000 ? `${(data.stargazers_count / 1000).toFixed(1)}k` : data.stargazers_count;
+            forks = data.forks_count > 1000 ? `${(data.forks_count / 1000).toFixed(1)}k` : data.forks_count;
+        } else {
+            console.warn(`GitHub Stats: API request failed with status ${response.status}. Rate limit exceeded?`);
+            stars = 'N/A';
+            forks = 'N/A';
+        }
+    } catch (error) {
+        console.error('GitHub Stats: Error fetching repository data:', error);
+        stars = 'N/A';
+        forks = 'N/A';
+    }
+
+    // --- Create Badge HTML ---
+    const badgeContainer = document.createElement('div');
+    badgeContainer.className = 'github-stats-badge';
+
+    // Use innerHTML for simplicity, including potential icons (requires FontAwesome or similar)
+    // Ensure your theme loads FontAwesome or add it yourself if you want icons.
+    badgeContainer.innerHTML = `
+        <a href="${repoUrl}" target="_blank" rel="noopener">
+            <!-- Optional Icon (FontAwesome example) -->
+            <!-- <i class="fab fa-github"></i> -->
+             <span class="repo-name">${owner}/${repo}</span>
+             ${version ? `<span class="stat version"><i class="fas fa-tag"></i> ${version}</span>` : ''}
+            <span class="stat stars"><i class="fas fa-star"></i> ${stars}</span>
+            <span class="stat forks"><i class="fas fa-code-branch"></i> ${forks}</span>
+        </a>
+    `;
+
+    // --- Inject Badge into Header ---
+    const insertBeforeElement = insertBeforeSelector ? headerContainer.querySelector(insertBeforeSelector) : null;
+    if (insertBeforeElement) {
+        // headerContainer.insertBefore(badgeContainer, insertBeforeElement);
+        headerContainer.querySelector(insertBeforeSelector).appendChild(badgeContainer); 
+    } else {
+        headerContainer.appendChild(badgeContainer); 
+    }
+
+     console.info('GitHub Stats: Badge added to header.');
+
+});
--- a/docs/md_v2/assets/layout.css
+++ b/docs/md_v2/assets/layout.css
@@ -0,0 +1,441 @@
+/* ==== File: assets/layout.css (Non-Fluid Centered Layout) ==== */
+
+:root {
+    --header-height: 55px; /* Adjust if needed */
+    --sidebar-width: 280px; /* Adjust if needed */
+    --toc-width: 340px; /* As specified */
+    --content-max-width: 90em; /* Max width for the centered content */
+    --layout-transition-speed: 0.2s;
+    --global-space: 10px;
+}
+
+/* --- Basic Setup --- */
+html {
+    scroll-behavior: smooth;
+    scroll-padding-top: calc(var(--header-height) + 15px);
+    box-sizing: border-box;
+}
+*, *:before, *:after {
+    box-sizing: inherit;
+}
+
+body {
+    padding-top: 0;
+    padding-bottom: 0;
+    background-color: var(--background-color);
+    color: var(--font-color);
+    /* Prevents horizontal scrollbars during transitions */
+    overflow-x: hidden;
+}
+
+/* --- Fixed Header --- */
+/* Full width, fixed header */
+.terminal .container:first-child { /* Assuming this targets the header container */
+    position: fixed;
+    top: 0;
+    left: 0;
+    right: 0;
+    height: var(--header-height);
+    background-color: var(--background-color);
+    z-index: 1000;
+    border-bottom: 1px solid var(--progress-bar-background);
+    max-width: none; /* Override any container max-width */
+    padding: 0 calc(var(--global-space) * 2);
+}
+
+/* --- Main Layout Container (Below Header) --- */
+/* This container just provides space for the fixed header */
+.container:has(.terminal-mkdocs-main-grid) {
+    margin: 0 auto;
+    padding: 0;
+    padding-top: var(--header-height); /* Space for fixed header */
+}
+
+/* --- Flex Container: Grid holding content and toc (CENTERED) --- */
+/* THIS is the main centered block */
+.terminal-mkdocs-main-grid {
+    display: flex;
+    align-items: flex-start;
+    /* Enforce max-width; the auto margins below would center the block */
+    max-width: var(--content-max-width);
+    /* NOTE(review): this margin-left is overridden by the later margin-left
+       declaration in this rule, so only margin-right: auto takes effect and the
+       block is offset by the sidebar width rather than centered - confirm intended. */
+    margin-left: auto;
+    margin-right: auto;
+    position: relative;
+    /* Apply side padding within the block */
+    padding-left: calc(var(--global-space) * 2);
+    padding-right: calc(var(--global-space) * 2);
+    /* Add margin-left to clear the fixed sidebar (wins over margin-left: auto above) */
+    margin-left: var(--sidebar-width);
+}
+
+/* --- 1. Fixed Left Sidebar (Viewport Relative) --- */
+#terminal-mkdocs-side-panel {
+    /* Fixed to the viewport, spanning from below the header to the bottom edge */
+    position: fixed;
+    top: var(--header-height);
+    /* Never negative: 0 until the viewport term exceeds --content-max-width.
+       NOTE(review): the calculation uses 90vw rather than 100vw - confirm intended. */
+    left: max(0px, calc((90vw - var(--content-max-width)) / 2)); 
+    bottom: 0;
+    width: var(--sidebar-width);
+    background-color: var(--background-color);
+    border-right: 1px solid var(--progress-bar-background);
+    /* Scroll the panel's own content when it is taller than the panel */
+    overflow-y: auto;
+    z-index: 900;
+    padding: 1em calc(var(--global-space) * 2);
+    padding-bottom: 2em;
+    /* transition: left var(--layout-transition-speed) ease-in-out; */
+}
+
+/* --- 2. Main Content Area (Within Centered Grid) --- */
+#terminal-mkdocs-main-content {
+    flex-grow: 1;
+    flex-shrink: 1;
+    min-width: 0; /* Flexbox shrink fix */
+
+    /* No left/right margins needed here - handled by parent grid */
+    margin-left: 0;
+    margin-right: 0;
+
+    /* Internal Padding */
+    padding: 1.5em 2em;
+
+    position: relative;
+    z-index: 1;
+}
+
+/* --- 3. Right Table of Contents (Sticky, Within Centered Grid) --- */
+#toc-sidebar {
+    /* Fixed-width flex item: never shrinks below --toc-width */
+    flex-basis: var(--toc-width);
+    flex-shrink: 0;
+    width: var(--toc-width);
+
+    position: sticky; /* Sticks within the grid, just below the header */
+    top: var(--header-height);
+    align-self: stretch;
+    /* Fill the viewport height that remains under the header; scroll internally */
+    height: calc(100vh - var(--header-height));
+    overflow-y: auto;
+
+    padding: 1.5em 1em;
+    font-size: 0.85em;
+    border-left: 1px solid var(--progress-bar-background);
+    z-index: 800;
+    /* display: none; (disabled here - JS handles) */
+}
+
+/* (ToC link styles remain the same) */
+#toc-sidebar h4 { margin-top: 0; margin-bottom: 1em; font-size: 1.1em; color: var(--secondary-color); padding-left: 0.8em; }
+#toc-sidebar ul { list-style: none; padding: 0; margin: 0; }
+#toc-sidebar ul li a { display: block; padding: 0.3em 0; color: var(--secondary-color); text-decoration: none; border-left: 3px solid transparent; padding-left: 0.8em; transition: all 0.1s ease-in-out; line-height: 1.4; word-break: break-word; }
+#toc-sidebar ul li.toc-level-3 a { padding-left: 1.8em; }
+#toc-sidebar ul li.toc-level-4 a { padding-left: 2.8em; }
+#toc-sidebar ul li a:hover { color: var(--font-color); background-color: rgba(255, 255, 255, 0.05); }
+#toc-sidebar ul li a.active { color: var(--primary-color); border-left-color: var(--primary-color); background-color: rgba(80, 255, 255, 0.08); }
+
+
+/* --- Footer Styling (Respects Centered Layout) --- */
+footer {
+    background-color: var(--code-bg-color);
+    color: var(--secondary-color);
+    position: relative;
+    z-index: 10;
+    margin-top: 2em;
+
+    /* Apply margin-left to clear the fixed sidebar */
+    margin-left: var(--sidebar-width);
+
+    /* Constrain width relative to the centered grid it follows */
+    max-width: calc(var(--content-max-width) - var(--sidebar-width));
+    margin-right: auto; /* Keep it left-aligned within the space next to sidebar */
+
+    /* Use padding consistent with the grid */
+    padding: 2em calc(var(--global-space) * 2);
+}
+
+/* Adjust footer grid if needed */
+.terminal-mkdocs-footer-grid {
+    display: grid;
+    grid-template-columns: 1fr auto;
+    gap: 1em;
+    align-items: center;
+}
+
+/* ==========================================================================
+   RESPONSIVENESS (Adapting the Non-Fluid Layout)
+   ========================================================================== */
+
+/* --- Medium screens: Hide ToC --- */
+@media screen and (max-width: 1200px) {
+    #toc-sidebar {
+        display: none;
+    }
+
+    .terminal-mkdocs-main-grid {
+        /* Grid adjusts automatically as ToC is removed */
+        /* Ensure grid padding remains */
+         padding-left: calc(var(--global-space) * 2);
+         padding-right: calc(var(--global-space) * 2);
+    }
+
+    #terminal-mkdocs-main-content {
+        /* Content area naturally expands */
+    }
+
+    footer {
+        /* Footer still respects the left sidebar and overall max width */
+        margin-left: var(--sidebar-width);
+        max-width: calc(var(--content-max-width) - var(--sidebar-width));
+        /* Padding remains consistent */
+         padding-left: calc(var(--global-space) * 2);
+         padding-right: calc(var(--global-space) * 2);
+    }
+}
+
+/* --- Small screens: Hide left sidebar, full width content & footer --- */
+@media screen and (max-width: 768px) {
+    /* Off-canvas sidebar: parked one sidebar-width to the left of the
+       viewport until the .sidebar-visible class moves it back to left: 0. */
+    #terminal-mkdocs-side-panel {
+        z-index: 1100;
+        left: calc(-1 * var(--sidebar-width));
+        box-shadow: 2px 0 10px rgba(0,0,0,0.3);
+    }
+
+    #terminal-mkdocs-side-panel.sidebar-visible {
+        left: 0;
+    }
+
+    /* Main grid: no side margins, no width cap, single-unit gutters. */
+    .terminal-mkdocs-main-grid {
+        max-width: 100%;
+        margin-right: 0;
+        margin-left: 0;
+        padding-right: var(--global-space);
+        padding-left: var(--global-space);
+    }
+
+    #terminal-mkdocs-main-content {
+        padding: 1.5em 1em;
+    }
+
+    /* Footer loses its left offset and width cap at this size. */
+    footer {
+        max-width: 100%;
+        margin-left: 0;
+        padding: 2em 1em;
+    }
+
+    /* Footer items stack in a single centred column. */
+    .terminal-mkdocs-footer-grid {
+        gap: 0.5em;
+        grid-template-columns: 1fr;
+        text-align: center;
+    }
+
+    /* NOTE (original author's reminder): the sidebar toggle button and the
+       overlay still need JavaScript. */
+}
+
+
+/* ==== GitHub Stats Badge Styling ==== */
+
+/* Wrapper: sits inline with its siblings, slightly smaller text. */
+.github-stats-badge {
+    display: inline-block;
+    vertical-align: middle;
+    margin-left: 2em;
+    font-size: 0.9em;
+}
+
+/* The link is the visible "pill": a bordered flex row of items. */
+.github-stats-badge a {
+    display: flex;
+    align-items: center;
+    gap: 0.8em;
+    padding: 0.2em 0.5em;
+    border: 1px solid var(--progress-bar-background);
+    border-radius: 4px;
+    color: var(--secondary-color);
+    text-decoration: none;
+    transition: color 0.2s, background-color 0.2s;
+}
+
+/* Hover: text switches to the main font colour, border colour fills the pill. */
+.github-stats-badge a:hover {
+    background-color: var(--progress-bar-background);
+    color: var(--font-color);
+}
+
+/* Repository name gets the main font colour and a medium weight. */
+.github-stats-badge .repo-name {
+    font-weight: 500;
+    color: var(--font-color);
+}
+
+/* Each stat stays on one line. */
+.github-stats-badge .stat {
+    white-space: nowrap;
+}
+
+/* <i> icons inside a stat: dimmed, with a small gap before the value. */
+.github-stats-badge .stat i {
+    color: var(--secondary-dimmed-color);
+    margin-right: 0.3em;
+}
+
+
+/* Adjust positioning relative to search/nav if needed */
+/* Example: If search is floated right */
+/* .terminal-nav { float: left; } */
+/* .github-stats-badge { float: left; } */
+/* #mkdocs-search-query { float: right; } */
+
+/* --- Responsive adjustments --- */
+/* Up to 900px: tighten the badge and drop the repository name. */
+@media screen and (max-width: 900px) {
+    .github-stats-badge {
+        margin-left: 1em;
+    }
+
+    .github-stats-badge a {
+        gap: 0.5em;
+    }
+
+    .github-stats-badge .repo-name {
+        display: none;
+    }
+}
+
+/* Up to 768px: the badge is removed altogether. */
+@media screen and (max-width: 768px) {
+    .github-stats-badge {
+        display: none;
+    }
+}
+
+/* --- Ask AI Selection Button --- */
+/* Small pill-shaped button; colours fall back to literals when the theme
+   variables are not defined. */
+.ask-ai-selection-button {
+    padding: 4px 8px;
+    border: none;
+    border-radius: 4px;
+    background-color: var(--primary-dimmed-color, #09b5a5);
+    color: var(--background-color, #070708);
+    font-size: 0.8em;
+    white-space: nowrap;
+    cursor: pointer;
+    box-shadow: 0 2px 5px rgba(0, 0, 0, 0.3);
+    transition: background-color 0.2s ease;
+}
+
+/* Hover swaps the dimmed primary colour for the full one. */
+.ask-ai-selection-button:hover {
+    background-color: var(--primary-color, #50ffff);
+}
+
+/* ==== Code Block Enhancements ==== */
+
+/* --- Copy Code Button Styling --- */
+
+/* Ensure the parent <pre> can contain the absolutely positioned button */
+/* <pre> is the positioning context for its copy button. The extra top
+   padding leaves a strip above the code for the button, which is anchored
+   to the top-left corner. */
+#terminal-mkdocs-main-content pre {
+    position: relative;
+    padding-top: 2.5em;
+    padding-right: 1em;
+}
+
+.copy-code-button {
+    position: absolute;
+    top: 0.5em;
+    left: 0.5em;
+    z-index: 1; /* Above the code text */
+
+    background-color: var(--progress-bar-background, #444);
+    color: var(--font-color, #eaeaea);
+    border: 1px solid var(--secondary-color, #727578);
+    padding: 3px 8px;
+    font-size: 0.8em;
+    font-family: var(--font-stack, monospace);
+    border-radius: 4px;
+    cursor: pointer;
+    opacity: 0; /* Invisible until the <pre> is hovered or the button is focused */
+    transition: opacity 0.2s ease-in-out, background-color 0.2s ease, color 0.2s ease;
+    white-space: nowrap;
+}
+
+/* Hovering the code block reveals the button at partial opacity. */
+#terminal-mkdocs-main-content pre:hover .copy-code-button {
+    opacity: 0.8;
+}
+
+/* Hovering the button itself makes it fully opaque. */
+.copy-code-button:hover {
+    opacity: 1;
+    background-color: var(--secondary-color, #727578);
+}
+
+/* Focus makes the button visible even without hover. The outline colour
+   carries the same literal fallback used for --primary-color elsewhere in
+   this file, so the focus ring still renders if the variable is undefined. */
+.copy-code-button:focus {
+    opacity: 1;
+    outline: 1px dashed var(--primary-color, #50ffff);
+}
+
+/* "Copied!" state (class .copied). */
+.copy-code-button.copied {
+    background-color: var(--primary-dimmed-color, #09b5a5);
+    color: var(--background-color, #070708);
+    border-color: var(--primary-dimmed-color, #09b5a5);
+    opacity: 1;
+}
+
+/* Keep the "copied" background while hovered (overrides the :hover rule above). */
+.copy-code-button.copied:hover {
+    background-color: var(--primary-dimmed-color, #09b5a5);
+}
+
+/* ==== Floating Action Button ==== */
+
+/* --- Floating Ask AI Button --- */
+/* Circular button pinned to the bottom-right corner of the viewport. */
+.floating-ask-ai-button {
+    position: fixed;
+    bottom: 25px;
+    right: 25px;
+    z-index: 1050;
+
+    background-color: var(--primary-dimmed-color, #09b5a5);
+    color: var(--background-color, #070708);
+    border: none;
+    border-radius: 50%; /* Equal width/height + 50% radius = circle */
+    width: 60px;
+    height: 60px;
+    padding: 10px;
+    box-shadow: 0 4px 10px rgba(0, 0, 0, 0.4);
+    cursor: pointer;
+    transition: background-color 0.2s ease, transform 0.2s ease;
+
+    /* Icon stacked above the label, both centred. */
+    display: flex;
+    flex-direction: column;
+    align-items: center;
+    justify-content: center;
+    text-decoration: none;
+    text-align: center;
+}
+
+.floating-ask-ai-button svg {
+    width: 24px;
+    height: 24px;
+}
+
+.floating-ask-ai-button span {
+    font-size: 0.7em;
+    margin-top: 2px; /* Gap between icon and label */
+    display: block;
+    line-height: 1;
+}
+
+/* Hover: full primary colour and a slight grow. */
+.floating-ask-ai-button:hover {
+    background-color: var(--primary-color, #50ffff);
+    transform: scale(1.05);
+}
+
+/* Focus ring; the literal fallback matches the other --primary-color uses
+   in this file so the ring still renders if the variable is undefined. */
+.floating-ask-ai-button:focus {
+    outline: 2px solid var(--primary-color, #50ffff);
+    outline-offset: 2px;
+}
+
+/* Up to 768px: slightly smaller button, closer to the corner.
+   To hide the label at this size as well, add:
+   .floating-ask-ai-button span { display: none; } */
+@media screen and (max-width: 768px) {
+    .floating-ask-ai-button {
+        width: 55px;
+        height: 55px;
+        bottom: 20px;
+        right: 20px;
+    }
+}
--- a/docs/md_v2/assets/selection_ask_ai.js
+++ b/docs/md_v2/assets/selection_ask_ai.js
@@ -0,0 +1,109 @@
+// ==== File: docs/assets/selection_ask_ai.js ====
+
+document.addEventListener('DOMContentLoaded', () => {
+    let askAiButton = null;
+    const askAiPageUrl = '/core/ask-ai/'; // Adjust if your Ask AI page path is different
+
+    function createAskAiButton() {
+        // Builds the "Ask AI" button: hidden, absolutely positioned, wired to
+        // handleAskAiClick and attached to <body>. Returns the new element.
+        const btn = document.createElement('button');
+        btn.id = 'ask-ai-selection-btn';
+        btn.className = 'ask-ai-selection-button';
+        btn.textContent = 'Ask AI';
+
+        // Inline styles: start hidden; a high z-index keeps it above page content.
+        Object.assign(btn.style, {
+            display: 'none',
+            position: 'absolute',
+            zIndex: '1500',
+        });
+
+        btn.addEventListener('click', handleAskAiClick);
+        document.body.appendChild(btn);
+        return btn;
+    }
+
+    function getSafeSelectedText() {
+        // Returns the trimmed text of the current selection, or null when
+        // there is no range, the selection lies inside the Ask AI button,
+        // or the text is empty after trimming.
+        const sel = window.getSelection();
+        const hasRange = Boolean(sel) && sel.rangeCount !== 0;
+        if (!hasRange) {
+            return null;
+        }
+
+        const ancestor = sel.getRangeAt(0).commonAncestorContainer;
+        const insideButton = Boolean(askAiButton) && askAiButton.contains(ancestor);
+        if (insideButton) {
+            return null;
+        }
+
+        const trimmed = sel.toString().trim();
+        return trimmed.length === 0 ? null : trimmed;
+    }
+
+    function positionButton(event) {
+        // Shows the Ask AI button just above the top-right corner of the
+        // current selection, or hides it when nothing is selected.
+        // `event` is accepted for the caller's convenience and is not read.
+        const selection = window.getSelection();
+        if (!selection || selection.rangeCount === 0 || selection.isCollapsed) {
+            hideButton();
+            return;
+        }
+
+        const rect = selection.getRangeAt(0).getBoundingClientRect();
+
+        // Un-hide BEFORE measuring: offsetHeight reads 0 while display is
+        // 'none', which would place the button on top of the selection.
+        askAiButton.style.display = 'block';
+        const buttonHeight = askAiButton.offsetHeight;
+
+        // rect is viewport-relative; add the scroll offsets to get document
+        // coordinates. Clamp at 0 so a selection on the first line of the
+        // page does not push the button above the document.
+        const buttonTop = Math.max(0, rect.top + window.scrollY - buttonHeight - 5); // 5px above
+        const buttonLeft = rect.right + window.scrollX + 5; // 5px to the right
+
+        askAiButton.style.top = `${buttonTop}px`;
+        askAiButton.style.left = `${buttonLeft}px`;
+    }
+
+    function hideButton() {
+        // Hides the Ask AI button; a no-op until the button has been created.
+        if (!askAiButton) {
+            return;
+        }
+        askAiButton.style.display = 'none';
+    }
+
+    function handleAskAiClick(event) {
+        // Click handler for the Ask AI button: navigates to the Ask AI page
+        // with the selected text in the `qq` query parameter (Base64 of the
+        // UTF-8 bytes, percent-encoded), then hides the button.
+        event.stopPropagation(); // Keep this click away from other document-level click handlers
+        const selectedText = getSafeSelectedText();
+        if (selectedText) {
+            console.log("Selected Text:", selectedText);
+            // encodeURIComponent + unescape turns the string into UTF-8 bytes
+            // so btoa only ever sees code points 0-255.
+            const base64Text = btoa(unescape(encodeURIComponent(selectedText)));
+            // Base64 output can contain '+', '/' and '='. A bare '+' in a
+            // query string is decoded as a space, which corrupts the payload,
+            // so the Base64 text itself must be percent-encoded.
+            const encodedText = encodeURIComponent(base64Text);
+            const targetUrl = `${askAiPageUrl}?qq=${encodedText}`;
+            console.log("Navigating to:", targetUrl);
+            window.location.href = targetUrl; // Navigate to Ask AI page
+        }
+        hideButton(); // Hide after click
+    }
+
+    // --- Event Listeners ---
+
+    // Show button on mouse up after selection
+    document.addEventListener('mouseup', (event) => {
+        // Defer briefly so the selection is registered before it is read.
+        setTimeout(() => {
+            if (!getSafeSelectedText()) {
+                hideButton();
+                return;
+            }
+
+            // Create the button lazily, on the first non-empty selection.
+            askAiButton = askAiButton || createAskAiButton();
+
+            // A mouseup on the button itself must not reposition it.
+            if (event.target === askAiButton) {
+                return;
+            }
+            positionButton(event);
+        }, 10);
+    });
+
+    // Hide button on scroll or click elsewhere
+    document.addEventListener('mousedown', ({ target }) => {
+        // Pressing the mouse anywhere except on the button dismisses it.
+        const pressedOnButton = target === askAiButton;
+        if (askAiButton && !pressedOnButton) {
+            hideButton();
+        }
+    });
+    // Registered in the capture phase (third argument `true`).
+    document.addEventListener('scroll', hideButton, true);
+
+    console.log("Selection Ask AI script loaded.");
+});
--- a/docs/md_v2/assets/styles.css
+++ b/docs/md_v2/assets/styles.css
@@ -6,8 +6,8 @@
 }

 :root {
-    --global-font-size: 16px;
-    --global-code-font-size: 16px;
+    --global-font-size: 14px;
+    --global-code-font-size: 13px;
    --global-line-height: 1.5em;
    --global-space: 10px;
    --font-stack: Menlo, Monaco, Lucida Console, Liberation Mono, DejaVu Sans Mono, Bitstream Vera Sans Mono,
@@ -50,8 +50,17 @@
    --display-h1-decoration: none;

    --display-h1-decoration: none;
+
+    --header-height: 65px; /* Adjust based on your actual header height */
+    --sidebar-width: 280px; /* Adjust based on your desired sidebar width */
+    --toc-width: 240px; /* Adjust based on your desired ToC width */
+    --layout-transition-speed: 0.2s; /* For potential future animations */
+
+    --page-width : 100em; /* Adjust based on your design */
 }

+
+
 /* body {
    background-color: var(--background-color);
    color: var(--font-color);
@@ -256,4 +265,6 @@ div.badges a {
 }
 div.badges a > img {
    width: auto;
-}
+}
+
+
--- a/docs/md_v2/assets/toc.js
+++ b/docs/md_v2/assets/toc.js
@@ -0,0 +1,144 @@
+// ==== File: assets/toc.js ====
+
+document.addEventListener('DOMContentLoaded', () => {
+    // Without the main content area there is nothing to index.
+    const mainContent = document.getElementById('terminal-mkdocs-main-content');
+    if (!mainContent) {
+        console.warn("TOC Generator: Main content area '#terminal-mkdocs-main-content' not found.");
+        return;
+    }
+
+    // Reuse an existing #toc-sidebar; otherwise create one (hidden for now)
+    // as the last child of the main grid.
+    const tocContainer = document.getElementById('toc-sidebar');
+    const mainGrid = document.querySelector('.terminal-mkdocs-main-grid');
+    let tocElement = tocContainer;
+
+    if (!tocElement && !mainGrid) {
+        console.warn("TOC Generator: Flex container '.terminal-mkdocs-main-grid' not found to append ToC.");
+        return;
+    }
+    if (!tocElement) {
+        tocElement = document.createElement('aside');
+        tocElement.id = 'toc-sidebar';
+        tocElement.style.display = 'none';
+        mainGrid.appendChild(tocElement);
+        console.info("TOC Generator: Created '#toc-sidebar' element.");
+    }
+
+    // --- Find Headings (h2, h3, h4 are common for ToC) ---
+    const headings = mainContent.querySelectorAll('h2, h3, h4');
+    if (!headings.length) {
+        console.info("TOC Generator: No headings found on this page. ToC not generated.");
+        tocElement.style.display = 'none';
+        return;
+    }
+
+    // One <li><a> per heading; every heading is also queued for the scroll spy.
+    const tocList = document.createElement('ul');
+    const observerTargets = [];
+
+    for (const [index, heading] of headings.entries()) {
+        // Headings without an id get a generated one so the link has a target:
+        // lower-cased text, whitespace runs -> '-', everything outside [a-z0-9-] dropped.
+        if (!heading.id) {
+            const slug = heading.textContent
+                .toLowerCase()
+                .replace(/\s+/g, '-')
+                .replace(/[^a-z0-9-]/g, '');
+            heading.id = `toc-heading-${index}-${slug}`;
+        }
+
+        const link = document.createElement('a');
+        link.href = `#${heading.id}`;
+        link.textContent = heading.textContent;
+
+        // 'H2' -> 2, etc.; exposed as a class so each level can be styled.
+        const level = Number.parseInt(heading.tagName.slice(1), 10);
+        const listItem = document.createElement('li');
+        listItem.classList.add(`toc-level-${level}`);
+        listItem.appendChild(link);
+
+        tocList.appendChild(listItem);
+        observerTargets.push(heading);
+    }
+
+    // --- Populate and Show ToC ---
+    // Optional: Add a title
+    const tocTitle = document.createElement('h4');
+    tocTitle.textContent = 'On this page';
+
+    // Empty the container, insert title + list, then clear the inline
+    // display override so the container is no longer forced hidden.
+    tocElement.textContent = '';
+    tocElement.append(tocTitle, tocList);
+    tocElement.style.display = '';
+
+    console.info(`TOC Generator: Generated ToC with ${headings.length} items.`);
+
+    // --- Scroll Spy using Intersection Observer ---
+    let activeLink = null; // ToC link currently carrying the 'active' class
+
+    // Read --header-height from the root element. Fall back to 0px when the
+    // variable is not defined: an empty value would produce the rootMargin
+    // "- 0px -60% 0px", which the IntersectionObserver constructor rejects.
+    const headerOffset = getComputedStyle(document.documentElement)
+        .getPropertyValue('--header-height')
+        .trim() || '0px';
+
+    const observerOptions = {
+        // Shrink the observed area: the top edge moves down by the header
+        // height and the bottom edge moves up by 60% of the viewport.
+        rootMargin: `-${headerOffset} 0px -60% 0px`,
+        threshold: 0 // Fire as soon as any part of a heading crosses the boundary
+    };
+
+    const observerCallback = (entries) => {
+        // Scroll-spy handler: among the entries in this batch that are
+        // intersecting, pick the one nearest the top and mark its ToC link
+        // as active. When nothing intersects, the previous link stays active.
+        //
+        // The best candidate is tracked as the *entry* (not its target
+        // element): boundingClientRect exists only on the entry, so comparing
+        // against the element would throw as soon as two entries intersect.
+        let topmostEntry = null;
+        let topmostLink = null;
+
+        entries.forEach(entry => {
+            if (!entry.isIntersecting) return;
+
+            const link = tocElement.querySelector(`a[href="#${entry.target.id}"]`);
+            if (!link) return;
+
+            if (!topmostEntry || entry.boundingClientRect.top < topmostEntry.boundingClientRect.top) {
+                topmostEntry = entry;
+                topmostLink = link;
+            }
+        });
+
+        if (!topmostLink || topmostLink === activeLink) {
+            return;
+        }
+
+        // Move the 'active' / 'active-parent' classes from the old link to the new one.
+        if (activeLink) {
+            activeLink.classList.remove('active');
+            activeLink.parentElement.classList.remove('active-parent');
+        }
+        topmostLink.classList.add('active');
+        topmostLink.parentElement.classList.add('active-parent');
+        activeLink = topmostLink;
+
+        // Optional: keep the active link visible inside the ToC sidebar
+        // topmostLink.scrollIntoView({ behavior: 'smooth', block: 'nearest' });
+    };
+
+    const observer = new IntersectionObserver(observerCallback, observerOptions);
+
+    // Watch every heading that made it into the ToC.
+    for (const target of observerTargets) {
+        observer.observe(target);
+    }
+
+    // After a short delay, push any records still queued on the observer
+    // through the callback so a heading already in view on load is picked up.
+    setTimeout(() => {
+        const pending = observer.takeRecords();
+        observerCallback(pending);
+    }, 100);
+
+    // move footer and the hr before footer to the end of the main content
+    // Guard against pages without a <footer>: dereferencing a null footer
+    // would throw and abort the rest of this handler.
+    const footer = document.querySelector('footer');
+    if (footer) {
+        // An <hr> directly before the footer travels with it.
+        const hr = footer.previousElementSibling;
+        if (hr && hr.tagName === 'HR') {
+            mainContent.appendChild(hr);
+        }
+        mainContent.appendChild(footer);
+        console.info("TOC Generator: Footer moved to the end of the main content.");
+    } else {
+        console.warn("TOC Generator: No <footer> element found; nothing to move.");
+    }
+
+});
--- a/docs/md_v2/blog/releases/0.5.0.md
+++ b/docs/md_v2/blog/releases/0.5.0.md
@@ -251,7 +251,7 @@ from crawl4ai import (
    RoundRobinProxyStrategy,
 )
 import asyncio
-from crawl4ai.proxy_strategy import ProxyConfig
+from crawl4ai import ProxyConfig
 async def main():
    # Load proxies and create rotation strategy
    proxies = ProxyConfig.from_env()
--- a/docs/md_v2/core/ask-ai.md
+++ b/docs/md_v2/core/ask-ai.md
@@ -0,0 +1,74 @@
+<div class="ask-ai-container">
+<iframe id="ask-ai-frame" src="../../ask_ai/index.html" width="100%" style="border:none; display: block;" title="Crawl4AI Assistant"></iframe>
+</div>
+
+<script>
+// Iframe height adjustment
+function resizeAskAiIframe() {
+  // Sets the Ask AI iframe height to the viewport height minus the header
+  // (--header-height, default 55) and a 20px buffer, never below 600px.
+  const frame = document.getElementById('ask-ai-frame');
+  if (!frame) {
+    return;
+  }
+
+  const rootStyle = getComputedStyle(document.documentElement);
+  const headerHeight = parseFloat(rootStyle.getPropertyValue('--header-height') || '55');
+  const topOffset = headerHeight + 20;
+  const availableHeight = window.innerHeight - topOffset;
+
+  frame.style.height = `${Math.max(600, availableHeight)}px`;
+}
+
+// Size the iframe once right away, again on the window 'load' event, and on
+// every window resize (debounced by 150 ms through resizeTimer).
+resizeAskAiIframe(); // Initial call
+let resizeTimer; // Handle of the pending debounced resize, if any
+window.addEventListener('load', resizeAskAiIframe);
+window.addEventListener('resize', () => {
+    // Restart the timer so only the last resize in a burst triggers work.
+    clearTimeout(resizeTimer);
+    resizeTimer = setTimeout(resizeAskAiIframe, 150);
+});
+
+// Remove Footer & HR from parent page (DOM Ready might be safer)
+// This script is inline in the page that owns the footer, so the footer is
+// looked up in `document`. `window.parent.document` is only the same object
+// when the page is not framed; inside a frame it would either throw
+// (cross-origin) or touch the embedding page instead of this one.
+document.addEventListener('DOMContentLoaded', () => {
+    setTimeout(() => { // Small delay in case other scripts are still rearranging the page
+        const footer = document.querySelector('footer');
+        if (footer) {
+            // An <hr> directly before the footer is removed with it.
+            const hrBeforeFooter = footer.previousElementSibling;
+            if (hrBeforeFooter && hrBeforeFooter.tagName === 'HR') {
+                hrBeforeFooter.remove();
+            }
+            footer.remove();
+            // Trigger resize again after removing footer
+            resizeAskAiIframe();
+        } else {
+             console.warn("Ask AI Page: Could not find footer in parent document to remove.");
+        }
+    }, 100);
+});
+</script>
+
+<style>
+/* Content area on this page: no padding or margin, full size, clipped. */
+#terminal-mkdocs-main-content {
+    width: 100%;
+    height: 100%;
+    margin: 0;
+    padding: 0 !important;
+    overflow: hidden;
+}
+
+/* Iframe wrapper: no spacing, no width cap. Its height is not set here;
+   the script sets the iframe's height. */
+#terminal-mkdocs-main-content .ask-ai-container {
+    max-width: none;
+    margin: 0;
+    padding: 0;
+    overflow: hidden;
+}
+
+/* Hide title/paragraph if they were part of the markdown */
+/* Alternatively, just remove them from the .md file directly */
+/* #terminal-mkdocs-main-content > h1,
+#terminal-mkdocs-main-content > p:first-of-type {
+    display: none;
+} */
+
+</style>
--- a/docs/md_v2/core/docker-deployment.md
+++ b/docs/md_v2/core/docker-deployment.md
--- a/docs/tutorials/coming_soon.md
+++ b/docs/tutorials/coming_soon.md
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -7,10 +7,11 @@ docs_dir: docs/md_v2

 nav:
  - Home: 'index.md'
+  - "Ask AI": "core/ask-ai.md"
+  - "Quick Start": "core/quickstart.md"
  - Setup & Installation:
    - "Installation": "core/installation.md"
    - "Docker Deployment": "core/docker-deployment.md"
-  - "Quick Start": "core/quickstart.md"
  - "Blog & Changelog":
    - "Blog Home": "blog/index.md"
    - "Changelog": "https://github.com/unclecode/crawl4ai/blob/main/CHANGELOG.md"
@@ -76,6 +77,7 @@ extra:
  version: !ENV [CRAWL4AI_VERSION, 'development']

 extra_css:
+  - assets/layout.css
  - assets/styles.css
  - assets/highlight.css
  - assets/dmvendor.css
@@ -83,4 +85,9 @@ extra_css:
 extra_javascript:
  - assets/highlight.min.js
  - assets/highlight_init.js
-  - https://buttons.github.io/buttons.js
+  - https://buttons.github.io/buttons.js
+  - assets/toc.js
+  - assets/github_stats.js 
+  - assets/selection_ask_ai.js
+  - assets/copy_code.js
+  - assets/floating_ask_ai_button.js
--- a/parameter_updates.txt
+++ b/parameter_updates.txt
@@ -1,20 +0,0 @@
-The file /docs/md_v2/api/parameters.md should be updated to include the new network and console capturing parameters. 
-
-Here's what needs to be updated:
-
-1. Change section title from:
-```
-### G) **Debug & Logging**
-```
-to:
-```
-### G) **Debug, Logging & Capturing**
-```
-
-2. Add new parameters to the table:
-```
-| **`capture_network_requests`** | `bool` (False) | Captures all network requests, responses, and failures during the crawl. Available in `result.network_requests`. |
-| **`capture_console_messages`** | `bool` (False) | Captures all browser console messages (logs, warnings, errors) during the crawl. Available in `result.console_messages`. |
-```
-
-These changes demonstrate how to use the new network and console capturing features in the CrawlerRunConfig.
--- a/tests/docker/test_rest_api_deep_crawl.py
+++ b/tests/docker/test_rest_api_deep_crawl.py
@@ -0,0 +1,596 @@
+# ==== File: test_rest_api_deep_crawl.py ====
+
+import pytest
+import pytest_asyncio
+import httpx
+import json
+import asyncio
+import os
+from typing import List, Dict, Any, AsyncGenerator
+
+from dotenv import load_dotenv
+load_dotenv() # Load environment variables from .env file if present
+
+# --- Test Configuration ---
+# Server under test. Set CRAWL4AI_TEST_URL to override, e.g.
+# http://localhost:11235 when the server runs in Docker. The default targets
+# a server running in dev/debug mode. (BASE_URL used to be assigned twice;
+# only the second assignment ever took effect, so that one is kept.)
+BASE_URL = os.getenv("CRAWL4AI_TEST_URL", "http://localhost:8020")
+DEEP_CRAWL_BASE_URL = "https://docs.crawl4ai.com/samples/deepcrawl/"
+DEEP_CRAWL_DOMAIN = "docs.crawl4ai.com" # Used for domain filter
+
+# --- Helper Functions ---
+def load_proxies_from_env() -> List[Dict]:
+    """Parse the PROXIES environment variable into proxy dictionaries.
+
+    PROXIES is a comma-separated list whose entries are either
+    ``ip:port:username:password`` or ``ip:port``. Blank entries are ignored
+    and entries in any other shape are reported on stdout and skipped.
+
+    Returns:
+        One dict per valid entry with ``server`` (``http://ip:port``) and
+        ``ip`` keys, plus ``username`` and ``password`` for the four-part
+        form. An empty list when PROXIES is unset or empty.
+    """
+    proxies_str = os.getenv("PROXIES", "")
+    if not proxies_str:
+        print("PROXIES environment variable not set or empty.")
+        return []
+
+    # No blanket try/except here: every operation below is plain string
+    # handling that cannot fail for str input, and catching Exception would
+    # only hide a programming error.
+    proxies: List[Dict] = []
+    for raw_entry in proxies_str.split(","):
+        proxy = raw_entry.strip()
+        if not proxy:
+            continue
+
+        parts = proxy.split(":")
+        if len(parts) == 4:
+            ip, port, username, password = parts
+            proxies.append({
+                "server": f"http://{ip}:{port}",  # Assuming http, adjust if needed
+                "username": username,
+                "password": password,
+                "ip": ip,
+            })
+        elif len(parts) == 2:  # ip:port only
+            ip, port = parts
+            proxies.append({
+                "server": f"http://{ip}:{port}",
+                "ip": ip,
+            })
+        else:
+            print(f"Skipping invalid proxy string format: {proxy}")
+    return proxies
+
+
+async def check_server_health(client: httpx.AsyncClient):
+    """GET /health and fail the current test if the server does not answer OK.
+
+    Returns True when the request succeeds with a non-error status. A
+    transport error or an error status ends the test through ``pytest.fail``
+    with a message naming BASE_URL.
+    """
+    try:
+        health = await client.get("/health")
+        health.raise_for_status()
+        print(f"\nServer healthy: {health.json()}")
+        return True
+    except (httpx.RequestError, httpx.HTTPStatusError) as e:
+        reason = f"Server health check failed: {e}. Is the server running at {BASE_URL}?"
+        pytest.fail(reason, pytrace=False)
+
+async def assert_crawl_result_structure(result: Dict[str, Any], check_ssl=False):
+    """Assert that ``result`` has the shape of a single deep-crawl result.
+
+    Required: a dict with ``url``, ``success``, ``html`` and ``metadata``
+    keys, where ``metadata`` is a dict containing ``depth``. With
+    ``check_ssl`` the ``ssl_certificate`` key must also be present and hold
+    either a dict or None.
+    """
+    assert isinstance(result, dict)
+    for required_key in ("url", "success", "html", "metadata"):
+        assert required_key in result
+
+    metadata = result["metadata"]
+    assert isinstance(metadata, dict)
+    assert "depth" in metadata
+
+    if not check_ssl:
+        return
+
+    assert "ssl_certificate" in result
+    certificate = result["ssl_certificate"]
+    assert certificate is None or isinstance(certificate, dict)
+
+
+async def process_streaming_response(response: httpx.Response) -> List[Dict[str, Any]]:
+    """Collect result objects from an NDJSON streaming response.
+
+    Reads the response line by line. Objects with a truthy ``url`` are
+    collected, other objects are printed, and reading stops at the first
+    object whose ``status`` is ``"completed"``. A line that is not valid
+    JSON fails the test, as does a stream that ends without the completion
+    marker.
+    """
+    collected = []
+    saw_completion = False
+
+    async for line in response.aiter_lines():
+        if not line:
+            continue  # Skip blank lines
+
+        try:
+            data = json.loads(line)
+        except json.JSONDecodeError:
+            pytest.fail(f"Failed to decode JSON line: {line}")
+
+        if data.get("status") == "completed":
+            saw_completion = True
+            break
+        if data.get("url"):
+            collected.append(data)
+        else:
+            print(f"Received non-result JSON line: {data}")
+
+    assert saw_completion, "Streaming response did not end with a completion marker."
+    return collected
+
+
+# --- Pytest Fixtures ---
+@pytest_asyncio.fixture(scope="function")
+async def async_client() -> AsyncGenerator[httpx.AsyncClient, None]:
+    """Yield an ``httpx.AsyncClient`` bound to BASE_URL with a 300 s timeout.
+
+    The client is closed by the ``async with`` block once the test is done.
+    """
+    http_client = httpx.AsyncClient(base_url=BASE_URL, timeout=300.0)
+    async with http_client as client:
+        yield client
+
+# --- Test Class ---
+@pytest.mark.asyncio
+class TestDeepCrawlEndpoints:
+
+    @pytest_asyncio.fixture(autouse=True)
+    async def check_health_before_tests(self, async_client: httpx.AsyncClient):
+        """Autouse fixture: run the server health check before each test in this class."""
+        # check_server_health fails the test itself when the server is unreachable.
+        await check_server_health(async_client)
+
+    # 1. Basic Deep Crawl
+    async def test_deep_crawl_basic_bfs(self, async_client: httpx.AsyncClient):
+        """BFS deep crawl capped at depth 1 and 3 pages yields results at depths 0 and 1."""
+        max_depth = 1
+        max_pages = 3  # start URL plus at most two more
+
+        # The request is assembled inside-out so each nesting level stays readable.
+        domain_filter = {
+            "type": "DomainFilter",
+            "params": {"allowed_domains": [DEEP_CRAWL_DOMAIN]},
+        }
+        strategy = {
+            "type": "BFSDeepCrawlStrategy",
+            "params": {
+                "max_depth": max_depth,
+                "max_pages": max_pages,
+                "filter_chain": {
+                    "type": "FilterChain",
+                    "params": {"filters": [domain_filter]},
+                },
+            },
+        }
+        payload = {
+            "urls": [DEEP_CRAWL_BASE_URL],
+            "browser_config": {"type": "BrowserConfig", "params": {"headless": True}},
+            "crawler_config": {
+                "type": "CrawlerRunConfig",
+                "params": {
+                    "stream": False,
+                    "cache_mode": "BYPASS",  # CacheMode is sent as its string value
+                    "deep_crawl_strategy": strategy,
+                },
+            },
+        }
+
+        response = await async_client.post("/crawl", json=payload)
+        response.raise_for_status()
+        data = response.json()
+
+        assert data["success"] is True
+        results = data["results"]
+        assert isinstance(results, list)
+        # More than just the start URL, but never beyond the page cap.
+        assert len(results) > 1
+        assert len(results) <= max_pages
+
+        seen_depths = set()
+        for result in results:
+            await assert_crawl_result_structure(result)
+            assert result["success"] is True
+            assert DEEP_CRAWL_DOMAIN in result["url"]
+            depth = result["metadata"]["depth"]
+            assert depth <= max_depth
+            seen_depths.add(depth)
+
+        # Both the start page (depth 0) and at least one linked page (depth 1).
+        assert 0 in seen_depths
+        assert 1 in seen_depths
+
+    # 2. Deep Crawl with Filtering
+    async def test_deep_crawl_with_filters(self, async_client: httpx.AsyncClient):
+        """BFS deep crawl with domain, content-type and URL-pattern filters."""
+        max_depth = 1
+        max_pages = 5
+
+        filters = [
+            {
+                "type": "DomainFilter",
+                "params": {"allowed_domains": [DEEP_CRAWL_DOMAIN]},
+            },
+            {
+                "type": "ContentTypeFilter",
+                "params": {"allowed_types": ["text/html"]},
+            },
+            # Per the original note, reverse=True makes a matching URL blocked;
+            # the test expects no category-3 URL in the results.
+            {
+                "type": "URLPatternFilter",
+                "params": {
+                    "patterns": ["*/category-3/*"],
+                    "reverse": True,
+                },
+            },
+        ]
+        strategy = {
+            "type": "BFSDeepCrawlStrategy",
+            "params": {
+                "max_depth": max_depth,
+                "max_pages": max_pages,
+                "filter_chain": {
+                    "type": "FilterChain",
+                    "params": {"filters": filters},
+                },
+            },
+        }
+        payload = {
+            "urls": [DEEP_CRAWL_BASE_URL],
+            "browser_config": {"type": "BrowserConfig", "params": {"headless": True}},
+            "crawler_config": {
+                "type": "CrawlerRunConfig",
+                "params": {
+                    "stream": False,
+                    "cache_mode": "BYPASS",
+                    "deep_crawl_strategy": strategy,
+                },
+            },
+        }
+
+        response = await async_client.post("/crawl", json=payload)
+        response.raise_for_status()
+        data = response.json()
+
+        assert data["success"] is True
+        results = data["results"]
+        assert len(results) > 0
+        assert len(results) <= max_pages
+
+        for result in results:
+            await assert_crawl_result_structure(result)
+            assert result["success"] is True
+            url = result["url"]
+            assert DEEP_CRAWL_DOMAIN in url
+            assert "category-3" not in url  # The pattern filter kept category 3 out
+            assert result["metadata"]["depth"] <= max_depth
+
+    # 3. Deep Crawl with Scoring
+    async def test_deep_crawl_with_scoring(self, async_client: httpx.AsyncClient):
+        """Test BFS deep crawl with a composite URL scorer; scoring bias is reported, not asserted."""
+        max_depth = 1
+        max_pages = 4
+        payload = {
+            "urls": [DEEP_CRAWL_BASE_URL],
+            "browser_config": {"type": "BrowserConfig", "params": {"headless": True}},
+            "crawler_config": {
+                "type": "CrawlerRunConfig",
+                "params": {
+                    "stream": False,
+                    "cache_mode": "BYPASS",
+                    "deep_crawl_strategy": {
+                        "type": "BFSDeepCrawlStrategy",
+                        "params": {
+                            "max_depth": max_depth,
+                            "max_pages": max_pages,
+                            "filter_chain": { # Keep basic domain filter
+                                "type": "FilterChain",
+                                "params": { "filters": [{"type": "DomainFilter", "params": {"allowed_domains": [DEEP_CRAWL_DOMAIN]}}]}
+                            },
+                            "url_scorer": { # Add scorer
+                                "type": "CompositeScorer",
+                                "params": {
+                                    "scorers": [
+                                        {   # Favor pages with 'product' in the URL
+                                            "type": "KeywordRelevanceScorer",
+                                            "params": {"keywords": ["product"], "weight": 1.0}
+                                        },
+                                        {   # Penalize deep paths slightly
+                                            "type": "PathDepthScorer",
+                                            "params": {"optimal_depth": 2, "weight": -0.2}
+                                        }
+                                    ]
+                                }
+                            },
+                            # Set a threshold if needed: "score_threshold": 0.1
+                        }
+                    }
+                }
+            }
+        }
+        response = await async_client.post("/crawl", json=payload)
+        response.raise_for_status()
+        data = response.json()
+
+        assert data["success"] is True
+        assert len(data["results"]) > 0
+        assert len(data["results"]) <= max_pages
+
+        # Informational only: exact scores are unknown here, so the bias towards product URLs is printed, not asserted
+        product_urls_found = any("product_" in result["url"] for result in data["results"] if result["metadata"]["depth"] > 0)
+        print(f"Product URLs found among depth > 0 results: {product_urls_found}")
+        # Scoring is expected to prioritize product pages when they are reachable within the limits.
+        # NOTE(review): a hard `assert product_urls_found` might be too strict depending on site structure and limits
+
+        for result in data["results"]:
+            await assert_crawl_result_structure(result)
+            assert result["success"] is True
+            assert result["metadata"]["depth"] <= max_depth
+
+    # 4. Deep Crawl with CSS Extraction
+    async def test_deep_crawl_with_css_extraction(self, async_client: httpx.AsyncClient):
+        """Test BFS deep crawl combined with JsonCssExtractionStrategy; product pages must yield data."""
+        max_depth = 6 # Go deep enough to reach product pages
+        max_pages = 20
+        # Schema to extract product details
+        product_schema = {
+            "name": "ProductDetails",
+            "baseSelector": "div.container", # Base for product page
+            "fields": [
+                {"name": "product_title", "selector": "h1", "type": "text"},
+                {"name": "price", "selector": ".product-price", "type": "text"},
+                {"name": "description", "selector": ".product-description p", "type": "text"},
+                {"name": "specs", "selector": ".product-specs li", "type": "list", "fields":[
+                     {"name": "spec_name", "selector": ".spec-name", "type": "text"},
+                     {"name": "spec_value", "selector": ".spec-value", "type": "text"}
+                ]}
+            ]
+        }
+        payload = {
+            "urls": [DEEP_CRAWL_BASE_URL],
+            "browser_config": {"type": "BrowserConfig", "params": {"headless": True}},
+            "crawler_config": {
+                "type": "CrawlerRunConfig",
+                "params": {
+                    "stream": False,
+                    "cache_mode": "BYPASS",
+                    "extraction_strategy": { # Apply extraction to ALL crawled pages
+                        "type": "JsonCssExtractionStrategy",
+                        "params": {"schema": {"type": "dict", "value": product_schema}}
+                    },
+                    "deep_crawl_strategy": {
+                        "type": "BFSDeepCrawlStrategy",
+                        "params": {
+                            "max_depth": max_depth,
+                            "max_pages": max_pages,
+                            "filter_chain": { # Only crawl HTML on our domain
+                                "type": "FilterChain",
+                                "params": {
+                                    "filters": [
+                                        {"type": "DomainFilter", "params": {"allowed_domains": [DEEP_CRAWL_DOMAIN]}},
+                                        {"type": "ContentTypeFilter", "params": {"allowed_types": ["text/html"]}}
+                                    ]
+                                }
+                            }
+                            # Optional: Add scoring to prioritize product pages for extraction
+                        }
+                    }
+                }
+            }
+        }
+        response = await async_client.post("/crawl", json=payload)
+        response.raise_for_status()
+        data = response.json()
+
+        assert data["success"] is True
+        assert len(data["results"]) > 0
+        # NOTE(review): the upper bound `len(data["results"]) <= max_pages` is not asserted in this test — confirm that is intended
+
+        found_extracted_product = False
+        for result in data["results"]:
+            await assert_crawl_result_structure(result)
+            assert result["success"] is True
+            assert "extracted_content" in result
+            if "product_" in result["url"]: # Check product pages specifically
+                 assert result["extracted_content"] is not None
+                 try:
+                     extracted = json.loads(result["extracted_content"])
+                     # Schema returns list even if one base match
+                     assert isinstance(extracted, list)
+                     if extracted:
+                         item = extracted[0]
+                         assert "product_title" in item and item["product_title"]
+                         assert "price" in item and item["price"]
+                         # Specs might be empty list if not found
+                         assert "specs" in item and isinstance(item["specs"], list)
+                         found_extracted_product = True
+                         print(f"Extracted product: {item.get('product_title')}")
+                 except (json.JSONDecodeError, AssertionError, IndexError) as e:
+                      pytest.fail(f"Extraction validation failed for {result['url']}: {e}\nContent: {result['extracted_content']}")
+            # Non-product pages are deliberately not validated here:
+            # depending on whether the schema matches, their extracted_content
+            # might be None or an empty JSON list.
+
+        assert found_extracted_product, "Did not find any pages where product data was successfully extracted."
+
+    # 5. Deep Crawl with LLM Extraction (Requires Server LLM Setup)
+    async def test_deep_crawl_with_llm_extraction(self, async_client: httpx.AsyncClient):
+        """Test BFS deep crawl combined with LLMExtractionStrategy (needs LLM access set up on the server)."""
+        max_depth = 1 # Limit depth to keep LLM calls manageable
+        max_pages = 3
+        payload = {
+            "urls": [DEEP_CRAWL_BASE_URL],
+            "browser_config": {"type": "BrowserConfig", "params": {"headless": True}},
+            "crawler_config": {
+                "type": "CrawlerRunConfig",
+                "params": {
+                    "stream": False,
+                    "cache_mode": "BYPASS",
+                    "extraction_strategy": { # Apply LLM extraction to crawled pages
+                        "type": "LLMExtractionStrategy",
+                        "params": {
+                            "instruction": "Extract the main H1 title and the text content of the first paragraph.",
+                            "llm_config": { # Example override, rely on server default if possible
+                               "type": "LLMConfig",
+                               "params": {"provider": "openai/gpt-4.1-mini"} # Use a cheaper model for testing
+                            },
+                             "schema": { # Expected JSON output
+                                "type": "dict",
+                                "value": {
+                                    "title": "PageContent", "type": "object",
+                                    "properties": {
+                                        "h1_title": {"type": "string"},
+                                        "first_paragraph": {"type": "string"}
+                                    }
+                                }
+                            }
+                        }
+                    },
+                    "deep_crawl_strategy": {
+                        "type": "BFSDeepCrawlStrategy",
+                        "params": {
+                            "max_depth": max_depth,
+                            "max_pages": max_pages,
+                            "filter_chain": {
+                                "type": "FilterChain",
+                                "params": {
+                                    "filters": [
+                                        {"type": "DomainFilter", "params": {"allowed_domains": [DEEP_CRAWL_DOMAIN]}},
+                                        {"type": "ContentTypeFilter", "params": {"allowed_types": ["text/html"]}}
+                                    ]
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        try:
+            response = await async_client.post("/crawl", json=payload)
+            response.raise_for_status()
+            data = response.json()
+        except httpx.HTTPStatusError as e:
+            pytest.fail(f"Deep Crawl + LLM extraction request failed: {e}. Response: {e.response.text}. Check server logs and LLM API key setup.")
+        except httpx.RequestError as e:
+             pytest.fail(f"Deep Crawl + LLM extraction request failed: {e}.")
+
+
+        assert data["success"] is True
+        assert len(data["results"]) > 0
+        assert len(data["results"]) <= max_pages
+
+        found_llm_extraction = False
+        for result in data["results"]:
+            await assert_crawl_result_structure(result)
+            assert result["success"] is True
+            assert "extracted_content" in result
+            assert result["extracted_content"] is not None
+            try:
+                extracted = json.loads(result["extracted_content"])
+                if isinstance(extracted, list): extracted = extracted[0] # Handle list output; an empty list raises IndexError, reported below
+                assert isinstance(extracted, dict)
+                assert "h1_title" in extracted # Check keys based on schema
+                assert "first_paragraph" in extracted
+                found_llm_extraction = True
+                print(f"LLM extracted from {result['url']}: Title='{extracted.get('h1_title')}'")
+            except (json.JSONDecodeError, AssertionError, IndexError, TypeError) as e:
+                pytest.fail(f"LLM extraction validation failed for {result['url']}: {e}\nContent: {result['extracted_content']}")
+
+        assert found_llm_extraction, "LLM extraction did not yield expected data on any crawled page."
+
+
+    # 6. Deep Crawl with SSL Certificate Fetching
+    async def test_deep_crawl_with_ssl(self, async_client: httpx.AsyncClient):
+        """Test BFS deep crawl with fetch_ssl_certificate enabled (start URL only)."""
+        max_depth = 0 # Only fetch for start URL to keep test fast
+        max_pages = 1
+        payload = {
+            "urls": [DEEP_CRAWL_BASE_URL],
+            "browser_config": {"type": "BrowserConfig", "params": {"headless": True}},
+            "crawler_config": {
+                "type": "CrawlerRunConfig",
+                "params": {
+                    "stream": False,
+                    "cache_mode": "BYPASS",
+                    "fetch_ssl_certificate": True, # <-- Enable SSL fetching
+                    "deep_crawl_strategy": {
+                        "type": "BFSDeepCrawlStrategy",
+                        "params": {
+                            "max_depth": max_depth,
+                            "max_pages": max_pages,
+                        }
+                    }
+                }
+            }
+        }
+        response = await async_client.post("/crawl", json=payload)
+        response.raise_for_status()
+        data = response.json()
+
+        assert data["success"] is True
+        assert len(data["results"]) == 1
+        result = data["results"][0]
+
+        await assert_crawl_result_structure(result, check_ssl=True) # <-- Tell helper to check SSL field
+        assert result["success"] is True
+        # Detailed checks only run when a certificate was actually returned
+        if result["ssl_certificate"]:
+            # The certificate is expected as a plain dict with the keys below
+            assert isinstance(result["ssl_certificate"], dict) # Verify it's a dict
+            assert "issuer" in result["ssl_certificate"]
+            assert "subject" in result["ssl_certificate"]
+            # The validity period is checked under the keys not_before / not_after
+            assert "not_before" in result["ssl_certificate"] # Check for the actual key
+            assert "not_after" in result["ssl_certificate"]  # Check for the actual key
+            # A fingerprint entry must be present as well
+            assert "fingerprint" in result["ssl_certificate"] # Check another key
+
+            # Diagnostic output only; .get() with defaults avoids a KeyError on missing sub-fields
+            print(f"SSL Issuer Org: {result['ssl_certificate'].get('issuer', {}).get('O', 'N/A')}")
+            print(f"SSL Valid From: {result['ssl_certificate'].get('not_before', 'N/A')}")
+        else:
+            # NOTE(review): a null/empty certificate is tolerated and the test still passes — confirm this is intended
+            print("SSL Certificate was null in the result.")
+
+
+    # 7. Deep Crawl with Proxy Rotation (Requires PROXIES env var)
+    async def test_deep_crawl_with_proxies(self, async_client: httpx.AsyncClient):
+        """Test BFS deep crawl using proxy rotation; skipped when no proxies are loaded from the environment."""
+        proxies = load_proxies_from_env()
+        if not proxies:
+            pytest.skip("Skipping proxy test: PROXIES environment variable not set or empty.")
+
+        print(f"\nTesting with {len(proxies)} proxies loaded from environment.")
+
+        max_depth = 1
+        max_pages = 3
+        payload = {
+            "urls": [DEEP_CRAWL_BASE_URL], # Use the dummy site
+            # Use a BrowserConfig that *might* pick up proxy if set, but rely on CrawlerRunConfig
+            "browser_config": {"type": "BrowserConfig", "params": {"headless": True}},
+            "crawler_config": {
+                "type": "CrawlerRunConfig",
+                "params": {
+                    "stream": False,
+                    "cache_mode": "BYPASS",
+                    "proxy_rotation_strategy": { # <-- Define the strategy
+                        "type": "RoundRobinProxyStrategy",
+                        "params": {
+                             # Wrap each loaded proxy dict in the {"type", "params"} envelope used throughout this payload
+                             "proxies": [{"type": "ProxyConfig", "params": p} for p in proxies]
+                        }
+                    },
+                    "deep_crawl_strategy": {
+                        "type": "BFSDeepCrawlStrategy",
+                        "params": {
+                            "max_depth": max_depth,
+                            "max_pages": max_pages,
+                            "filter_chain": {
+                                "type": "FilterChain",
+                                "params": { "filters": [{"type": "DomainFilter", "params": {"allowed_domains": [DEEP_CRAWL_DOMAIN]}}]}
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        try:
+            response = await async_client.post("/crawl", json=payload)
+            response.raise_for_status()
+            data = response.json()
+        except httpx.HTTPStatusError as e:
+            # Turn an HTTP error status into a test failure that includes the server's response body
+            pytest.fail(f"Proxy deep crawl failed: {e}. Response: {e.response.text}. Are proxies valid and accessible by the server?")
+        except httpx.RequestError as e:
+             pytest.fail(f"Proxy deep crawl request failed: {e}. Are proxies valid and accessible?")
+
+        assert data["success"] is True
+        assert len(data["results"]) > 0
+        assert len(data["results"]) <= max_pages
+        # Primary assertion is that the crawl succeeded *with* proxy config
+        print(f"Proxy deep crawl completed successfully for {len(data['results'])} pages.")
+
+        # Which proxy served each page is not checked here; that would require server logs or custom headers/responses
+
+
+# --- Main Execution Block (for running script directly) ---
+if __name__ == "__main__":
+    pytest_args = ["-v", "-s", __file__]
+    # Example: run only the proxy test -> pytest_args += ["-k", "test_deep_crawl_with_proxies"]
+    print(f"Running pytest with args: {pytest_args}")
+    exit_code = pytest.main(pytest_args)
+    print(f"Pytest finished with exit code: {exit_code}")
+    raise SystemExit(exit_code)  # propagate pytest's result so a failing run exits non-zero
--- a/tests/docker/test_server_requests.py
+++ b/tests/docker/test_server_requests.py
@@ -0,0 +1,655 @@
+import pytest
+import pytest_asyncio
+import httpx
+import json
+import asyncio
+import os
+from typing import List, Dict, Any, AsyncGenerator
+
+from dotenv import load_dotenv
+load_dotenv()
+
+
+# Optional: Import crawl4ai classes directly for reference/easier payload creation aid
+# You don't strictly NEED these imports for the tests to run against the server,
+# but they help in understanding the structure you are mimicking in JSON.
+from crawl4ai import (
+    BrowserConfig,
+    CrawlerRunConfig,
+    CacheMode,
+    DefaultMarkdownGenerator,
+    PruningContentFilter,
+    BM25ContentFilter,
+    BFSDeepCrawlStrategy,
+    FilterChain,
+    ContentTypeFilter,
+    DomainFilter,
+    CompositeScorer,
+    KeywordRelevanceScorer,
+    PathDepthScorer,
+    JsonCssExtractionStrategy,
+    LLMExtractionStrategy,
+    LLMConfig
+)
+
+# --- Test Configuration ---
+# Base URL of the server under test; set CRAWL4AI_TEST_URL to override (an earlier default used port 8020).
+BASE_URL = os.getenv("CRAWL4AI_TEST_URL", "http://localhost:11235") # Make base URL configurable
+# Known simple HTML page for basic tests (the tests assert on its Moby-Dick heading)
+SIMPLE_HTML_URL = "https://httpbin.org/html"
+# Site suitable for scraping tests
+SCRAPE_TARGET_URL = "http://books.toscrape.com/"
+# Site with internal links for deep crawl tests
+DEEP_CRAWL_URL = "https://python.org"
+
+# --- Pytest Fixtures ---
+
+# Use the built-in event_loop fixture from pytest_asyncio
+# The custom implementation was causing issues with closing the loop
+
+@pytest_asyncio.fixture(scope="function")  # function scope: a fresh client per test avoids event loop issues
+async def async_client() -> AsyncGenerator[httpx.AsyncClient, None]:
+    """Yield an httpx.AsyncClient bound to BASE_URL and close it after the test."""
+    # The context manager closes the client once the test using the fixture finishes.
+    async with httpx.AsyncClient(base_url=BASE_URL, timeout=120.0) as http_client:
+        yield http_client
+
+# --- Helper Functions ---
+
+async def check_server_health(client: httpx.AsyncClient):
+    """Return True if GET /health succeeds; otherwise fail the current test via pytest.fail."""
+    try:
+        health = await client.get("/health")
+        health.raise_for_status()
+    except (httpx.RequestError, httpx.HTTPStatusError) as exc:
+        pytest.fail(f"Server health check failed: {exc}. Is the server running at {BASE_URL}?", pytrace=False)
+    print(f"\nServer healthy: {health.json()}")
+    return True
+
+async def assert_crawl_result_structure(result: Dict[str, Any]):
+    """Assert that *result* is a dict carrying the basic crawl-result keys."""
+    assert isinstance(result, dict)
+    # Keys checked on every single crawl result, in this order.
+    for required_key in ("url", "success", "html"):
+        assert required_key in result
+    # Add more common checks if needed
+
+async def process_streaming_response(response: httpx.Response) -> List[Dict[str, Any]]:
+    """Collect the NDJSON records of *response* that precede the completion marker."""
+    collected = []
+    saw_completion = False
+    async for raw_line in response.aiter_lines():
+        if not raw_line:
+            continue  # empty lines carry no record
+        try:
+            record = json.loads(raw_line)
+        except json.JSONDecodeError:
+            pytest.fail(f"Failed to decode JSON line: {raw_line}")
+        if record.get("status") == "completed":
+            saw_completion = True
+            break  # stop processing after the completion marker
+        collected.append(record)
+    assert saw_completion, "Streaming response did not end with a completion marker."
+    return collected
+
+
+# --- Test Class ---
+
+@pytest.mark.asyncio
+class TestCrawlEndpoints:
+
+    @pytest_asyncio.fixture(autouse=True)
+    async def check_health_before_tests(self, async_client: httpx.AsyncClient):
+        """Autouse fixture: run the server health check before each test in this class."""
+        await check_server_health(async_client)
+
+    # 1. Simple Requests (Primitives)
+    async def test_simple_crawl_single_url(self, async_client: httpx.AsyncClient):
+        """Test /crawl (non-streaming) with a single URL and simple config values."""
+        payload = {
+            "urls": [SIMPLE_HTML_URL],
+            "browser_config": {
+                "type": "BrowserConfig",
+                "params": {
+                    "headless": True,
+                }
+            },
+            "crawler_config": {
+                "type": "CrawlerRunConfig",
+                "params": {
+                    "stream": False, # Explicitly false for /crawl
+                    "screenshot": False,
+                    "cache_mode": CacheMode.BYPASS.value # Send the enum's value so the payload stays plain JSON
+                }
+            }
+        }
+        try:
+            response = await async_client.post("/crawl", json=payload)
+            print(f"Response status: {response.status_code}")
+            response.raise_for_status()
+            data = response.json()
+        except httpx.HTTPStatusError as e:
+            # Print the server's error body for diagnosis, then let the test fail with the original error
+            print(f"Server error: {e}")
+            print(f"Response content: {e.response.text}")
+            raise
+
+        assert data["success"] is True
+        assert isinstance(data["results"], list)
+        assert len(data["results"]) == 1
+        result = data["results"][0]
+        await assert_crawl_result_structure(result)
+        assert result["success"] is True
+        assert result["url"] == SIMPLE_HTML_URL
+        assert "<h1>Herman Melville - Moby-Dick</h1>" in result["html"]
+        # We don't specify a markdown generator in this test, so don't make assumptions about markdown field
+        # It might be null, missing, or populated depending on the server's default behavior
+
+    async def test_simple_crawl_single_url_streaming(self, async_client: httpx.AsyncClient):
+        """Test /crawl/stream with a single URL; expects exactly one streamed result."""
+        payload = {
+            "urls": [SIMPLE_HTML_URL],
+            "browser_config": {
+                "type": "BrowserConfig",
+                "params": {
+                    "headless": True,
+                }
+            },
+            "crawler_config": {
+                "type": "CrawlerRunConfig",
+                "params": {
+                    "stream": True, # Must be true for /crawl/stream
+                    "screenshot": False,
+                    "cache_mode": CacheMode.BYPASS.value
+                }
+            }
+        }
+        async with async_client.stream("POST", "/crawl/stream", json=payload) as response:
+            response.raise_for_status()
+            results = await process_streaming_response(response)
+
+        assert len(results) == 1
+        result = results[0]
+        await assert_crawl_result_structure(result)
+        assert result["success"] is True
+        assert result["url"] == SIMPLE_HTML_URL
+        assert "<h1>Herman Melville - Moby-Dick</h1>" in result["html"]
+
+
+    # 2. Multi-URL and Dispatcher
+    async def test_multi_url_crawl(self, async_client: httpx.AsyncClient):
+        """Test /crawl with multiple URLs, implicitly testing dispatcher; expects one result per URL."""
+        urls = [SIMPLE_HTML_URL, "https://httpbin.org/links/10/0"]
+        payload = {
+            "urls": urls,
+            "browser_config": {
+                "type": "BrowserConfig",
+                "params": {"headless": True}
+            },
+            "crawler_config": {
+                "type": "CrawlerRunConfig",
+                "params": {"stream": False, "cache_mode": CacheMode.BYPASS.value}
+            }
+        }
+        try:
+            print("Sending multi-URL crawl request to server...")
+            response = await async_client.post("/crawl", json=payload)
+            print(f"Response status: {response.status_code}")
+
+            if response.status_code >= 400:
+                # Do not parse the body here: a non-JSON error page would raise and
+                # hide the HTTP failure that the handler below reports in full.
+                print(f"Full response: {response.text}")
+
+            response.raise_for_status()
+            data = response.json()
+        except httpx.HTTPStatusError as e:
+            print(f"Server error status: {e.response.status_code}")
+            print(f"Server error response: {e.response.text}")
+            try:
+                error_json = e.response.json()
+                print(f"Parsed error: {error_json}")
+            except ValueError:  # json.JSONDecodeError is a ValueError subclass
+                print("Could not parse error response as JSON")
+            raise
+
+        assert data["success"] is True
+        assert isinstance(data["results"], list)
+        assert len(data["results"]) == len(urls)
+        for result in data["results"]:
+            await assert_crawl_result_structure(result)
+            assert result["success"] is True
+            assert result["url"] in urls
+
+    async def test_multi_url_crawl_streaming(self, async_client: httpx.AsyncClient):
+        """Stream a two-URL crawl from /crawl/stream and expect one result per URL."""
+        urls = [SIMPLE_HTML_URL, "https://httpbin.org/links/10/0"]
+        browser_config = {"type": "BrowserConfig", "params": {"headless": True}}
+        crawler_config = {
+            "type": "CrawlerRunConfig",
+            "params": {"stream": True, "cache_mode": CacheMode.BYPASS.value},
+        }
+        payload = {
+            "urls": urls,
+            "browser_config": browser_config,
+            "crawler_config": crawler_config,
+        }
+        async with async_client.stream("POST", "/crawl/stream", json=payload) as stream:
+            stream.raise_for_status()
+            streamed = await process_streaming_response(stream)
+
+        assert len(streamed) == len(urls)
+        seen_urls = set()
+        for item in streamed:
+            await assert_crawl_result_structure(item)
+            assert item["success"] is True
+            assert item["url"] in urls
+            seen_urls.add(item["url"])
+        # Every requested URL must appear among the streamed results
+        assert seen_urls == set(urls)
+
+
+    # 3. Class Values and Nested Classes (Markdown Generator)
+    async def test_crawl_with_markdown_pruning_filter(self, async_client: httpx.AsyncClient):
+        """Test /crawl with MarkdownGenerator using PruningContentFilter; expects raw and fit markdown."""
+        payload = {
+            "urls": [SIMPLE_HTML_URL],
+            "browser_config": {"type": "BrowserConfig", "params": {"headless": True}},
+            "crawler_config": {
+                "type": "CrawlerRunConfig",
+                "params": {
+                    "cache_mode": CacheMode.ENABLED.value, # Test different cache mode
+                    "markdown_generator": {
+                        "type": "DefaultMarkdownGenerator",
+                        "params": {
+                            "content_filter": {
+                                "type": "PruningContentFilter",
+                                "params": {
+                                    "threshold": 0.5, # Example param
+                                    "threshold_type": "relative"
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        try:
+            print("Sending markdown pruning-filter crawl request to server...")
+            response = await async_client.post("/crawl", json=payload)
+            print(f"Response status: {response.status_code}")
+
+            if response.status_code >= 400:
+                # Do not parse the body here: a non-JSON error page would raise and
+                # hide the HTTP failure that the handler below reports in full.
+                print(f"Full response: {response.text}")
+
+            response.raise_for_status()
+            data = response.json()
+        except httpx.HTTPStatusError as e:
+            print(f"Server error status: {e.response.status_code}")
+            print(f"Server error response: {e.response.text}")
+            try:
+                error_json = e.response.json()
+                print(f"Parsed error: {error_json}")
+            except ValueError:  # json.JSONDecodeError is a ValueError subclass
+                print("Could not parse error response as JSON")
+            raise
+
+        assert data["success"] is True
+        assert len(data["results"]) == 1
+        result = data["results"][0]
+        await assert_crawl_result_structure(result)
+        assert result["success"] is True
+        assert "markdown" in result
+        assert isinstance(result["markdown"], dict)
+        assert "raw_markdown" in result["markdown"]
+        assert "fit_markdown" in result["markdown"] # Pruning creates fit_markdown
+        assert "Moby-Dick" in result["markdown"]["raw_markdown"]
+        # Fit markdown content might be different/shorter due to pruning
+        assert len(result["markdown"]["fit_markdown"]) <= len(result["markdown"]["raw_markdown"])
+
+    async def test_crawl_with_markdown_bm25_filter(self, async_client: httpx.AsyncClient):
+        """Test /crawl with MarkdownGenerator using BM25ContentFilter.
+
+        Only the presence of ``raw_markdown`` and ``fit_markdown`` is asserted;
+        the amount of BM25-matched content is printed for debugging but not
+        checked because it can vary between environments.
+        """
+        payload = {
+            "urls": [SIMPLE_HTML_URL],
+            "browser_config": {"type": "BrowserConfig", "params": {"headless": True}},
+            "crawler_config": {
+                "type": "CrawlerRunConfig",
+                "params": {
+                    "markdown_generator": {
+                        "type": "DefaultMarkdownGenerator",
+                        "params": {
+                            "content_filter": {
+                                "type": "BM25ContentFilter",
+                                "params": {
+                                    "user_query": "Herman Melville", # Query for BM25
+                                    "bm25_threshold": 0.1, # Lower threshold to increase matches
+                                    "language": "english"  # Valid parameters
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        print(f"Payload for BM25 test: {json.dumps(payload)}")
+        response = await async_client.post("/crawl", json=payload)
+        print(f"Response status: {response.status_code}")
+
+        try:
+            response.raise_for_status()
+        except httpx.HTTPStatusError as e:
+            # Dump as much of the server's answer as possible, then let the
+            # original status error fail the test.
+            print(f"Server error status: {e.response.status_code}")
+            print(f"Server error response: {e.response.text}")
+            try:
+                error_json = e.response.json()
+            except ValueError:
+                # The error body is not JSON; the raw text was printed above.
+                print("Could not parse error response as JSON")
+            else:
+                print(f"Parsed error: {error_json}")
+            raise
+        data = response.json()
+
+        assert data["success"] is True
+        assert len(data["results"]) == 1
+        result = data["results"][0]
+        await assert_crawl_result_structure(result)
+        assert result["success"] is True
+        assert "markdown" in result
+        assert isinstance(result["markdown"], dict)
+        assert "raw_markdown" in result["markdown"]
+        assert "fit_markdown" in result["markdown"] # BM25 creates fit_markdown
+
+        # Print values for debug
+        print(f"Raw markdown length: {len(result['markdown']['raw_markdown'])}")
+        print(f"Fit markdown length: {len(result['markdown']['fit_markdown'])}")
+
+        # Either fit_markdown has content (possibly including our query terms)
+        # or it might be empty if no good BM25 matches were found.
+        # Don't assert specific content since it can be environment-dependent.
+
+
+    # 4. Deep Crawling
+    async def test_deep_crawl(self, async_client: httpx.AsyncClient):
+        """Test /crawl with a deep crawl strategy.
+
+        Configures a ``BFSDeepCrawlStrategy`` (depth 1, at most 5 pages) with a
+        content-type/domain filter chain and a composite URL scorer, then checks
+        that the start URL plus at least one followed link come back, all of them
+        successful and on python.org.
+        """
+        payload = {
+            "urls": [DEEP_CRAWL_URL], # Start URL
+            "browser_config": {"type": "BrowserConfig", "params": {"headless": True}},
+            "crawler_config": {
+                "type": "CrawlerRunConfig",
+                "params": {
+                    "stream": False,
+                    "cache_mode": CacheMode.BYPASS.value,
+                    "deep_crawl_strategy": {
+                        "type": "BFSDeepCrawlStrategy",
+                        "params": {
+                            "max_depth": 1, # Limit depth for testing speed
+                            "max_pages": 5, # Limit pages to crawl
+                            "filter_chain": {
+                                "type": "FilterChain",
+                                "params": {
+                                    "filters": [
+                                        {
+                                            "type": "ContentTypeFilter",
+                                            "params": {"allowed_types": ["text/html"]}
+                                        },
+                                        {
+                                            "type": "DomainFilter",
+                                            "params": {"allowed_domains": ["python.org", "docs.python.org"]} # Include important subdomains
+                                        }
+                                    ]
+                                }
+                            },
+                            "url_scorer": {
+                                "type": "CompositeScorer",
+                                "params": {
+                                    "scorers": [
+                                        {
+                                            "type": "KeywordRelevanceScorer",
+                                            "params": {"keywords": ["documentation", "tutorial"]}
+                                        },
+                                        {
+                                            "type": "PathDepthScorer",
+                                            "params": {"weight": 0.5, "optimal_depth": 2}
+                                        }
+                                    ]
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        print("Sending deep crawl request to server...")
+        response = await async_client.post("/crawl", json=payload)
+        print(f"Response status: {response.status_code}")
+
+        try:
+            response.raise_for_status()
+        except httpx.HTTPStatusError as e:
+            # Dump as much of the server's answer as possible, then let the
+            # original status error fail the test.
+            print(f"Server error status: {e.response.status_code}")
+            print(f"Server error response: {e.response.text}")
+            try:
+                error_json = e.response.json()
+            except ValueError:
+                # The error body is not JSON; the raw text was printed above.
+                print("Could not parse error response as JSON")
+            else:
+                print(f"Parsed error: {error_json}")
+            raise
+        data = response.json()
+
+        assert data["success"] is True
+        assert isinstance(data["results"], list)
+        # Expect more than 1 result due to deep crawl (start URL + crawled links)
+        assert len(data["results"]) > 1
+        assert len(data["results"]) <= 6 # Start URL + max_pages=5
+
+        start_url_found = False
+        crawled_urls_found = False
+        for result in data["results"]:
+            await assert_crawl_result_structure(result)
+            assert result["success"] is True
+
+            # Print URL for debugging
+            print(f"Crawled URL: {result['url']}")
+
+            # Allow URLs that contain python.org (including subdomains like docs.python.org)
+            assert "python.org" in result["url"]
+            if result["url"] == DEEP_CRAWL_URL:
+                start_url_found = True
+            else:
+                crawled_urls_found = True
+
+        assert start_url_found
+        assert crawled_urls_found
+
+
+    # 5. Extraction without LLM (JSON/CSS)
+    async def test_json_css_extraction(self, async_client: httpx.AsyncClient):
+        """Test /crawl with JsonCssExtractionStrategy.
+
+        Extracts title, price and rating for each book item matched by the CSS
+        schema and checks that ``extracted_content`` is a JSON list of dicts with
+        those fields.
+        """
+        payload = {
+            "urls": [SCRAPE_TARGET_URL],
+            "browser_config": {"type": "BrowserConfig", "params": {"headless": True}},
+            "crawler_config": {
+                "type": "CrawlerRunConfig",
+                "params": {
+                    "cache_mode": CacheMode.BYPASS.value,
+                    "extraction_strategy": {
+                        "type": "JsonCssExtractionStrategy",
+                        "params": {
+                            "schema": {
+                                "type": "dict", # IMPORTANT: Wrap schema dict with type/value structure
+                                "value": {
+                                    "name": "BookList",
+                                    "baseSelector": "ol.row li.col-xs-6", # Select each book item
+                                    "fields": [
+                                        {"name": "title", "selector": "article.product_pod h3 a", "type": "attribute", "attribute": "title"},
+                                        {"name": "price", "selector": "article.product_pod .price_color", "type": "text"},
+                                        {"name": "rating", "selector": "article.product_pod p.star-rating", "type": "attribute", "attribute": "class"}
+                                    ]
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        print("Sending JSON/CSS extraction request to server...")
+        response = await async_client.post("/crawl", json=payload)
+        print(f"Response status: {response.status_code}")
+
+        try:
+            response.raise_for_status()
+        except httpx.HTTPStatusError as e:
+            # Dump as much of the server's answer as possible, then let the
+            # original status error fail the test.
+            print(f"Server error status: {e.response.status_code}")
+            print(f"Server error response: {e.response.text}")
+            try:
+                error_json = e.response.json()
+            except ValueError:
+                # The error body is not JSON; the raw text was printed above.
+                print("Could not parse error response as JSON")
+            else:
+                print(f"Parsed error: {error_json}")
+            raise
+        data = response.json()
+
+        assert data["success"] is True
+        assert len(data["results"]) == 1
+        result = data["results"][0]
+        await assert_crawl_result_structure(result)
+        assert result["success"] is True
+        assert "extracted_content" in result
+        assert result["extracted_content"] is not None
+
+        # Extracted content should be a JSON string representing a list of dicts
+        try:
+            extracted_data = json.loads(result["extracted_content"])
+            assert isinstance(extracted_data, list)
+            assert len(extracted_data) > 0 # Should find some books
+            # Check structure of the first extracted item
+            first_item = extracted_data[0]
+            assert "title" in first_item
+            assert "price" in first_item
+            assert "rating" in first_item
+            assert "star-rating" in first_item["rating"] # e.g., "star-rating Three"
+        except (json.JSONDecodeError, AssertionError) as e:
+            # Re-report with the raw payload attached so a failure is diagnosable.
+            pytest.fail(f"Extracted content parsing or validation failed: {e}\nContent: {result['extracted_content']}")
+
+
+    # 6. Extraction with LLM
+    async def test_llm_extraction(self, async_client: httpx.AsyncClient):
+        """
+        Test /crawl with LLMExtractionStrategy.
+        NOTE: Requires working LLM API credentials. This payload overrides the
+              LLM configuration per request: provider "openai/gpt-4o" with the
+              token read from OPENAI_API_KEY in the *test process* environment.
+              Drop the "llm_config" entry to rely on the server's own
+              config.yml / .llm.env settings instead.
+        """
+        payload = {
+            "urls": [SIMPLE_HTML_URL],
+            "browser_config": {"type": "BrowserConfig", "params": {"headless": True}},
+            "crawler_config": {
+                "type": "CrawlerRunConfig",
+                "params": {
+                    "cache_mode": CacheMode.BYPASS.value,
+                    "extraction_strategy": {
+                        "type": "LLMExtractionStrategy",
+                        "params": {
+                            "instruction": "Extract the main title and the author mentioned in the text into JSON.",
+                            # Per-request override of provider/token.
+                            # NOTE(review): api_token is None when OPENAI_API_KEY is not set
+                            # for the test process; presumably the server then falls back
+                            # to its own configured key - confirm.
+                            "llm_config": {
+                               "type": "LLMConfig",
+                               "params": {
+                                  "provider": "openai/gpt-4o",
+                                  "api_token": os.getenv("OPENAI_API_KEY")
+                               }
+                            },
+                            "schema": { # Optional: Provide a schema for structured output
+                                "type": "dict", # IMPORTANT: Wrap schema dict
+                                "value": {
+                                    "title": "Book Info",
+                                    "type": "object",
+                                    "properties": {
+                                        "title": {"type": "string", "description": "The main title of the work"},
+                                        "author": {"type": "string", "description": "The author of the work"}
+                                    },
+                                    "required": ["title", "author"]
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        try:
+            response = await async_client.post("/crawl", json=payload)
+            response.raise_for_status() # Will raise if server returns 500 (e.g., bad API key)
+            data = response.json()
+        except httpx.HTTPStatusError as e:
+            # Catch potential server errors (like 500 due to missing/invalid API keys)
+            pytest.fail(f"LLM extraction request failed: {e}. Response: {e.response.text}. Check server logs and ensure API keys are correctly configured for the server.")
+        except httpx.RequestError as e:
+            pytest.fail(f"LLM extraction request failed: {e}.")
+
+        assert data["success"] is True
+        assert len(data["results"]) == 1
+        result = data["results"][0]
+        await assert_crawl_result_structure(result)
+        assert result["success"] is True
+        assert "extracted_content" in result
+        assert result["extracted_content"] is not None
+
+        # Extracted content should be JSON (because we provided a schema)
+        try:
+            extracted_data = json.loads(result["extracted_content"])
+            print(f"\nLLM Extracted Data: {extracted_data}") # Print for verification
+
+            # Handle both dict and list formats (server returns a list): for a
+            # list, validate its first item; otherwise validate the object itself.
+            if isinstance(extracted_data, list):
+                assert len(extracted_data) > 0
+                extracted_item = extracted_data[0]
+            else:
+                extracted_item = extracted_data
+            assert isinstance(extracted_item, dict)
+            assert "title" in extracted_item
+            assert "author" in extracted_item
+            assert "Moby-Dick" in extracted_item.get("title", "")
+            assert "Herman Melville" in extracted_item.get("author", "")
+        except (json.JSONDecodeError, AssertionError) as e:
+            pytest.fail(f"LLM extracted content parsing or validation failed: {e}\nContent: {result['extracted_content']}")
+        except Exception as e: # Catch any other unexpected error
+            pytest.fail(f"An unexpected error occurred during LLM result processing: {e}\nContent: {result['extracted_content']}")
+            
+if __name__ == "__main__":
+    # Allows running this test module directly as a script instead of through
+    # the ``pytest`` command line. Arguments are defined programmatically:
+    #   -v: verbose output
+    #   -s: show print statements immediately (useful for debugging)
+    #   __file__: tells pytest to run tests in the current file
+    pytest_args = ["-v", "-s", __file__]
+
+    # More pytest arguments can be added here if needed, for example to run
+    # only the LLM test function:
+    #   pytest_args.extend(["-k", "test_llm_extraction"])
+
+    print(f"Running pytest with args: {pytest_args}")
+
+    # Execute pytest
+    exit_code = pytest.main(pytest_args)
+
+    print(f"Pytest finished with exit code: {exit_code}")
+
+    # Propagate pytest's result so shells and CI see a non-zero status when
+    # tests fail; without this the script always exits with 0.
+    raise SystemExit(int(exit_code))
--- a/tests/general/generate_dummy_site.py
+++ b/tests/general/generate_dummy_site.py
@@ -0,0 +1,335 @@
+# ==== File: generate_dummy_site.py ====
+
+import os
+import random
+import argparse
+from pathlib import Path
+from urllib.parse import quote
+
+# --- Configuration ---
+# Number of top-level (Level 1) category pages linked from the homepage.
+NUM_CATEGORIES = 3
+NUM_SUBCATEGORIES_PER_CAT = 2 # Results in NUM_CATEGORIES * NUM_SUBCATEGORIES_PER_CAT total L2 categories
+# Products listed on each L2 (sub-category) page; generate_site() writes an L3
+# product page plus a details page and a reviews page for every one of them.
+NUM_PRODUCTS_PER_SUBCAT = 5
+# NOTE(review): not referenced by any code visible in this file; presumably it
+# documents the deepest level generate_site() produces (Level 5) - confirm.
+MAX_DEPTH_TARGET = 5 # Explicitly set target depth
+
+# --- Helper Functions ---
+
+def generate_lorem(words=20):
+    """Return a capitalised, period-terminated sentence of random filler words.
+
+    ``words`` is the number of words drawn (with repetition) from a fixed
+    lorem-ipsum vocabulary; ``0`` yields just ``"."``.
+    """
+    vocabulary = (
+        "lorem", "ipsum", "dolor", "sit", "amet", "consectetur",
+        "adipiscing", "elit", "sed", "do", "eiusmod", "tempor",
+        "incididunt", "ut", "labore", "et", "dolore", "magna", "aliqua",
+    )
+    picked = []
+    for _ in range(words):
+        picked.append(random.choice(vocabulary))
+    sentence = " ".join(picked).capitalize()
+    return f"{sentence}."
+
+def create_html_page(filepath: Path, title: str, body_content: str, breadcrumbs: "list | None" = None, head_extras: str = ""):
+    """Write a complete HTML document with basic structure and inline CSS.
+
+    Args:
+        filepath: Destination file; missing parent directories are created.
+        title: Page title. Used for the ``<title>`` tag (suffixed with
+            " - FakeShop"), the ``<h1>`` heading and the final, unlinked
+            breadcrumb entry.
+        body_content: Raw HTML placed below the heading (inserted unescaped).
+        breadcrumbs: Optional list of ``{"name": ..., "link": ...}`` dicts for
+            the ancestor pages, rendered as links in front of ``title``.
+            ``None`` or an empty list omits the breadcrumb bar.
+        head_extras: Raw HTML inserted into ``<head>`` before the stylesheet.
+    """
+    filepath.parent.mkdir(parents=True, exist_ok=True)
+
+    # Generate breadcrumb HTML using the 'link' provided in the breadcrumbs list
+    breadcrumb_html = ""
+    if breadcrumbs:
+        links_html = " » ".join(f'<a href="{bc["link"]}">{bc["name"]}</a>' for bc in breadcrumbs)
+        breadcrumb_html = f"<nav class='breadcrumbs'>{links_html} » {title}</nav>"
+
+    # Basic CSS; the class names double as hooks for identifying page structure.
+    css = """
+<style>
+  body {
+    font-family: sans-serif;
+    padding: 20px;
+    background-color: #1e1e1e;
+    color: #d1d1d1;
+  }
+
+  .container {
+    max-width: 960px;
+    margin: auto;
+    background: #2c2c2c;
+    padding: 20px;
+    border-radius: 5px;
+    box-shadow: 0 2px 5px rgba(0, 0, 0, 0.5);
+  }
+
+  h1, h2 {
+    color: #ccc;
+  }
+
+  a {
+    color: #9bcdff;
+    text-decoration: none;
+  }
+
+  a:hover {
+    text-decoration: underline;
+  }
+
+  ul {
+    list-style: none;
+    padding-left: 0;
+  }
+
+  li {
+    margin-bottom: 10px;
+  }
+
+  .category-link,
+  .subcategory-link,
+  .product-link,
+  .details-link,
+  .reviews-link {
+    display: block;
+    padding: 8px;
+    background-color: #3a3a3a;
+    border-radius: 3px;
+  }
+
+  .product-preview {
+    border: 1px solid #444;
+    padding: 10px;
+    margin-bottom: 10px;
+    border-radius: 4px;
+    background-color: #2a2a2a;
+  }
+
+  .product-title {
+    color: #d1d1d1;
+  }
+
+  .product-price {
+    font-weight: bold;
+    color: #85e085;
+  }
+
+  .product-description,
+  .product-specs,
+  .product-reviews {
+    margin-top: 15px;
+    line-height: 1.6;
+  }
+
+  .product-specs li {
+    margin-bottom: 5px;
+    font-size: 0.9em;
+  }
+
+  .spec-name {
+    font-weight: bold;
+  }
+
+  .breadcrumbs {
+    margin-bottom: 20px;
+    font-size: 0.9em;
+    color: #888;
+  }
+
+  .breadcrumbs a {
+    color: #9bcdff;
+  }
+</style>
+    """
+    html_content = f"""<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>{title} - FakeShop</title>
+    {head_extras}
+    {css}
+</head>
+<body>
+    <div class="container">
+        {breadcrumb_html}
+        <h1>{title}</h1>
+        {body_content}
+    </div>
+</body>
+</html>"""
+    filepath.write_text(html_content, encoding="utf-8")
+
+def generate_site(base_dir: Path, site_name: str = "FakeShop", base_path: str = ""):
+    """Generate the dummy website structure under ``base_dir``.
+
+    Pages written (levels as labelled in the code below): homepage (L0),
+    category pages (L1), sub-category product lists (L2), product pages (L3)
+    and, per product, a details page (L4) and a reviews page (L5). Links
+    inside page bodies are relative; breadcrumb links are absolute and
+    prefixed with ``base_path``.
+
+    Args:
+        base_dir: Output directory; created if missing.
+        site_name: Shop name shown in the homepage welcome heading. The
+            ``<title>`` suffix is written by ``create_html_page`` and is not
+            affected by this argument.
+        base_path: URL path the site will be hosted under (e.g.
+            ``"samples/deepcrawl"``). Empty, or consisting only of slashes,
+            means the server root.
+    """
+    base_dir.mkdir(parents=True, exist_ok=True)
+
+    # --- Clean and prepare the base path for URL construction ---
+    # Strip slashes *before* testing for emptiness so that "/" is treated as
+    # the root instead of producing protocol-relative "//index.html" links.
+    trimmed_base_path = base_path.strip("/")
+    full_base_path = f"/{trimmed_base_path}" if trimmed_base_path else "" # "" represents the root
+
+    print(f"Using base path for links: '{full_base_path}'")
+
+    # --- Level 0: Homepage ---
+    # The *actual* link path for the homepage breadcrumb shown on all other pages
+    home_link_path = f"{full_base_path}/index.html"
+    breadcrumbs_home = [{"name": "Home", "link": home_link_path}] # Base breadcrumb
+
+    # Links *within* the page content should remain relative
+    category_items = []
+    for i in range(NUM_CATEGORIES):
+        cat_name = f"Category-{i+1}"
+        cat_folder_name = quote(cat_name.lower().replace(" ", "-"))
+        # This path is relative to the current directory (index.html)
+        cat_relative_page_path = f"{cat_folder_name}/index.html"
+        category_items.append(
+            f'<li><a class="category-link" href="{cat_relative_page_path}">{cat_name}</a> - {generate_lorem(10)}</li>'
+        )
+    home_body = (
+        f"<h2>Welcome to {site_name}!</h2><p>Your one-stop shop for imaginary items.</p><h3>Categories:</h3>\n<ul>"
+        + "".join(category_items)
+        + "</ul>"
+    )
+    create_html_page(base_dir / "index.html", "Homepage", home_body, []) # No breadcrumbs *on* the homepage itself
+
+    # --- Levels 1-5 ---
+    for i in range(NUM_CATEGORIES):
+        cat_name = f"Category-{i+1}"
+        cat_folder_name = quote(cat_name.lower().replace(" ", "-"))
+        cat_dir = base_dir / cat_folder_name
+        # This is the *absolute* path for the breadcrumb link
+        cat_link_path = f"{full_base_path}/{cat_folder_name}/index.html"
+        # Trail shown on pages *below* this category
+        breadcrumbs_cat = breadcrumbs_home + [{"name": cat_name, "link": cat_link_path}]
+
+        # --- Level 1: Category Page ---
+        subcategory_items = []
+        for j in range(NUM_SUBCATEGORIES_PER_CAT):
+            subcat_name = f"{cat_name}-Sub-{j+1}"
+            subcat_folder_name = quote(subcat_name.lower().replace(" ", "-"))
+            # Path relative to the category page
+            subcat_relative_page_path = f"{subcat_folder_name}/index.html"
+            subcategory_items.append(
+                f'<li><a class="subcategory-link" href="{subcat_relative_page_path}">{subcat_name}</a> - {generate_lorem(8)}</li>'
+            )
+        cat_body = (
+            f"<p>{generate_lorem(15)} for {cat_name}.</p><h3>Sub-Categories:</h3>\n<ul>"
+            + "".join(subcategory_items)
+            + "</ul>"
+        )
+        # A page's breadcrumbs list only its ancestors; create_html_page appends the page's own title
+        create_html_page(cat_dir / "index.html", cat_name, cat_body, breadcrumbs_home)
+
+        for j in range(NUM_SUBCATEGORIES_PER_CAT):
+            subcat_name = f"{cat_name}-Sub-{j+1}"
+            subcat_folder_name = quote(subcat_name.lower().replace(" ", "-"))
+            subcat_dir = cat_dir / subcat_folder_name
+            # Absolute path for the breadcrumb link
+            subcat_link_path = f"{full_base_path}/{cat_folder_name}/{subcat_folder_name}/index.html"
+            breadcrumbs_subcat = breadcrumbs_cat + [{"name": subcat_name, "link": subcat_link_path}]
+
+            # --- Level 2: Sub-Category Page (Product List) ---
+            product_items = []
+            for k in range(NUM_PRODUCTS_PER_SUBCAT):
+                prod_id = f"P{i+1}{j+1}{k+1:03d}" # e.g., P11001
+                prod_name = f"{subcat_name} Product {k+1} ({prod_id})"
+                # Filenames are relative to the subcategory directory
+                prod_filename = f"product_{prod_id}.html"
+                details_filename = f"product_{prod_id}_details.html"
+                reviews_filename = f"product_{prod_id}_reviews.html"
+                # Absolute path for the breadcrumb link
+                prod_link_path = f"{full_base_path}/{cat_folder_name}/{subcat_folder_name}/{prod_filename}"
+
+                # Draw the price once so the list preview and the product page
+                # show the same value for the same product.
+                prod_price = random.uniform(10, 500)
+
+                # Preview on list page (link remains relative)
+                product_items.append(f"""
+                <li>
+                    <div class="product-preview">
+                        <a class="product-link" href="{prod_filename}"><strong>{prod_name}</strong></a>
+                        <p>{generate_lorem(10)}</p>
+                        <span class="product-price">£{prod_price:.2f}</span>
+                    </div>
+                </li>""")
+
+                # --- Level 3: Product Page ---
+                prod_desc = generate_lorem(40)
+                prod_specs = {f"Spec {s+1}": generate_lorem(3) for s in range(random.randint(3,6))}
+                prod_reviews_count = random.randint(0, 150)
+                specs_html = "".join(
+                    f'<li><span class="spec-name">{name}</span>: <span class="spec-value">{value}</span></li>'
+                    for name, value in prod_specs.items()
+                )
+
+                prod_body = f"""
+                <p class="product-price">Price: £{prod_price:.2f}</p>
+                <div class="product-description">
+                    <h2>Description</h2>
+                    <p>{prod_desc}</p>
+                </div>
+                <div class="product-specs">
+                    <h2>Specifications</h2>
+                    <ul>
+                        {specs_html}
+                    </ul>
+                </div>
+                <div class="product-reviews">
+                    <h2>Reviews</h2>
+                    <p>Total Reviews: <span class="review-count">{prod_reviews_count}</span></p>
+                </div>
+                <hr>
+                <p>
+                    <a class="details-link" href="{details_filename}">View More Details</a> |
+                    <a class="reviews-link" href="{reviews_filename}">See All Reviews</a>
+                </p>
+                """
+                create_html_page(subcat_dir / prod_filename, prod_name, prod_body, breadcrumbs_subcat)
+
+                # Trail for the two pages hanging off this product
+                breadcrumbs_prod = breadcrumbs_subcat + [{"name": prod_name, "link": prod_link_path}]
+
+                # --- Level 4: Product Details Page ---
+                details_body = f"<p>This page contains extremely detailed information about {prod_name}.</p>{generate_lorem(100)}"
+                create_html_page(subcat_dir / details_filename, f"{prod_name} - Details", details_body, breadcrumbs_prod)
+
+                # --- Level 5: Product Reviews Page ---
+                review_items = "".join(
+                    f"<li>Review {r+1}: {generate_lorem(random.randint(15, 50))}</li>"
+                    for r in range(prod_reviews_count)
+                )
+                reviews_body = f"<p>All {prod_reviews_count} reviews for {prod_name} are listed here.</p><ul>{review_items}</ul>"
+                create_html_page(subcat_dir / reviews_filename, f"{prod_name} - Reviews", reviews_body, breadcrumbs_prod)
+
+            subcat_body = (
+                f"<p>Explore products in {subcat_name}. {generate_lorem(12)}</p><h3>Products:</h3>\n<ul class='product-list'>"
+                + "".join(product_items)
+                + "</ul>" # Close product-list ul
+            )
+            create_html_page(subcat_dir / "index.html", subcat_name, subcat_body, breadcrumbs_cat)
+
+
+# --- Main Execution ---
+if __name__ == "__main__":
+    # Command-line entry point: parse options, generate the site, then print
+    # a summary and a hint for serving it locally.
+    parser = argparse.ArgumentParser(description="Generate a dummy multi-level retail website.")
+    parser.add_argument(
+        "-o", "--output-dir",
+        type=str,
+        default="dummy_retail_site",
+        help="Directory to generate the website in."
+    )
+    parser.add_argument(
+        "-n", "--site-name",
+        type=str,
+        default="FakeShop",
+        help="Name of the fake shop."
+    )
+    parser.add_argument(
+        "-b", "--base-path",
+        type=str,
+        default="",
+        help="Base path for hosting the site (e.g., 'samples/deepcrawl'). Leave empty if hosted at the root."
+    )
+    # Optional: Add more args to configure counts if needed
+
+    args = parser.parse_args()
+
+    output_directory = Path(args.output_dir)
+    site_name = args.site_name
+    base_path = args.base_path
+
+    print(f"Generating dummy site '{site_name}' in '{output_directory}'...")
+    # Pass the base_path to the generation function
+    generate_site(output_directory, site_name, base_path)
+    # Counts every .html file under the output directory
+    print(f"\nCreated {sum(1 for _ in output_directory.rglob('*.html'))} HTML pages.")
+    print("Dummy site generation complete.")
+    print(f"To serve locally (example): python -m http.server --directory {output_directory} 8000")
+    # Strip slashes before testing so a bare "/" counts as "hosted at the root"
+    # instead of printing "http://localhost:8000//index.html".
+    url_prefix = base_path.strip("/")
+    if url_prefix:
+        print(f"Access the site at: http://localhost:8000/{url_prefix}/index.html")
+    else:
+        print("Access the site at: http://localhost:8000/index.html")