Merge branch 'main' into main

2025-07-29 14:05:35 +03:00
parent 1e1c887a2f e3281935bc
commit 8e3c411a3e
330 changed files with 116430 additions and 589 deletions
--- a/deploy/docker/README.md
+++ b/deploy/docker/README.md
@@ -58,13 +58,15 @@ Pull and run images directly from Docker Hub without building locally.

 #### 1. Pull the Image

-Our latest release candidate is `0.6.0-r1`. Images are built with multi-arch manifests, so Docker automatically pulls the correct version for your system.
+Our latest release candidate is `0.7.0-r1`. Images are built with multi-arch manifests, so Docker automatically pulls the correct version for your system.
+
+> ⚠️ **Important Note**: The `latest` tag currently points to the stable `0.6.0` version. After testing and validation, `0.7.0` (without -r1) will be released and `latest` will be updated. For now, please use `0.7.0-r1` to test the new features.

 ```bash
-# Pull the release candidate (recommended for latest features)
-docker pull unclecode/crawl4ai:0.6.0-rN # Use your favorite revision number
+# Pull the release candidate (for testing new features)
+docker pull unclecode/crawl4ai:0.7.0-r1

-# Or pull the latest stable version
+# Or pull the current stable version (0.6.0)
 docker pull unclecode/crawl4ai:latest
 ```

@@ -99,7 +101,7 @@ EOL
      -p 11235:11235 \
      --name crawl4ai \
      --shm-size=1g \
-      unclecode/crawl4ai:0.6.0-rN # Use your favorite revision number
+      unclecode/crawl4ai:0.7.0-r1
    ```

 *   **With LLM support:**
@@ -110,7 +112,7 @@ EOL
      --name crawl4ai \
      --env-file .llm.env \
      --shm-size=1g \
-      unclecode/crawl4ai:0.6.0-rN # Use your favorite revision number
+      unclecode/crawl4ai:0.7.0-r1
    ```

 > The server will be available at `http://localhost:11235`. Visit `/playground` to access the interactive testing interface.
@@ -124,7 +126,7 @@ docker stop crawl4ai && docker rm crawl4ai
 #### Docker Hub Versioning Explained

 *   **Image Name:** `unclecode/crawl4ai`
-*   **Tag Format:** `LIBRARY_VERSION[-SUFFIX]` (e.g., `0.6.0-r1`)
+*   **Tag Format:** `LIBRARY_VERSION[-SUFFIX]` (e.g., `0.7.0-r1`)
    *   `LIBRARY_VERSION`: The semantic version of the core `crawl4ai` Python library
    *   `SUFFIX`: Optional tag for release candidates (``) and revisions (`r1`)
 *   **`latest` Tag:** Points to the most recent stable version
@@ -160,7 +162,7 @@ The `docker-compose.yml` file in the project root provides a simplified approach
    ```bash
    # Pulls and runs the release candidate from Docker Hub
    # Automatically selects the correct architecture
-    IMAGE=unclecode/crawl4ai:0.6.0-rN # Use your favorite revision number docker compose up -d
+    IMAGE=unclecode/crawl4ai:0.7.0-r1 docker compose up -d
    ```

 *   **Build and Run Locally:**
--- a/deploy/docker/api.py
+++ b/deploy/docker/api.py
@@ -5,6 +5,7 @@ from typing import List, Tuple, Dict
 from functools import partial
 from uuid import uuid4
 from datetime import datetime, timezone
+from base64 import b64encode

 import logging
 from typing import Optional, AsyncGenerator
@@ -371,6 +372,9 @@ async def stream_results(crawler: AsyncWebCrawler, results_gen: AsyncGenerator)
                server_memory_mb = _get_memory_mb()
                result_dict = result.model_dump()
                result_dict['server_memory_mb'] = server_memory_mb
+                # If PDF exists, encode it to base64
+                if result_dict.get('pdf') is not None:
+                    result_dict['pdf'] = b64encode(result_dict['pdf']).decode('utf-8')
                logger.info(f"Streaming result for {result_dict.get('url', 'unknown')}")
                data = json.dumps(result_dict, default=datetime_handler) + "\n"
                yield data.encode('utf-8')
@@ -443,10 +447,19 @@ async def handle_crawl_request(
            mem_delta_mb = end_mem_mb - start_mem_mb # <--- Calculate delta
            peak_mem_mb = max(peak_mem_mb if peak_mem_mb else 0, end_mem_mb) # <--- Get peak memory
        logger.info(f"Memory usage: Start: {start_mem_mb} MB, End: {end_mem_mb} MB, Delta: {mem_delta_mb} MB, Peak: {peak_mem_mb} MB")
-                              
+
+        # Process results to handle PDF bytes
+        processed_results = []
+        for result in results:
+            result_dict = result.model_dump()
+            # If PDF exists, encode it to base64
+            if result_dict.get('pdf') is not None:
+                result_dict['pdf'] = b64encode(result_dict['pdf']).decode('utf-8')
+            processed_results.append(result_dict)
+            
        return {
            "success": True,
-            "results": [result.model_dump() for result in results],
+            "results": processed_results,
            "server_processing_time_s": end_time - start_time,
            "server_memory_delta_mb": mem_delta_mb,
            "server_peak_memory_mb": peak_mem_mb
@@ -459,7 +472,7 @@ async def handle_crawl_request(
            #      await crawler.close()
            #  except Exception as close_e:
            #       logger.error(f"Error closing crawler during exception handling: {close_e}")
-            logger.error(f"Error closing crawler during exception handling: {close_e}")
+            logger.error(f"Error closing crawler during exception handling: {str(e)}")

        # Measure memory even on error if possible
        end_mem_mb_error = _get_memory_mb()
@@ -518,7 +531,7 @@ async def handle_stream_crawl_request(
            #       await crawler.close()
            #  except Exception as close_e:
            #       logger.error(f"Error closing crawler during stream setup exception: {close_e}")
-            logger.error(f"Error closing crawler during stream setup exception: {close_e}")
+            logger.error(f"Error closing crawler during stream setup exception: {str(e)}")
        logger.error(f"Stream crawl error: {str(e)}", exc_info=True)
        # Raising HTTPException here will prevent streaming response
        raise HTTPException(
--- a/deploy/docker/c4ai-code-context.md
+++ b/deploy/docker/c4ai-code-context.md
@@ -1263,7 +1263,7 @@ class LLMConfig:
        provider: str = DEFAULT_PROVIDER,
        api_token: Optional[str] = None,
        base_url: Optional[str] = None,
-        temprature: Optional[float] = None,
+        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
        top_p: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
@@ -1291,7 +1291,7 @@ class LLMConfig:
                self.provider = DEFAULT_PROVIDER
                self.api_token = os.getenv(DEFAULT_PROVIDER_API_KEY)
        self.base_url = base_url
-        self.temprature = temprature
+        self.temperature = temperature
        self.max_tokens = max_tokens
        self.top_p = top_p
        self.frequency_penalty = frequency_penalty
@@ -1305,7 +1305,7 @@ class LLMConfig:
            provider=kwargs.get("provider", DEFAULT_PROVIDER),
            api_token=kwargs.get("api_token"),
            base_url=kwargs.get("base_url"),
-            temprature=kwargs.get("temprature"),
+            temperature=kwargs.get("temperature"),
            max_tokens=kwargs.get("max_tokens"),
            top_p=kwargs.get("top_p"),
            frequency_penalty=kwargs.get("frequency_penalty"),
@@ -1319,7 +1319,7 @@ class LLMConfig:
            "provider": self.provider,
            "api_token": self.api_token,
            "base_url": self.base_url,
-            "temprature": self.temprature,
+            "temperature": self.temperature,
            "max_tokens": self.max_tokens,
            "top_p": self.top_p,
            "frequency_penalty": self.frequency_penalty,
@@ -4075,7 +4075,7 @@ class LLMExtractionStrategy(ExtractionStrategy):
            api_token: The API token for the provider.
            base_url: The base URL for the API request.
            api_base: The base URL for the API request.
-            extra_args: Additional arguments for the API request, such as temprature, max_tokens, etc.
+            extra_args: Additional arguments for the API request, such as temperature, max_tokens, etc.
        """
        super().__init__( input_format=input_format, **kwargs)
        self.llm_config = llm_config
@@ -7901,7 +7901,7 @@ from pydantic import BaseModel, Field
 from crawl4ai import AsyncWebCrawler, CacheMode, BrowserConfig, CrawlerRunConfig
 from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator
 from crawl4ai.content_filter_strategy import PruningContentFilter
-from crawl4ai.extraction_strategy import (
+from crawl4ai import (
    JsonCssExtractionStrategy,
    LLMExtractionStrategy,
 )
@@ -8301,7 +8301,7 @@ async def crawl_dynamic_content_pages_method_2():


 async def cosine_similarity_extraction():
-    from crawl4ai.extraction_strategy import CosineStrategy
+    from crawl4ai import CosineStrategy
    crawl_config = CrawlerRunConfig(
        cache_mode=CacheMode.BYPASS,
        extraction_strategy=CosineStrategy(
--- a/deploy/docker/c4ai-doc-context.md
+++ b/deploy/docker/c4ai-doc-context.md
@@ -332,7 +332,7 @@ The `clone()` method:
 ### Key fields to note

 1. **`provider`**:  
- Which LLM provoder to use. 
+- Which LLM provider to use. 
 - Possible values are `"ollama/llama3","groq/llama3-70b-8192","groq/llama3-8b-8192", "openai/gpt-4o-mini" ,"openai/gpt-4o","openai/o1-mini","openai/o1-preview","openai/o3-mini","openai/o3-mini-high","anthropic/claude-3-haiku-20240307","anthropic/claude-3-opus-20240229","anthropic/claude-3-sonnet-20240229","anthropic/claude-3-5-sonnet-20240620","gemini/gemini-pro","gemini/gemini-1.5-pro","gemini/gemini-2.0-flash","gemini/gemini-2.0-flash-exp","gemini/gemini-2.0-flash-lite-preview-02-05","deepseek/deepseek-chat"`<br/>*(default: `"openai/gpt-4o-mini"`)*

 2. **`api_token`**:  
@@ -354,7 +354,7 @@ In a typical scenario, you define **one** `BrowserConfig` for your crawler sessi
 ```python
 import asyncio
 from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode, LLMConfig
-from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
+from crawl4ai import JsonCssExtractionStrategy

 async def main():
    # 1) Browser config: headless, bigger viewport, no proxy
@@ -403,7 +403,7 @@ async def main():

    md_generator = DefaultMarkdownGenerator(
    content_filter=filter,
-    options={"ignore_links": True}
+    options={"ignore_links": True})

    # 4) Crawler run config: skip cache, use extraction
    run_conf = CrawlerRunConfig(
@@ -1042,7 +1042,7 @@ You can combine content selection with a more advanced extraction strategy. For
 import asyncio
 import json
 from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, CacheMode
-from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
+from crawl4ai import JsonCssExtractionStrategy

 async def main():
    # Minimal schema for repeated items
@@ -1094,7 +1094,7 @@ import asyncio
 import json
 from pydantic import BaseModel, Field
 from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, LLMConfig
-from crawl4ai.extraction_strategy import LLMExtractionStrategy
+from crawl4ai import LLMExtractionStrategy

 class ArticleData(BaseModel):
    headline: str
@@ -1139,7 +1139,7 @@ Below is a short function that unifies **CSS selection**, **exclusion** logic, a
 import asyncio
 import json
 from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, CacheMode
-from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
+from crawl4ai import JsonCssExtractionStrategy

 async def extract_main_articles(url: str):
    schema = {
@@ -1488,7 +1488,7 @@ If you run a JSON-based extraction strategy (CSS, XPath, LLM, etc.), the structu
 import asyncio
 import json
 from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, CacheMode
-from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
+from crawl4ai import JsonCssExtractionStrategy

 async def main():
    schema = {
@@ -3760,11 +3760,11 @@ To crawl a live web page, provide the URL starting with `http://` or `https://`,

 ```python
 import asyncio
-from crawl4ai import AsyncWebCrawler
+from crawl4ai import AsyncWebCrawler, CacheMode
 from crawl4ai.async_configs import CrawlerRunConfig

 async def crawl_web():
-    config = CrawlerRunConfig(bypass_cache=True)
+    config = CrawlerRunConfig(cache_mode=CacheMode.BYPASS)
    async with AsyncWebCrawler() as crawler:
        result = await crawler.arun(
            url="https://en.wikipedia.org/wiki/apple", 
@@ -3785,13 +3785,13 @@ To crawl a local HTML file, prefix the file path with `file://`.

 ```python
 import asyncio
-from crawl4ai import AsyncWebCrawler
+from crawl4ai import AsyncWebCrawler, CacheMode
 from crawl4ai.async_configs import CrawlerRunConfig

 async def crawl_local_file():
    local_file_path = "/path/to/apple.html"  # Replace with your file path
    file_url = f"file://{local_file_path}"
-    config = CrawlerRunConfig(bypass_cache=True)
+    config = CrawlerRunConfig(cache_mode=CacheMode.BYPASS)
    
    async with AsyncWebCrawler() as crawler:
        result = await crawler.arun(url=file_url, config=config)
@@ -3810,13 +3810,13 @@ To crawl raw HTML content, prefix the HTML string with `raw:`.

 ```python
 import asyncio
-from crawl4ai import AsyncWebCrawler
+from crawl4ai import AsyncWebCrawler, CacheMode
 from crawl4ai.async_configs import CrawlerRunConfig

 async def crawl_raw_html():
    raw_html = "<html><body><h1>Hello, World!</h1></body></html>"
    raw_html_url = f"raw:{raw_html}"
-    config = CrawlerRunConfig(bypass_cache=True)
+    config = CrawlerRunConfig(cache_mode=CacheMode.BYPASS)
    
    async with AsyncWebCrawler() as crawler:
        result = await crawler.arun(url=raw_html_url, config=config)
@@ -3845,7 +3845,7 @@ import os
 import sys
 import asyncio
 from pathlib import Path
-from crawl4ai import AsyncWebCrawler
+from crawl4ai import AsyncWebCrawler, CacheMode
 from crawl4ai.async_configs import CrawlerRunConfig

 async def main():
@@ -3856,7 +3856,7 @@ async def main():
    async with AsyncWebCrawler() as crawler:
        # Step 1: Crawl the Web URL
        print("\n=== Step 1: Crawling the Wikipedia URL ===")
-        web_config = CrawlerRunConfig(bypass_cache=True)
+        web_config = CrawlerRunConfig(cache_mode=CacheMode.BYPASS)
        result = await crawler.arun(url=wikipedia_url, config=web_config)

        if not result.success:
@@ -3871,7 +3871,7 @@ async def main():
        # Step 2: Crawl from the Local HTML File
        print("=== Step 2: Crawling from the Local HTML File ===")
        file_url = f"file://{html_file_path.resolve()}"
-        file_config = CrawlerRunConfig(bypass_cache=True)
+        file_config = CrawlerRunConfig(cache_mode=CacheMode.BYPASS)
        local_result = await crawler.arun(url=file_url, config=file_config)

        if not local_result.success:
@@ -3887,7 +3887,7 @@ async def main():
        with open(html_file_path, 'r', encoding='utf-8') as f:
            raw_html_content = f.read()
        raw_html_url = f"raw:{raw_html_content}"
-        raw_config = CrawlerRunConfig(bypass_cache=True)
+        raw_config = CrawlerRunConfig(cache_mode=CacheMode.BYPASS)
        raw_result = await crawler.arun(url=raw_html_url, config=raw_config)

        if not raw_result.success:
@@ -4152,7 +4152,7 @@ prune_filter = PruningContentFilter(
 For intelligent content filtering and high-quality markdown generation, you can use the **LLMContentFilter**. This filter leverages LLMs to generate relevant markdown while preserving the original content's meaning and structure:

 ```python
-from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, LLMConfig
+from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, LLMConfig, DefaultMarkdownGenerator
 from crawl4ai.content_filter_strategy import LLMContentFilter

 async def main():
@@ -4175,8 +4175,13 @@ async def main():
        verbose=True
    )

+    md_generator = DefaultMarkdownGenerator(
+        content_filter=filter,
+        options={"ignore_links": True}
+    )
+
    config = CrawlerRunConfig(
-        content_filter=filter
+        markdown_generator=md_generator
    )

    async with AsyncWebCrawler() as crawler:
@@ -4722,7 +4727,7 @@ if __name__ == "__main__":
 Once dynamic content is loaded, you can attach an **`extraction_strategy`** (like `JsonCssExtractionStrategy` or `LLMExtractionStrategy`). For example:

 ```python
-from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
+from crawl4ai import JsonCssExtractionStrategy

 schema = {
    "name": "Commits",
@@ -4902,7 +4907,7 @@ Crawl4AI can also extract structured data (JSON) using CSS or XPath selectors. B
 > **New!** Crawl4AI now provides a powerful utility to automatically generate extraction schemas using LLM. This is a one-time cost that gives you a reusable schema for fast, LLM-free extractions:

 ```python
-from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
+from crawl4ai import JsonCssExtractionStrategy
 from crawl4ai import LLMConfig

 # Generate a schema (one-time cost)
@@ -4932,7 +4937,7 @@ Here's a basic extraction example:
 import asyncio
 import json
 from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, CacheMode
-from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
+from crawl4ai import JsonCssExtractionStrategy

 async def main():
    schema = {
@@ -4987,7 +4992,7 @@ import json
 import asyncio
 from pydantic import BaseModel, Field
 from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, LLMConfig
-from crawl4ai.extraction_strategy import LLMExtractionStrategy
+from crawl4ai import LLMExtractionStrategy

 class OpenAIModelFee(BaseModel):
    model_name: str = Field(..., description="Name of the OpenAI model.")
@@ -5103,7 +5108,7 @@ Some sites require multiple “page clicks” or dynamic JavaScript updates. Bel
 ```python
 import asyncio
 from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
-from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
+from crawl4ai import JsonCssExtractionStrategy

 async def extract_structured_data_using_css_extractor():
    print("\n--- Using JsonCssExtractionStrategy for Fast Structured Output ---")
@@ -5428,29 +5433,38 @@ Sometimes you need a visual record of a page or a PDF “printout.” Crawl4AI c
 ```python
 import os, asyncio
 from base64 import b64decode
-from crawl4ai import AsyncWebCrawler, CacheMode
+from crawl4ai import AsyncWebCrawler, CacheMode, CrawlerRunConfig

 async def main():
+    run_config = CrawlerRunConfig(
+        cache_mode=CacheMode.BYPASS,
+        screenshot=True,
+        pdf=True
+    )
+
    async with AsyncWebCrawler() as crawler:
        result = await crawler.arun(
            url="https://en.wikipedia.org/wiki/List_of_common_misconceptions",
-            cache_mode=CacheMode.BYPASS,
-            pdf=True,
-            screenshot=True
+            config=run_config
        )
-        
        if result.success:
-            # Save screenshot
+            print(f"Screenshot data present: {result.screenshot is not None}")
+            print(f"PDF data present: {result.pdf is not None}")
+
            if result.screenshot:
+                print(f"[OK] Screenshot captured, size: {len(result.screenshot)} bytes")
                with open("wikipedia_screenshot.png", "wb") as f:
                    f.write(b64decode(result.screenshot))
-            
-            # Save PDF
+            else:
+                print("[WARN] Screenshot data is None.")
+
            if result.pdf:
+                print(f"[OK] PDF captured, size: {len(result.pdf)} bytes")
                with open("wikipedia_page.pdf", "wb") as f:
                    f.write(result.pdf)
-            
-            print("[OK] PDF & screenshot captured.")
+            else:
+                print("[WARN] PDF data is None.")
+
        else:
            print("[ERROR]", result.error_message)

@@ -6705,7 +6719,7 @@ dispatcher = MemoryAdaptiveDispatcher(
 3. **`max_session_permit`** (`int`, default: `10`)  
   The maximum number of concurrent crawling tasks allowed. This ensures resource limits are respected while maintaining concurrency.

-4. **`memory_wait_timeout`** (`float`, default: `300.0`)  
+4. **`memory_wait_timeout`** (`float`, default: `600.0`)
   Optional timeout (in seconds). If memory usage exceeds `memory_threshold_percent` for longer than this duration, a `MemoryError` is raised.

 5. **`rate_limiter`** (`RateLimiter`, default: `None`)  
@@ -7300,7 +7314,7 @@ Here's an example of crawling GitHub commits across multiple pages while preserv

 ```python
 from crawl4ai.async_configs import CrawlerRunConfig
-from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
+from crawl4ai import JsonCssExtractionStrategy
 from crawl4ai.cache_context import CacheMode

 async def crawl_dynamic_content():
@@ -7850,7 +7864,7 @@ The Cosine Strategy:
 ## Basic Usage

 ```python
-from crawl4ai.extraction_strategy import CosineStrategy
+from crawl4ai import CosineStrategy

 strategy = CosineStrategy(
    semantic_filter="product reviews",    # Target content type
@@ -8161,7 +8175,7 @@ import json
 from pydantic import BaseModel, Field
 from typing import List
 from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode, LLMConfig
-from crawl4ai.extraction_strategy import LLMExtractionStrategy
+from crawl4ai import LLMExtractionStrategy

 class Product(BaseModel):
    name: str
@@ -8278,7 +8292,7 @@ import asyncio
 from typing import List
 from pydantic import BaseModel, Field
 from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
-from crawl4ai.extraction_strategy import LLMExtractionStrategy
+from crawl4ai import LLMExtractionStrategy

 class Entity(BaseModel):
    name: str
@@ -8423,7 +8437,7 @@ Let’s begin with a **simple** schema-based extraction using the `JsonCssExtrac
 import json
 import asyncio
 from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, CacheMode
-from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
+from crawl4ai import JsonCssExtractionStrategy

 async def extract_crypto_prices():
    # 1. Define a simple extraction schema
@@ -8493,7 +8507,7 @@ Below is a short example demonstrating **XPath** extraction plus the **`raw://`*
 import json
 import asyncio
 from crawl4ai import AsyncWebCrawler, CrawlerRunConfig
-from crawl4ai.extraction_strategy import JsonXPathExtractionStrategy
+from crawl4ai import JsonXPathExtractionStrategy

 async def extract_crypto_prices_xpath():
    # 1. Minimal dummy HTML with some repeating rows
@@ -8694,7 +8708,7 @@ Key Takeaways:
 import json
 import asyncio
 from crawl4ai import AsyncWebCrawler, CrawlerRunConfig
-from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
+from crawl4ai import JsonCssExtractionStrategy

 ecommerce_schema = {
    # ... the advanced schema from above ...
@@ -8804,7 +8818,7 @@ While manually crafting schemas is powerful and precise, Crawl4AI now offers a c
 The schema generator is available as a static method on both `JsonCssExtractionStrategy` and `JsonXPathExtractionStrategy`. You can choose between OpenAI's GPT-4 or the open-source Ollama for schema generation:

 ```python
-from crawl4ai.extraction_strategy import JsonCssExtractionStrategy, JsonXPathExtractionStrategy
+from crawl4ai import JsonCssExtractionStrategy, JsonXPathExtractionStrategy
 from crawl4ai import LLMConfig

 # Sample HTML with product information
--- a/deploy/docker/requirements.txt
+++ b/deploy/docker/requirements.txt
@@ -14,3 +14,4 @@ anyio==4.9.0
 PyJWT==2.10.1
 mcp>=1.6.0
 websockets>=15.0.1
+httpx[http2]>=0.27.2
--- a/deploy/docker/schemas.py
+++ b/deploy/docker/schemas.py
@@ -12,8 +12,7 @@ class CrawlRequest(BaseModel):
 class MarkdownRequest(BaseModel):
    """Request body for the /md endpoint."""
    url: str                    = Field(...,  description="Absolute http/https URL to fetch")
-    f:   FilterType             = Field(FilterType.FIT,
-                                        description="Content‑filter strategy: FIT, RAW, BM25, or LLM")
+    f:   FilterType             = Field(FilterType.FIT, description="Content‑filter strategy: fit, raw, bm25, or llm")
    q:   Optional[str] = Field(None,  description="Query string used by BM25/LLM filters")
    c:   Optional[str] = Field("0",   description="Cache‑bust / revision counter")

--- a/deploy/docker/static/playground/index.html
+++ b/deploy/docker/static/playground/index.html
@@ -671,6 +671,16 @@
                        method: 'GET',
                        headers: { 'Accept': 'application/json' }
                    });
+                    responseData = await response.json();
+                    const time = Math.round(performance.now() - startTime);
+                    if (!response.ok) {
+                        updateStatus('error', time);
+                        throw new Error(responseData.error || 'Request failed');
+                    }
+                    updateStatus('success', time);
+                    document.querySelector('#response-content code').textContent = JSON.stringify(responseData, null, 2);
+                    document.querySelector('#response-content code').className = 'json hljs';
+                    forceHighlightElement(document.querySelector('#response-content code'));
                } else if (endpoint === 'crawl_stream') {
                    // Stream processing
                    response = await fetch(api, {