From 159207b86fc3920bd12192cbaab8374b2e7c6375 Mon Sep 17 00:00:00 2001
From: ntohidi <nasrin@kidocode.com>
Date: Tue, 26 Aug 2025 16:44:07 +0800
Subject: [PATCH] feat(docker): Add temperature and base_url parameters for LLM
 configuration. ref #1035

  Implement hierarchical configuration for LLM parameters with support for:
  - Temperature control (0.0-2.0) to adjust response creativity
  - Custom base_url for proxy servers and alternative endpoints
  - 4-tier priority: request params > provider env > global env > defaults

  Add helper functions in utils.py, update API schemas and handlers,
  support environment variables (LLM_TEMPERATURE, OPENAI_TEMPERATURE, etc.),
  and provide comprehensive documentation with examples.
---
 deploy/docker/.llm.env.example       |  21 +-
 deploy/docker/api.py                 |  38 ++-
 deploy/docker/job.py                 |   6 +-
 deploy/docker/schemas.py             |   2 +
 deploy/docker/server.py              |   3 +-
 deploy/docker/utils.py               |  63 +++++
 docs/md_v2/core/docker-deployment.md | 144 ++++++++++-
 tests/docker/test_llm_params.py      | 349 +++++++++++++++++++++++++++
 8 files changed, 603 insertions(+), 23 deletions(-)
 create mode 100755 tests/docker/test_llm_params.py

diff --git a/deploy/docker/.llm.env.example b/deploy/docker/.llm.env.example
index 254002f4..012435d8 100644
--- a/deploy/docker/.llm.env.example
+++ b/deploy/docker/.llm.env.example
@@ -10,4 +10,23 @@ GEMINI_API_TOKEN=your_gemini_key_here
 # Optional: Override the default LLM provider
 # Examples: "openai/gpt-4", "anthropic/claude-3-opus", "deepseek/chat", etc.
 # If not set, uses the provider specified in config.yml (default: openai/gpt-4o-mini)
-# LLM_PROVIDER=anthropic/claude-3-opus
\ No newline at end of file
+# LLM_PROVIDER=anthropic/claude-3-opus
+
+# Optional: Global LLM temperature setting (0.0-2.0)
+# Controls randomness in responses. Lower = more focused, Higher = more creative
+# LLM_TEMPERATURE=0.7
+
+# Optional: Global custom API base URL
+# Use this to point to custom endpoints or proxy servers
+# LLM_BASE_URL=https://api.custom.com/v1
+
+# Optional: Provider-specific temperature overrides
+# These take precedence over the global LLM_TEMPERATURE
+# OPENAI_TEMPERATURE=0.5
+# ANTHROPIC_TEMPERATURE=0.3
+# GROQ_TEMPERATURE=0.8
+
+# Optional: Provider-specific base URL overrides
+# Use for provider-specific proxy endpoints
+# OPENAI_BASE_URL=https://custom-openai.company.com/v1
+# GROQ_BASE_URL=https://custom-groq.company.com/v1
\ No newline at end of file
diff --git a/deploy/docker/api.py b/deploy/docker/api.py
index c01c5ca7..53359e1f 100644
--- a/deploy/docker/api.py
+++ b/deploy/docker/api.py
@@ -42,7 +42,9 @@ from utils import (
     should_cleanup_task,
     decode_redis_hash,
     get_llm_api_key,
-    validate_llm_provider
+    validate_llm_provider,
+    get_llm_temperature,
+    get_llm_base_url
 )
 
 import psutil, time
@@ -96,7 +98,9 @@ async def handle_llm_qa(
         response = perform_completion_with_backoff(
             provider=config["llm"]["provider"],
             prompt_with_variables=prompt,
-            api_token=get_llm_api_key(config)  # Returns None to let litellm handle it
+            api_token=get_llm_api_key(config),  # Returns None to let litellm handle it
+            temperature=get_llm_temperature(config),
+            base_url=get_llm_base_url(config)
         )
 
         return response.choices[0].message.content
@@ -115,7 +119,9 @@ async def process_llm_extraction(
     instruction: str,
     schema: Optional[str] = None,
     cache: str = "0",
-    provider: Optional[str] = None
+    provider: Optional[str] = None,
+    temperature: Optional[float] = None,
+    base_url: Optional[str] = None
 ) -> None:
     """Process LLM extraction in background."""
     try:
@@ -131,7 +137,9 @@ async def process_llm_extraction(
         llm_strategy = LLMExtractionStrategy(
             llm_config=LLMConfig(
                 provider=provider or config["llm"]["provider"],
-                api_token=api_key
+                api_token=api_key,
+                temperature=temperature if temperature is not None else get_llm_temperature(config, provider),  # explicit 0.0 must win
+                base_url=base_url or get_llm_base_url(config, provider)
             ),
             instruction=instruction,
             schema=json.loads(schema) if schema else None,
@@ -178,7 +186,9 @@ async def handle_markdown_request(
     query: Optional[str] = None,
     cache: str = "0",
     config: Optional[dict] = None,
-    provider: Optional[str] = None
+    provider: Optional[str] = None,
+    temperature: Optional[float] = None,
+    base_url: Optional[str] = None
 ) -> str:
     """Handle markdown generation requests."""
     try:
@@ -204,6 +214,8 @@ async def handle_markdown_request(
                     llm_config=LLMConfig(
                         provider=provider or config["llm"]["provider"],
                         api_token=get_llm_api_key(config, provider),  # Returns None to let litellm handle it
+                        temperature=temperature if temperature is not None else get_llm_temperature(config, provider),  # explicit 0.0 must win
+                        base_url=base_url or get_llm_base_url(config, provider)
                     ),
                     instruction=query or "Extract main content"
                 )
@@ -248,7 +260,9 @@ async def handle_llm_request(
     schema: Optional[str] = None,
     cache: str = "0",
     config: Optional[dict] = None,
-    provider: Optional[str] = None
+    provider: Optional[str] = None,
+    temperature: Optional[float] = None,
+    api_base_url: Optional[str] = None
 ) -> JSONResponse:
     """Handle LLM extraction requests."""
     base_url = get_base_url(request)
@@ -279,7 +293,9 @@ async def handle_llm_request(
             cache,
             base_url,
             config,
-            provider
+            provider,
+            temperature,
+            api_base_url
         )
 
     except Exception as e:
@@ -324,7 +340,9 @@ async def create_new_task(
     cache: str,
     base_url: str,
     config: dict,
-    provider: Optional[str] = None
+    provider: Optional[str] = None,
+    temperature: Optional[float] = None,
+    api_base_url: Optional[str] = None
 ) -> JSONResponse:
     """Create and initialize a new task."""
     decoded_url = unquote(input_path)
@@ -349,7 +367,9 @@ async def create_new_task(
         query,
         schema,
         cache,
-        provider
+        provider,
+        temperature,
+        api_base_url
     )
 
     return JSONResponse({
diff --git a/deploy/docker/job.py b/deploy/docker/job.py
index 10d83fdd..823dd8c8 100644
--- a/deploy/docker/job.py
+++ b/deploy/docker/job.py
@@ -37,6 +37,8 @@ class LlmJobPayload(BaseModel):
     schema: Optional[str] = None
     cache:  bool = False
     provider: Optional[str] = None
+    temperature: Optional[float] = None
+    base_url: Optional[str] = None
 
 
 class CrawlJobPayload(BaseModel):
@@ -63,6 +65,8 @@ async def llm_job_enqueue(
         cache=payload.cache,
         config=_config,
         provider=payload.provider,
+        temperature=payload.temperature,
+        api_base_url=payload.base_url,
     )
 
 
@@ -72,7 +76,7 @@ async def llm_job_status(
     task_id: str,
     _td: Dict = Depends(lambda: _token_dep())
 ):
-    return await handle_task_status(_redis, task_id)
+    return await handle_task_status(_redis, task_id, base_url=str(request.base_url))
 
 
 # ---------- CRAWL job -------------------------------------------------------
diff --git a/deploy/docker/schemas.py b/deploy/docker/schemas.py
index 96196633..0d8335b6 100644
--- a/deploy/docker/schemas.py
+++ b/deploy/docker/schemas.py
@@ -16,6 +16,8 @@ class MarkdownRequest(BaseModel):
     q:   Optional[str] = Field(None,  description="Query string used by BM25/LLM filters")
     c:   Optional[str] = Field("0",   description="Cache‑bust / revision counter")
     provider: Optional[str] = Field(None, description="LLM provider override (e.g., 'anthropic/claude-3-opus')")
+    temperature: Optional[float] = Field(None, description="LLM temperature override (0.0-2.0)")
+    base_url: Optional[str] = Field(None, description="LLM API base URL override")
 
 
 class RawCode(BaseModel):
diff --git a/deploy/docker/server.py b/deploy/docker/server.py
index 57fd3d6d..e453758a 100644
--- a/deploy/docker/server.py
+++ b/deploy/docker/server.py
@@ -241,7 +241,8 @@ async def get_markdown(
         raise HTTPException(
             400, "Invalid URL format. Must start with http://, https://, or for raw HTML (raw:, raw://)")
     markdown = await handle_markdown_request(
-        body.url, body.f, body.q, body.c, config, body.provider
+        body.url, body.f, body.q, body.c, config, body.provider,
+        body.temperature, body.base_url
     )
     return JSONResponse({
         "url": body.url,
diff --git a/deploy/docker/utils.py b/deploy/docker/utils.py
index 8ec591e5..5f3618af 100644
--- a/deploy/docker/utils.py
+++ b/deploy/docker/utils.py
@@ -108,6 +108,69 @@ def validate_llm_provider(config: Dict, provider: Optional[str] = None) -> tuple
     return True, ""
 
 
+def get_llm_temperature(config: Dict, provider: Optional[str] = None) -> Optional[float]:
+    """Get temperature setting based on the LLM provider.
+
+    Priority order:
+    1. Provider-specific environment variable (e.g., OPENAI_TEMPERATURE)
+    2. Global LLM_TEMPERATURE environment variable
+    3. None (to use litellm/provider defaults)
+
+    Args:
+        config: Application configuration; its ``llm.provider`` entry is used
+            when no explicit provider is passed
+        provider: Optional provider override (e.g., "openai/gpt-4")
+
+    Returns:
+        The temperature as a float if configured and parseable, otherwise None
+    """
+    # Fall back to the configured default provider so its provider-specific
+    # variable is honoured even when the caller passes no override.
+    provider = provider or ((config or {}).get("llm") or {}).get("provider")
+    if provider:
+        provider_name = provider.split('/')[0].upper()
+        provider_temp = os.environ.get(f"{provider_name}_TEMPERATURE")
+        if provider_temp:
+            try:
+                return float(provider_temp)
+            except ValueError:
+                # Bad provider value: warn and fall through to the global one
+                logging.warning(f"Invalid temperature value for {provider_name}: {provider_temp}")
+    global_temp = os.environ.get("LLM_TEMPERATURE")
+    if global_temp:
+        try:
+            return float(global_temp)
+        except ValueError:
+            logging.warning(f"Invalid global temperature value: {global_temp}")
+    return None
+
+
+def get_llm_base_url(config: Dict, provider: Optional[str] = None) -> Optional[str]:
+    """Get base URL setting based on the LLM provider.
+
+    Priority order:
+    1. Provider-specific environment variable (e.g., OPENAI_BASE_URL)
+    2. Global LLM_BASE_URL environment variable
+    3. None (to use default endpoints)
+
+    Args:
+        config: Application configuration; its ``llm.provider`` entry is used
+            when no explicit provider is passed
+        provider: Optional provider override (e.g., "openai/gpt-4")
+
+    Returns:
+        The base URL if configured, otherwise None
+    """
+    # Fall back to the configured default provider when no override is given
+    provider = provider or ((config or {}).get("llm") or {}).get("provider")
+    if provider:
+        provider_name = provider.split('/')[0].upper()
+        provider_url = os.environ.get(f"{provider_name}_BASE_URL")
+        if provider_url:
+            return provider_url
+    return os.environ.get("LLM_BASE_URL") or None  # empty string counts as unset
+
+
 def verify_email_domain(email: str) -> bool:
     try:
         domain = email.split('@')[1]
diff --git a/docs/md_v2/core/docker-deployment.md b/docs/md_v2/core/docker-deployment.md
index deda8163..a98b7ab8 100644
--- a/docs/md_v2/core/docker-deployment.md
+++ b/docs/md_v2/core/docker-deployment.md
@@ -89,6 +89,16 @@ ANTHROPIC_API_KEY=your-anthropic-key
 # TOGETHER_API_KEY=your-together-key
 # MISTRAL_API_KEY=your-mistral-key
 # GEMINI_API_TOKEN=your-gemini-token
+
+# Optional: Global LLM settings
+# LLM_PROVIDER=openai/gpt-4o-mini
+# LLM_TEMPERATURE=0.7
+# LLM_BASE_URL=https://api.custom.com/v1
+
+# Optional: Provider-specific overrides
+# OPENAI_TEMPERATURE=0.5
+# OPENAI_BASE_URL=https://custom-openai.com/v1
+# ANTHROPIC_TEMPERATURE=0.3
 EOL
 ```
 > 🔑 **Note**: Keep your API keys secure! Never commit `.llm.env` to version control.
@@ -156,28 +166,44 @@ cp deploy/docker/.llm.env.example .llm.env
 
 **Flexible LLM Provider Configuration:**
 
-The Docker setup now supports flexible LLM provider configuration through three methods:
+The Docker setup now supports flexible LLM provider configuration through a hierarchical system:
 
-1. **Environment Variable** (Highest Priority): Set `LLM_PROVIDER` to override the default
-   ```bash
-   export LLM_PROVIDER="anthropic/claude-3-opus"
-   # Or in your .llm.env file:
-   # LLM_PROVIDER=anthropic/claude-3-opus
-   ```
-
-2. **API Request Parameter**: Specify provider per request
+1. **API Request Parameters** (Highest Priority): Specify per request
    ```json
    {
      "url": "https://example.com",
      "f": "llm",
-     "provider": "groq/mixtral-8x7b"
+     "provider": "groq/mixtral-8x7b",
+     "temperature": 0.7,
+     "base_url": "https://api.custom.com/v1"
    }
    ```
 
-3. **Config File Default**: Falls back to `config.yml` (default: `openai/gpt-4o-mini`)
+2. **Provider-Specific Environment Variables**: Override for specific providers
+   ```bash
+   # In your .llm.env file:
+   OPENAI_TEMPERATURE=0.5
+   OPENAI_BASE_URL=https://custom-openai.com/v1
+   ANTHROPIC_TEMPERATURE=0.3
+   ```
+
+3. **Global Environment Variables**: Set defaults for all providers
+   ```bash
+   # In your .llm.env file:
+   LLM_PROVIDER=anthropic/claude-3-opus
+   LLM_TEMPERATURE=0.7
+   LLM_BASE_URL=https://api.proxy.com/v1
+   ```
+
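+4. **Defaults**: `provider` falls back to `config.yml` (default: `openai/gpt-4o-mini`); `temperature` and `base_url` have no config-file entry and fall back to the provider/LiteLLM defaults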
 
 The system automatically selects the appropriate API key based on the provider. LiteLLM handles finding the correct environment variable for each provider (e.g., OPENAI_API_KEY for OpenAI, GEMINI_API_TOKEN for Google Gemini, etc.).
 
+**Supported LLM Parameters:**
+- `provider`: LLM provider and model (e.g., "openai/gpt-4", "anthropic/claude-3-opus")
+- `temperature`: Controls randomness (0.0-2.0, lower = more focused, higher = more creative)
+- `base_url`: Custom API endpoint for proxy servers or alternative endpoints
+
 #### 3. Build and Run with Compose
 
 The `docker-compose.yml` file in the project root provides a simplified approach that automatically handles architecture detection using buildx.
@@ -555,6 +581,101 @@ Crucially, when sending configurations directly via JSON, they **must** follow t
 **LLM Extraction Strategy** *(Keep example, ensure schema uses type/value wrapper)*
 *(Keep Deep Crawler Example)*
 
+### LLM Configuration Examples
+
+The Docker API supports dynamic LLM configuration through multiple levels:
+
+#### Temperature Control
+
+Temperature affects the randomness of LLM responses (0.0 = deterministic, 2.0 = very creative):
+
+```python
+import requests
+
+# Low temperature for factual extraction
+response = requests.post(
+    "http://localhost:11235/md",
+    json={
+        "url": "https://example.com",
+        "f": "llm",
+        "q": "Extract all dates and numbers from this page",
+        "temperature": 0.2  # Very focused, deterministic
+    }
+)
+
+# High temperature for creative tasks
+response = requests.post(
+    "http://localhost:11235/md",
+    json={
+        "url": "https://example.com", 
+        "f": "llm",
+        "q": "Write a creative summary of this content",
+        "temperature": 1.2  # More creative, varied responses
+    }
+)
+```
+
+#### Custom API Endpoints
+
+Use custom base URLs for proxy servers or alternative API endpoints:
+
+```python
+import requests
+# Using a local LLM server
+response = requests.post(
+    "http://localhost:11235/md",
+    json={
+        "url": "https://example.com",
+        "f": "llm",
+        "q": "Extract key information",
+        "provider": "ollama/llama2",
+        "base_url": "http://localhost:11434/v1"
+    }
+)
+```
+
+#### Dynamic Provider Selection
+
+Switch between providers based on task requirements:
+
+```python
+async def smart_extraction(url: str, content_type: str):
+    """Select provider and temperature based on content type"""
+    
+    configs = {
+        "technical": {
+            "provider": "openai/gpt-4",
+            "temperature": 0.3,
+            "query": "Extract technical specifications and code examples"
+        },
+        "creative": {
+            "provider": "anthropic/claude-3-opus",
+            "temperature": 0.9,
+            "query": "Create an engaging narrative summary"
+        },
+        "quick": {
+            "provider": "groq/mixtral-8x7b",
+            "temperature": 0.5,
+            "query": "Quick summary in bullet points"
+        }
+    }
+    
+    config = configs.get(content_type, configs["quick"])
+    # httpx.post() is synchronous; use AsyncClient for an awaitable request
+    async with httpx.AsyncClient() as client:
+        response = await client.post(
+            "http://localhost:11235/md",
+            json={
+                "url": url,
+                "f": "llm",
+                "q": config["query"],
+                "provider": config["provider"],
+                "temperature": config["temperature"]
+            }
+        )
+    return response.json()
+```
+
 ### REST API Examples
 
 Update URLs to use port `11235`.
@@ -694,6 +815,7 @@ app:
 llm:
   provider: "openai/gpt-4o-mini"  # Can be overridden by LLM_PROVIDER env var
   # api_key: sk-...  # If you pass the API key directly (not recommended)
+  # temperature and base_url are controlled via environment variables or request parameters
 
 # Redis Configuration (Used by internal Redis server managed by supervisord)
 redis:
diff --git a/tests/docker/test_llm_params.py b/tests/docker/test_llm_params.py
new file mode 100755
index 00000000..533c4482
--- /dev/null
+++ b/tests/docker/test_llm_params.py
@@ -0,0 +1,349 @@
+#!/usr/bin/env python3
+"""
+Test script for LLM temperature and base_url parameters in Crawl4AI Docker API.
+This demonstrates the new hierarchical configuration system:
+1. Request-level parameters (highest priority)
+2. Provider-specific environment variables
+3. Global environment variables
+4. System defaults (lowest priority)
+"""
+
+import asyncio
+import httpx
+import json
+import os
+from rich.console import Console
+from rich.panel import Panel
+from rich.syntax import Syntax
+from rich.table import Table
+
+
+console = Console()
+
+# Configuration
+BASE_URL = "http://localhost:11235"  # Docker API endpoint
+TEST_URL = "https://httpbin.org/html"     # Simple test page
+
+# --- Helper Functions ---
+
+async def check_server_health(client: httpx.AsyncClient) -> bool:
+    """Check if the server is healthy."""
+    console.print("[bold cyan]Checking server health...[/]", end="")
+    try:
+        response = await client.get("/health", timeout=10.0)
+        response.raise_for_status()
+        console.print(" [bold green]✓ Server is healthy![/]")
+        return True
+    except Exception as e:
+        console.print(f"\n[bold red]✗ Server health check failed: {e}[/]")
+        console.print(f"Is the server running at {BASE_URL}?")
+        return False
+
+def print_request(endpoint: str, payload: dict, title: str = "Request"):
+    """Pretty print the request."""
+    syntax = Syntax(json.dumps(payload, indent=2), "json", theme="monokai")
+    console.print(Panel.fit(
+        f"[cyan]POST {endpoint}[/cyan]\n{syntax}",
+        title=f"[bold blue]{title}[/]",
+        border_style="blue"
+    ))
+
+def print_response(response: dict, title: str = "Response"):
+    """Pretty print relevant parts of the response."""
+    # Extract only the relevant parts
+    relevant = {}
+    if "markdown" in response:
+        relevant["markdown"] = response["markdown"][:200] + "..." if len(response.get("markdown", "")) > 200 else response.get("markdown", "")
+    if "success" in response:
+        relevant["success"] = response["success"]
+    if "url" in response:
+        relevant["url"] = response["url"]
+    if "filter" in response:
+        relevant["filter"] = response["filter"]
+    
+    console.print(Panel.fit(
+        Syntax(json.dumps(relevant, indent=2), "json", theme="monokai"),
+        title=f"[bold green]{title}[/]",
+        border_style="green"
+    ))
+
+# --- Test Functions ---
+
+async def test_default_no_params(client: httpx.AsyncClient):
+    """Test 1: No temperature or base_url specified - uses defaults"""
+    console.rule("[bold yellow]Test 1: Default Configuration (No Parameters)[/]")
+    
+    payload = {
+        "url": TEST_URL,
+        "f": "llm",
+        "q": "What is the main heading of this page? Answer in exactly 5 words."
+    }
+    
+    print_request("/md", payload, "Request without temperature/base_url")
+    
+    try:
+        response = await client.post("/md", json=payload, timeout=30.0)
+        response.raise_for_status()
+        data = response.json()
+        print_response(data, "Response (using system defaults)")
+        console.print("[dim]→ This used system defaults or environment variables if set[/]")
+    except Exception as e:
+        console.print(f"[red]Error: {e}[/]")
+
+async def test_request_temperature(client: httpx.AsyncClient):
+    """Test 2: Request-level temperature (highest priority)"""
+    console.rule("[bold yellow]Test 2: Request-Level Temperature[/]")
+    
+    # Test with low temperature (more focused)
+    payload_low = {
+        "url": TEST_URL,
+        "f": "llm",
+        "q": "What is the main heading? Be creative and poetic.",
+        "temperature": 0.1  # Very low - should be less creative
+    }
+    
+    print_request("/md", payload_low, "Low Temperature (0.1)")
+    
+    try:
+        response = await client.post("/md", json=payload_low, timeout=30.0)
+        response.raise_for_status()
+        data_low = response.json()
+        print_response(data_low, "Response with Low Temperature")
+        console.print("[dim]→ Low temperature (0.1) should produce focused, less creative output[/]")
+    except Exception as e:
+        console.print(f"[red]Error: {e}[/]")
+    
+    console.print()
+    
+    # Test with high temperature (more creative)
+    payload_high = {
+        "url": TEST_URL,
+        "f": "llm",
+        "q": "What is the main heading? Be creative and poetic.",
+        "temperature": 1.5  # High - should be more creative
+    }
+    
+    print_request("/md", payload_high, "High Temperature (1.5)")
+    
+    try:
+        response = await client.post("/md", json=payload_high, timeout=30.0)
+        response.raise_for_status()
+        data_high = response.json()
+        print_response(data_high, "Response with High Temperature")
+        console.print("[dim]→ High temperature (1.5) should produce more creative, varied output[/]")
+    except Exception as e:
+        console.print(f"[red]Error: {e}[/]")
+
+async def test_provider_override(client: httpx.AsyncClient):
+    """Test 3: Provider override with temperature"""
+    console.rule("[bold yellow]Test 3: Provider Override with Temperature[/]")
+    
+    provider = "gemini/gemini-2.5-flash-lite"
+    payload = {
+        "url": TEST_URL,
+        "f": "llm",
+        "q": "Summarize this page in one sentence.",
+        "provider": provider,  # Explicitly set provider
+        "temperature": 0.7
+    }
+    
+    print_request("/md", payload, "Provider + Temperature Override")
+    
+    try:
+        response = await client.post("/md", json=payload, timeout=30.0)
+        response.raise_for_status()
+        data = response.json()
+        print_response(data, "Response with Provider Override")
+        console.print(f"[dim]→ This explicitly uses {provider} with temperature 0.7[/]")
+    except Exception as e:
+        console.print(f"[red]Error: {e}[/]")
+
+async def test_base_url_custom(client: httpx.AsyncClient):
+    """Test 4: Custom base_url (will fail unless you have a custom endpoint)"""
+    console.rule("[bold yellow]Test 4: Custom Base URL (Demo Only)[/]")
+    
+    payload = {
+        "url": TEST_URL,
+        "f": "llm",
+        "q": "What is this page about?",
+        "base_url": "https://api.custom-endpoint.com/v1",  # Custom endpoint
+        "temperature": 0.5
+    }
+    
+    print_request("/md", payload, "Custom Base URL Request")
+    console.print("[yellow]Note: This will fail unless you have a custom endpoint set up[/]")
+    
+    try:
+        response = await client.post("/md", json=payload, timeout=10.0)
+        response.raise_for_status()
+        data = response.json()
+        print_response(data, "Response from Custom Endpoint")
+    except httpx.HTTPStatusError as e:
+        console.print(f"[yellow]Expected failure (no custom endpoint): Status {e.response.status_code}[/]")
+    except Exception as e:
+        console.print(f"[yellow]Expected error: {e}[/]")
+
+async def test_llm_job_endpoint(client: httpx.AsyncClient):
+    """Test 5: Test the /llm/job endpoint with temperature and base_url"""
+    console.rule("[bold yellow]Test 5: LLM Job Endpoint with Parameters[/]")
+    
+    payload = {
+        "url": TEST_URL,
+        "q": "Extract the main title and any key information",
+        "temperature": 0.3,
+        # "base_url": "https://api.openai.com/v1"  # Optional
+    }
+    
+    print_request("/llm/job", payload, "LLM Job with Temperature")
+    
+    try:
+        # Submit the job
+        response = await client.post("/llm/job", json=payload, timeout=30.0)
+        response.raise_for_status()
+        job_data = response.json()
+        
+        if "task_id" in job_data:
+            task_id = job_data["task_id"]
+            console.print(f"[green]Job created with task_id: {task_id}[/]")
+            
+            # Poll for result (simplified - in production use proper polling)
+            await asyncio.sleep(3)
+            
+            status_response = await client.get(f"/llm/job/{task_id}")
+            status_data = status_response.json()
+            
+            if status_data.get("status") == "completed":
+                console.print("[green]Job completed successfully![/]")
+                if "result" in status_data:
+                    console.print(Panel.fit(
+                        Syntax(json.dumps(status_data["result"], indent=2), "json", theme="monokai"),
+                        title="Extraction Result",
+                        border_style="green"
+                    ))
+            else:
+                console.print(f"[yellow]Job status: {status_data.get('status', 'unknown')}[/]")
+        else:
+            console.print(f"[red]Unexpected response: {job_data}[/]")
+            
+    except Exception as e:
+        console.print(f"[red]Error: {e}[/]")
+
+
+async def test_llm_endpoint(client: httpx.AsyncClient):
+    """
+    Quick QA round-trip with /llm.
+    Asks a trivial question about the hard-coded page_url just to show wiring.
+    """
+    import time
+    import urllib.parse
+
+    page_url = "https://kidocode.com"
+    question = "What is the title of this page?"
+
+    enc = urllib.parse.quote_plus(page_url, safe="")
+    console.print(f"GET /llm/{enc}?q={question}")
+
+    try:
+        t0 = time.time()
+        resp = await client.get(f"/llm/{enc}", params={"q": question})
+        dt = time.time() - t0
+        console.print(
+            f"Response Status: [bold {'green' if resp.is_success else 'red'}]{resp.status_code}[/] (took {dt:.2f}s)")
+        resp.raise_for_status()
+        answer = resp.json().get("answer", "")
+        console.print(Panel(answer or "No answer returned",
+                      title="LLM answer", border_style="magenta", expand=False))
+    except Exception as e:
+        console.print(f"[bold red]Error hitting /llm:[/] {e}")
+
+
+async def show_environment_info():
+    """Display current environment configuration"""
+    console.rule("[bold cyan]Current Environment Configuration[/]")
+    
+    table = Table(title="LLM Environment Variables", show_header=True, header_style="bold magenta")
+    table.add_column("Variable", style="cyan", width=30)
+    table.add_column("Value", style="yellow")
+    table.add_column("Description", style="dim")
+    
+    env_vars = [
+        ("LLM_PROVIDER", "Global default provider"),
+        ("LLM_TEMPERATURE", "Global default temperature"),
+        ("LLM_BASE_URL", "Global custom API endpoint"),
+        ("OPENAI_API_KEY", "OpenAI API key"),
+        ("OPENAI_TEMPERATURE", "OpenAI-specific temperature"),
+        ("OPENAI_BASE_URL", "OpenAI-specific endpoint"),
+        ("ANTHROPIC_API_KEY", "Anthropic API key"),
+        ("ANTHROPIC_TEMPERATURE", "Anthropic-specific temperature"),
+        ("GROQ_API_KEY", "Groq API key"),
+        ("GROQ_TEMPERATURE", "Groq-specific temperature"),
+    ]
+    
+    for var, desc in env_vars:
+        value = os.environ.get(var, "[not set]")
+        if "API_KEY" in var and value != "[not set]":
+            # Mask API keys for security
+            value = value[:10] + "..." if len(value) > 10 else "***"
+        table.add_row(var, value, desc)
+    
+    console.print(table)
+    console.print()
+
+# --- Main Test Runner ---
+
+async def main():
+    """Run all tests"""
+    console.print(Panel.fit(
+        "[bold cyan]Crawl4AI LLM Parameters Test Suite[/]\n" +
+        "Testing temperature and base_url configuration hierarchy",
+        border_style="cyan"
+    ))
+    
+    # Show current environment
+    # await show_environment_info()
+    
+    # Create HTTP client
+    async with httpx.AsyncClient(base_url=BASE_URL, timeout=60.0) as client:
+        # Check server health
+        if not await check_server_health(client):
+            console.print("[red]Server is not available. Please ensure the Docker container is running.[/]")
+            return
+        
+        # Run tests
+        tests = [
+            ("Default Configuration", test_default_no_params),
+            ("Request Temperature", test_request_temperature),
+            ("Provider Override", test_provider_override),
+            ("Custom Base URL", test_base_url_custom),
+            ("LLM Job Endpoint", test_llm_job_endpoint),
+            ("LLM Endpoint", test_llm_endpoint),
+        ]
+        
+        for i, (name, test_func) in enumerate(tests, 1):
+            if i > 1:
+                console.print()  # Add spacing between tests
+            
+            try:
+                await test_func(client)
+            except Exception as e:
+                console.print(f"[red]Test '{name}' failed with error: {e}[/]")
+                console.print_exception(show_locals=False)
+        
+        console.rule("[bold green]All Tests Complete![/]", style="green")
+        
+        # Summary
+        console.print("\n[bold cyan]Configuration Hierarchy Summary:[/]")
+        console.print("1. [yellow]Request parameters[/] - Highest priority (temperature, base_url in API call)")
+        console.print("2. [yellow]Provider-specific env[/] - e.g., OPENAI_TEMPERATURE, GROQ_BASE_URL")
+        console.print("3. [yellow]Global env variables[/] - LLM_TEMPERATURE, LLM_BASE_URL")
+        console.print("4. [yellow]System defaults[/] - Lowest priority (provider/litellm defaults)")
+        console.print()
+
+if __name__ == "__main__":
+    try:
+        asyncio.run(main())
+    except KeyboardInterrupt:
+        console.print("\n[yellow]Tests interrupted by user.[/]")
+    except Exception as e:
+        console.print(f"\n[bold red]An error occurred:[/]")
+        console.print_exception(show_locals=False)
\ No newline at end of file