feat(docker): Add temperature and base_url parameters for LLM configuration. ref #1035

Implement hierarchical configuration for LLM parameters with support for:
  - Temperature control (0.0-2.0) to adjust response creativity
  - Custom base_url for proxy servers and alternative endpoints
  - 4-tier priority: request params > provider env > global env > defaults

  Add helper functions in utils.py, update API schemas and handlers,
  support environment variables (LLM_TEMPERATURE, OPENAI_TEMPERATURE, etc.),
  and provide comprehensive documentation with examples.
This commit is contained in:
ntohidi
2025-08-26 16:44:07 +08:00
parent 40ab287c90
commit 159207b86f
8 changed files with 603 additions and 23 deletions

View File

@@ -10,4 +10,23 @@ GEMINI_API_TOKEN=your_gemini_key_here
# Optional: Override the default LLM provider
# Examples: "openai/gpt-4", "anthropic/claude-3-opus", "deepseek/chat", etc.
# If not set, uses the provider specified in config.yml (default: openai/gpt-4o-mini)
# LLM_PROVIDER=anthropic/claude-3-opus
# LLM_PROVIDER=anthropic/claude-3-opus
# Optional: Global LLM temperature setting (0.0-2.0)
# Controls randomness in responses. Lower = more focused, Higher = more creative
# LLM_TEMPERATURE=0.7
# Optional: Global custom API base URL
# Use this to point to custom endpoints or proxy servers
# LLM_BASE_URL=https://api.custom.com/v1
# Optional: Provider-specific temperature overrides
# These take precedence over the global LLM_TEMPERATURE
# OPENAI_TEMPERATURE=0.5
# ANTHROPIC_TEMPERATURE=0.3
# GROQ_TEMPERATURE=0.8
# Optional: Provider-specific base URL overrides
# Use for provider-specific proxy endpoints
# OPENAI_BASE_URL=https://custom-openai.company.com/v1
# GROQ_BASE_URL=https://custom-groq.company.com/v1

View File

@@ -42,7 +42,9 @@ from utils import (
should_cleanup_task,
decode_redis_hash,
get_llm_api_key,
validate_llm_provider
validate_llm_provider,
get_llm_temperature,
get_llm_base_url
)
import psutil, time
@@ -96,7 +98,9 @@ async def handle_llm_qa(
response = perform_completion_with_backoff(
provider=config["llm"]["provider"],
prompt_with_variables=prompt,
api_token=get_llm_api_key(config) # Returns None to let litellm handle it
api_token=get_llm_api_key(config), # Returns None to let litellm handle it
temperature=get_llm_temperature(config),
base_url=get_llm_base_url(config)
)
return response.choices[0].message.content
@@ -115,7 +119,9 @@ async def process_llm_extraction(
instruction: str,
schema: Optional[str] = None,
cache: str = "0",
provider: Optional[str] = None
provider: Optional[str] = None,
temperature: Optional[float] = None,
base_url: Optional[str] = None
) -> None:
"""Process LLM extraction in background."""
try:
@@ -131,7 +137,9 @@ async def process_llm_extraction(
llm_strategy = LLMExtractionStrategy(
llm_config=LLMConfig(
provider=provider or config["llm"]["provider"],
api_token=api_key
api_token=api_key,
temperature=temperature or get_llm_temperature(config, provider),
base_url=base_url or get_llm_base_url(config, provider)
),
instruction=instruction,
schema=json.loads(schema) if schema else None,
@@ -178,7 +186,9 @@ async def handle_markdown_request(
query: Optional[str] = None,
cache: str = "0",
config: Optional[dict] = None,
provider: Optional[str] = None
provider: Optional[str] = None,
temperature: Optional[float] = None,
base_url: Optional[str] = None
) -> str:
"""Handle markdown generation requests."""
try:
@@ -204,6 +214,8 @@ async def handle_markdown_request(
llm_config=LLMConfig(
provider=provider or config["llm"]["provider"],
api_token=get_llm_api_key(config, provider), # Returns None to let litellm handle it
temperature=temperature or get_llm_temperature(config, provider),
base_url=base_url or get_llm_base_url(config, provider)
),
instruction=query or "Extract main content"
)
@@ -248,7 +260,9 @@ async def handle_llm_request(
schema: Optional[str] = None,
cache: str = "0",
config: Optional[dict] = None,
provider: Optional[str] = None
provider: Optional[str] = None,
temperature: Optional[float] = None,
api_base_url: Optional[str] = None
) -> JSONResponse:
"""Handle LLM extraction requests."""
base_url = get_base_url(request)
@@ -279,7 +293,9 @@ async def handle_llm_request(
cache,
base_url,
config,
provider
provider,
temperature,
api_base_url
)
except Exception as e:
@@ -324,7 +340,9 @@ async def create_new_task(
cache: str,
base_url: str,
config: dict,
provider: Optional[str] = None
provider: Optional[str] = None,
temperature: Optional[float] = None,
api_base_url: Optional[str] = None
) -> JSONResponse:
"""Create and initialize a new task."""
decoded_url = unquote(input_path)
@@ -349,7 +367,9 @@ async def create_new_task(
query,
schema,
cache,
provider
provider,
temperature,
api_base_url
)
return JSONResponse({

View File

@@ -37,6 +37,8 @@ class LlmJobPayload(BaseModel):
schema: Optional[str] = None
cache: bool = False
provider: Optional[str] = None
temperature: Optional[float] = None
base_url: Optional[str] = None
class CrawlJobPayload(BaseModel):
@@ -63,6 +65,8 @@ async def llm_job_enqueue(
cache=payload.cache,
config=_config,
provider=payload.provider,
temperature=payload.temperature,
api_base_url=payload.base_url,
)
@@ -72,7 +76,7 @@ async def llm_job_status(
task_id: str,
_td: Dict = Depends(lambda: _token_dep())
):
return await handle_task_status(_redis, task_id)
return await handle_task_status(_redis, task_id, base_url=str(request.base_url))
# ---------- CRAWL job -------------------------------------------------------

View File

@@ -16,6 +16,8 @@ class MarkdownRequest(BaseModel):
q: Optional[str] = Field(None, description="Query string used by BM25/LLM filters")
c: Optional[str] = Field("0", description="Cachebust / revision counter")
provider: Optional[str] = Field(None, description="LLM provider override (e.g., 'anthropic/claude-3-opus')")
temperature: Optional[float] = Field(None, description="LLM temperature override (0.0-2.0)")
base_url: Optional[str] = Field(None, description="LLM API base URL override")
class RawCode(BaseModel):

View File

@@ -241,7 +241,8 @@ async def get_markdown(
raise HTTPException(
400, "Invalid URL format. Must start with http://, https://, or for raw HTML (raw:, raw://)")
markdown = await handle_markdown_request(
body.url, body.f, body.q, body.c, config, body.provider
body.url, body.f, body.q, body.c, config, body.provider,
body.temperature, body.base_url
)
return JSONResponse({
"url": body.url,

View File

@@ -108,6 +108,69 @@ def validate_llm_provider(config: Dict, provider: Optional[str] = None) -> tuple
return True, ""
def get_llm_temperature(config: Dict, provider: Optional[str] = None) -> Optional[float]:
    """Resolve the LLM temperature from environment variables.

    Priority order:
        1. Provider-specific environment variable (e.g., OPENAI_TEMPERATURE)
        2. Global LLM_TEMPERATURE environment variable
        3. None (to use litellm/provider defaults)

    The provider prefix comes from ``provider`` when it is given; otherwise
    it falls back to ``config["llm"]["provider"]`` (when present) so that
    callers relying on the configured default provider still pick up that
    provider's override.

    A value that cannot be parsed as a float, or that lies outside the
    documented 0.0-2.0 range (this also rejects NaN and infinities), is
    logged as a warning and skipped, and resolution continues with the
    next tier.

    Args:
        config: The application configuration dictionary
        provider: Optional provider override (e.g., "openai/gpt-4")

    Returns:
        The temperature setting if configured and valid, otherwise None
    """
    # Fall back to the configured default provider when no override is given.
    if not provider:
        llm_section = config.get("llm") if isinstance(config, dict) else None
        if isinstance(llm_section, dict):
            provider = llm_section.get("provider")

    # (environment variable name, warning prefix) pairs in priority order.
    candidates = []
    if provider:
        provider_name = provider.split('/')[0].upper()
        candidates.append((
            f"{provider_name}_TEMPERATURE",
            f"Invalid temperature value for {provider_name}",
        ))
    candidates.append(("LLM_TEMPERATURE", "Invalid global temperature value"))

    for env_name, warning_prefix in candidates:
        raw_value = os.environ.get(env_name)
        if not raw_value:
            continue
        try:
            temperature = float(raw_value)
        except ValueError:
            temperature = None
        # The chained comparison is False for NaN and +/-inf, so those are
        # rejected together with ordinary out-of-range numbers.
        if temperature is not None and 0.0 <= temperature <= 2.0:
            return temperature
        logging.warning(f"{warning_prefix}: {raw_value}")

    # Return None to use litellm/provider defaults
    return None
def get_llm_base_url(config: Dict, provider: Optional[str] = None) -> Optional[str]:
    """Resolve the LLM API base URL from environment variables.

    Priority order:
        1. Provider-specific environment variable (e.g., OPENAI_BASE_URL)
        2. Global LLM_BASE_URL environment variable
        3. None (to use default endpoints)

    The provider prefix comes from ``provider`` when it is given; otherwise
    it falls back to ``config["llm"]["provider"]`` (when present) so that
    callers relying on the configured default provider still pick up that
    provider's override. Empty environment values are treated as unset.

    Args:
        config: The application configuration dictionary
        provider: Optional provider override (e.g., "openai/gpt-4")

    Returns:
        The base URL if configured, otherwise None
    """
    # Fall back to the configured default provider when no override is given.
    if not provider:
        llm_section = config.get("llm") if isinstance(config, dict) else None
        if isinstance(llm_section, dict):
            provider = llm_section.get("provider")

    # Check provider-specific base URL first
    if provider:
        provider_name = provider.split('/')[0].upper()
        provider_url = os.environ.get(f"{provider_name}_BASE_URL")
        if provider_url:
            return provider_url

    # Check global LLM_BASE_URL; normalise an empty string to None so that
    # "set but blank" behaves the same as "not set".
    return os.environ.get("LLM_BASE_URL") or None
def verify_email_domain(email: str) -> bool:
try:
domain = email.split('@')[1]