feat: make LLM backoff configurable end-to-end

- extend LLMConfig with backoff delay/attempt/factor fields and thread them through LLMExtractionStrategy, LLMContentFilter, table extraction, and Docker API handlers
- expose the backoff parameter knobs on perform_completion_with_backoff/aperform_completion_with_backoff and document them in the md_v2 guides
2025-11-28 18:50:04 +05:30
parent b36c6daa5c
commit 7a133e22cc
9 changed files with 84 additions and 15 deletions
--- a/deploy/docker/api.py
+++ b/deploy/docker/api.py
@@ -108,7 +108,10 @@ async def handle_llm_qa(
            prompt_with_variables=prompt,
            api_token=get_llm_api_key(config),  # Returns None to let litellm handle it
            temperature=get_llm_temperature(config),
-            base_url=get_llm_base_url(config)
+            base_url=get_llm_base_url(config),
+            base_delay=config["llm"].get("backoff_base_delay", 2),
+            max_attempts=config["llm"].get("backoff_max_attempts", 3),
+            exponential_factor=config["llm"].get("backoff_exponential_factor", 2)
        )

        return response.choices[0].message.content