diff --git a/crawl4ai/async_configs.py b/crawl4ai/async_configs.py
index ef31bf7d..cccfedee 100644
--- a/crawl4ai/async_configs.py
+++ b/crawl4ai/async_configs.py
@@ -1792,7 +1792,10 @@ class LLMConfig:
         frequency_penalty: Optional[float] = None,
         presence_penalty: Optional[float] = None,
         stop: Optional[List[str]] = None,
-        n: Optional[int] = None,
+        n: Optional[int] = None,
+        backoff_base_delay: Optional[int] = None,
+        backoff_max_attempts: Optional[int] = None,
+        backoff_exponential_factor: Optional[int] = None,
     ):
         """Configuaration class for LLM provider and API token."""
         self.provider = provider
@@ -1821,6 +1824,9 @@ class LLMConfig:
         self.presence_penalty = presence_penalty
         self.stop = stop
         self.n = n
+        self.backoff_base_delay = backoff_base_delay if backoff_base_delay is not None else 2
+        self.backoff_max_attempts = backoff_max_attempts if backoff_max_attempts is not None else 3
+        self.backoff_exponential_factor = backoff_exponential_factor if backoff_exponential_factor is not None else 2
 
     @staticmethod
     def from_kwargs(kwargs: dict) -> "LLMConfig":
@@ -1834,7 +1840,10 @@ class LLMConfig:
             frequency_penalty=kwargs.get("frequency_penalty"),
             presence_penalty=kwargs.get("presence_penalty"),
             stop=kwargs.get("stop"),
-            n=kwargs.get("n")
+            n=kwargs.get("n"),
+            backoff_base_delay=kwargs.get("backoff_base_delay"),
+            backoff_max_attempts=kwargs.get("backoff_max_attempts"),
+            backoff_exponential_factor=kwargs.get("backoff_exponential_factor")
         )
 
     def to_dict(self):
@@ -1848,7 +1857,10 @@ class LLMConfig:
             "frequency_penalty": self.frequency_penalty,
             "presence_penalty": self.presence_penalty,
             "stop": self.stop,
-            "n": self.n
+            "n": self.n,
+            "backoff_base_delay": self.backoff_base_delay,
+            "backoff_max_attempts": self.backoff_max_attempts,
+            "backoff_exponential_factor": self.backoff_exponential_factor
         }
 
     def clone(self, **kwargs):
diff --git a/crawl4ai/content_filter_strategy.py b/crawl4ai/content_filter_strategy.py
index 1e764f74..50baed27 100644
--- a/crawl4ai/content_filter_strategy.py
+++ b/crawl4ai/content_filter_strategy.py
@@ -980,6 +980,9 @@ class LLMContentFilter(RelevantContentFilter):
                 prompt,
                 api_token,
                 base_url=base_url,
+                base_delay=self.llm_config.backoff_base_delay,
+                max_attempts=self.llm_config.backoff_max_attempts,
+                exponential_factor=self.llm_config.backoff_exponential_factor,
                 extra_args=extra_args,
             )
 
diff --git a/crawl4ai/extraction_strategy.py b/crawl4ai/extraction_strategy.py
index 4a64e5d4..7033e380 100644
--- a/crawl4ai/extraction_strategy.py
+++ b/crawl4ai/extraction_strategy.py
@@ -649,6 +649,9 @@ class LLMExtractionStrategy(ExtractionStrategy):
             base_url=self.llm_config.base_url,
             json_response=self.force_json_response,
             extra_args=self.extra_args,
+            base_delay=self.llm_config.backoff_base_delay,
+            max_attempts=self.llm_config.backoff_max_attempts,
+            exponential_factor=self.llm_config.backoff_exponential_factor
         )  # , json_response=self.extract_type == "schema")
         # Track usage
         usage = TokenUsage(
@@ -846,6 +849,9 @@ class LLMExtractionStrategy(ExtractionStrategy):
                 base_url=self.llm_config.base_url,
                 json_response=self.force_json_response,
                 extra_args=self.extra_args,
+                base_delay=self.llm_config.backoff_base_delay,
+                max_attempts=self.llm_config.backoff_max_attempts,
+                exponential_factor=self.llm_config.backoff_exponential_factor
             )
         # Track usage
         usage = TokenUsage(
diff --git a/crawl4ai/table_extraction.py b/crawl4ai/table_extraction.py
index b2f1992b..7edb3b76 100644
--- a/crawl4ai/table_extraction.py
+++ b/crawl4ai/table_extraction.py
@@ -795,6 +795,9 @@ Return only a JSON array of extracted tables following the specified format."""
             api_token=self.llm_config.api_token,
             base_url=self.llm_config.base_url,
             json_response=True,
+            base_delay=self.llm_config.backoff_base_delay,
+            max_attempts=self.llm_config.backoff_max_attempts,
+            exponential_factor=self.llm_config.backoff_exponential_factor,
             extra_args=self.extra_args
         )
 
@@ -1116,6 +1119,9 @@ Return only a JSON array of extracted tables following the specified format."""
             api_token=self.llm_config.api_token,
             base_url=self.llm_config.base_url,
             json_response=True,
+            base_delay=self.llm_config.backoff_base_delay,
+            max_attempts=self.llm_config.backoff_max_attempts,
+            exponential_factor=self.llm_config.backoff_exponential_factor,
             extra_args=self.extra_args
         )
 
diff --git a/crawl4ai/utils.py b/crawl4ai/utils.py
index 68a343fb..74216095 100644
--- a/crawl4ai/utils.py
+++ b/crawl4ai/utils.py
@@ -1745,6 +1745,9 @@ def perform_completion_with_backoff(
     api_token,
     json_response=False,
     base_url=None,
+    base_delay=2,
+    max_attempts=3,
+    exponential_factor=2,
     **kwargs,
 ):
     """
@@ -1761,6 +1764,9 @@
         api_token (str): The API token for authentication.
         json_response (bool): Whether to request a JSON response. Defaults to False.
         base_url (Optional[str]): The base URL for the API. Defaults to None.
+        base_delay (int): Base delay in seconds before the first retry. Defaults to 2.
+        max_attempts (int): Maximum number of attempts, including the initial call. Defaults to 3.
+        exponential_factor (int): Multiplier applied to the delay after each failed attempt. Defaults to 2.
         **kwargs: Additional arguments for the API request.
 
     Returns:
@@ -1770,9 +1776,6 @@
     from litellm import completion
     from litellm.exceptions import RateLimitError
 
-    max_attempts = 3
-    base_delay = 2  # Base delay in seconds, you can adjust this based on your needs
-
     extra_args = {"temperature": 0.01, "api_key": api_token, "base_url": base_url}
     if json_response:
         extra_args["response_format"] = {"type": "json_object"}
@@ -1798,7 +1801,7 @@
             # Check if we have exhausted our max attempts
             if attempt < max_attempts - 1:
                 # Calculate the delay and wait
-                delay = base_delay * (2**attempt)  # Exponential backoff formula
+                delay = base_delay * (exponential_factor**attempt)  # Exponential backoff formula
                 print(f"Waiting for {delay} seconds before retrying...")
                 time.sleep(delay)
             else:
@@ -1831,6 +1834,9 @@ async def aperform_completion_with_backoff(
     api_token,
     json_response=False,
     base_url=None,
+    base_delay=2,
+    max_attempts=3,
+    exponential_factor=2,
     **kwargs,
 ):
     """
@@ -1847,6 +1853,9 @@
         api_token (str): The API token for authentication.
         json_response (bool): Whether to request a JSON response. Defaults to False.
         base_url (Optional[str]): The base URL for the API. Defaults to None.
+        base_delay (int): Base delay in seconds before the first retry. Defaults to 2.
+        max_attempts (int): Maximum number of attempts, including the initial call. Defaults to 3.
+        exponential_factor (int): Multiplier applied to the delay after each failed attempt. Defaults to 2.
         **kwargs: Additional arguments for the API request.
 
     Returns:
@@ -1857,9 +1866,6 @@
     from litellm.exceptions import RateLimitError
     import asyncio
 
-    max_attempts = 3
-    base_delay = 2  # Base delay in seconds, you can adjust this based on your needs
-
     extra_args = {"temperature": 0.01, "api_key": api_token, "base_url": base_url}
     if json_response:
         extra_args["response_format"] = {"type": "json_object"}
@@ -1885,7 +1891,7 @@
             # Check if we have exhausted our max attempts
             if attempt < max_attempts - 1:
                 # Calculate the delay and wait
-                delay = base_delay * (2**attempt)  # Exponential backoff formula
+                delay = base_delay * (exponential_factor**attempt)  # Exponential backoff formula
                 print(f"Waiting for {delay} seconds before retrying...")
                 await asyncio.sleep(delay)
             else:
diff --git a/deploy/docker/api.py b/deploy/docker/api.py
index 4fab27b1..81cd312a 100644
--- a/deploy/docker/api.py
+++ b/deploy/docker/api.py
@@ -108,7 +108,10 @@ async def handle_llm_qa(
         prompt_with_variables=prompt,
         api_token=get_llm_api_key(config),  # Returns None to let litellm handle it
         temperature=get_llm_temperature(config),
-        base_url=get_llm_base_url(config)
+        base_url=get_llm_base_url(config),
+        base_delay=config["llm"].get("backoff_base_delay", 2),
+        max_attempts=config["llm"].get("backoff_max_attempts", 3),
+        exponential_factor=config["llm"].get("backoff_exponential_factor", 2)
     )
 
     return response.choices[0].message.content
diff --git a/docs/md_v2/api/parameters.md b/docs/md_v2/api/parameters.md
index 41984ba5..9d907516 100644
--- a/docs/md_v2/api/parameters.md
+++ b/docs/md_v2/api/parameters.md
@@ -439,10 +439,19 @@ LLMConfig is useful to pass LLM provider config to strategies and functions that
 | **`provider`** | `"ollama/llama3","groq/llama3-70b-8192","groq/llama3-8b-8192", "openai/gpt-4o-mini" ,"openai/gpt-4o","openai/o1-mini","openai/o1-preview","openai/o3-mini","openai/o3-mini-high","anthropic/claude-3-haiku-20240307","anthropic/claude-3-opus-20240229","anthropic/claude-3-sonnet-20240229","anthropic/claude-3-5-sonnet-20240620","gemini/gemini-pro","gemini/gemini-1.5-pro","gemini/gemini-2.0-flash","gemini/gemini-2.0-flash-exp","gemini/gemini-2.0-flash-lite-preview-02-05","deepseek/deepseek-chat"`<br/>*(default: `"openai/gpt-4o-mini"`)* | Which LLM provider to use.
 | **`api_token`** |1.Optional. When not provided explicitly, api_token will be read from environment variables based on provider. For example: If a gemini model is passed as provider then,`"GEMINI_API_KEY"` will be read from environment variables<br/>2. API token of LLM provider<br/>eg: `api_token = "gsk_1ClHGGJ7Lpn4WGybR7vNWGdyb3FY7zXEw3SCiy0BAVM9lL8CQv"`<br/>3. Environment variable - use with prefix "env:"<br/>eg:`api_token = "env: GROQ_API_KEY"` | API token to use for the given provider
 | **`base_url`** |Optional. Custom API endpoint | If your provider has a custom endpoint
+| **`backoff_base_delay`** |Optional. `int` *(default: `2`)* | Seconds to wait before the first retry when the provider throttles a request.
+| **`backoff_max_attempts`** |Optional. `int` *(default: `3`)* | Total tries (initial call + retries) before surfacing an error.
+| **`backoff_exponential_factor`** |Optional. `int` *(default: `2`)* | Multiplier that increases the wait time for each retry (`delay = base_delay * factor^attempt`).
 
 ## 3.2 Example Usage
 ```python
-llm_config = LLMConfig(provider="openai/gpt-4o-mini", api_token=os.getenv("OPENAI_API_KEY"))
+llm_config = LLMConfig(
+    provider="openai/gpt-4o-mini",
+    api_token=os.getenv("OPENAI_API_KEY"),
+    backoff_base_delay=1,  # optional
+    backoff_max_attempts=5,  # optional
+    backoff_exponential_factor=3,  # optional
+)
 ```
 
 ## 4. Putting It All Together
diff --git a/docs/md_v2/complete-sdk-reference.md b/docs/md_v2/complete-sdk-reference.md
index d4a5ba65..7e6abf5c 100644
--- a/docs/md_v2/complete-sdk-reference.md
+++ b/docs/md_v2/complete-sdk-reference.md
@@ -1593,8 +1593,20 @@ The `clone()` method:
    - Environment variable - use with prefix "env:"<br/>eg:`api_token = "env: GROQ_API_KEY"`
 3. **`base_url`**:
    - If your provider has a custom endpoint
+
+4. **Backoff controls** *(optional)*:
+   - `backoff_base_delay` *(default `2` seconds)* – how long to pause before the first retry if the provider rate-limits you.
+   - `backoff_max_attempts` *(default `3`)* – total tries for the same prompt (initial call + retries).
+   - `backoff_exponential_factor` *(default `2`)* – how quickly the pause grows between retries. A factor of 2 yields waits like 2s → 4s → 8s.
+   - Because these plug into Crawl4AI’s retry helper, every LLM strategy automatically follows the pacing you define here.
 ```python
-llm_config = LLMConfig(provider="openai/gpt-4o-mini", api_token=os.getenv("OPENAI_API_KEY"))
+llm_config = LLMConfig(
+    provider="openai/gpt-4o-mini",
+    api_token=os.getenv("OPENAI_API_KEY"),
+    backoff_base_delay=1,  # optional
+    backoff_max_attempts=5,  # optional
+    backoff_exponential_factor=3,  # optional
+)
 ```
 ## 4. Putting It All Together
 In a typical scenario, you define **one** `BrowserConfig` for your crawler session, then create **one or more** `CrawlerRunConfig` & `LLMConfig` depending on each call's needs:
diff --git a/docs/md_v2/core/browser-crawler-config.md b/docs/md_v2/core/browser-crawler-config.md
index 5bee2368..a0e59fd0 100644
--- a/docs/md_v2/core/browser-crawler-config.md
+++ b/docs/md_v2/core/browser-crawler-config.md
@@ -308,8 +308,20 @@ The `clone()` method:
 3.⠀**`base_url`**:
    - If your provider has a custom endpoint
 
+4.⠀**Retry/backoff controls** *(optional)*:
+   - `backoff_base_delay` *(default `2` seconds)* – base delay inserted before the first retry when the provider returns a rate-limit response.
+   - `backoff_max_attempts` *(default `3`)* – total number of attempts (initial call plus retries) before the request is surfaced as an error.
+   - `backoff_exponential_factor` *(default `2`)* – growth rate for the retry delay (`delay = base_delay * factor^attempt`).
+   - These values are forwarded to the shared `perform_completion_with_backoff` helper, ensuring every strategy that consumes your `LLMConfig` honors the same throttling policy.
+
 ```python
-llm_config = LLMConfig(provider="openai/gpt-4o-mini", api_token=os.getenv("OPENAI_API_KEY"))
+llm_config = LLMConfig(
+    provider="openai/gpt-4o-mini",
+    api_token=os.getenv("OPENAI_API_KEY"),
+    backoff_base_delay=1,  # optional
+    backoff_max_attempts=5,  # optional
+    backoff_exponential_factor=3,  # optional
+)
 ```
 ## 4. Putting It All Together
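
For a quick end-to-end check of the new knobs, here is a minimal sketch (not part of the patch) that builds an `LLMConfig` with custom backoff settings and reproduces the delay schedule the patched helpers follow while `RateLimitError` keeps recurring. It assumes the top-level `from crawl4ai import LLMConfig` import used in the docs above; the schedule computation simply restates the helper's `delay = base_delay * exponential_factor ** attempt` formula.

```python
import os

from crawl4ai import LLMConfig  # top-level import path, per the docs above

# Custom pacing: up to 5 attempts, with waits of 1s, 3s, 9s, 27s between them.
cfg = LLMConfig(
    provider="openai/gpt-4o-mini",
    api_token=os.getenv("OPENAI_API_KEY"),
    backoff_base_delay=1,
    backoff_max_attempts=5,
    backoff_exponential_factor=3,
)

# Restate the helper's formula: only the first (max_attempts - 1) failures
# sleep; the final attempt surfaces the error instead of waiting again.
delays = [
    cfg.backoff_base_delay * cfg.backoff_exponential_factor**attempt
    for attempt in range(cfg.backoff_max_attempts - 1)
]
print(delays)  # [1, 3, 9, 27] -> at most 40s of waiting before giving up
```

Leaving all three arguments unset reproduces the previously hard-coded behavior (2s base delay, 3 attempts, factor 2), so existing callers see no change in pacing.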