Feat/llm config (#724)

* feature: Add LlmConfig to easily configure and pass LLM configs to different strategies

* pulled in next branch and resolved conflicts

* feat: Add gemini and deepseek providers. Set ignore_cache in the LLM content filter to True by default to avoid confusion

* Refactor: Update LlmConfig in LLMExtractionStrategy class and deprecate old params

* updated tests, docs and readme
Author: Aravind
Date: 2025-02-21 13:11:37 +05:30
Committed by: GitHub
Parent: 3cb28875c3
Commit: 2af958e12c
25 changed files with 420 additions and 240 deletions

View File

@@ -11,6 +11,7 @@ import asyncio
import os
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
from crawl4ai.async_configs import LlmConfig
from crawl4ai.extraction_strategy import (
LLMExtractionStrategy,
JsonCssExtractionStrategy,
@@ -60,22 +61,19 @@ async def main():
# 1. LLM Extraction with different input formats
markdown_strategy = LLMExtractionStrategy(
provider="openai/gpt-4o-mini",
api_token=os.getenv("OPENAI_API_KEY"),
llmConfig = LlmConfig(provider="openai/gpt-4o-mini", api_token=os.getenv("OPENAI_API_KEY")),
instruction="Extract product information including name, price, and description",
)
html_strategy = LLMExtractionStrategy(
input_format="html",
provider="openai/gpt-4o-mini",
api_token=os.getenv("OPENAI_API_KEY"),
llmConfig=LlmConfig(provider="openai/gpt-4o-mini", api_token=os.getenv("OPENAI_API_KEY")),
instruction="Extract product information from HTML including structured data",
)
fit_markdown_strategy = LLMExtractionStrategy(
input_format="fit_markdown",
provider="openai/gpt-4o-mini",
api_token=os.getenv("OPENAI_API_KEY"),
llmConfig=LlmConfig(provider="openai/gpt-4o-mini",api_token=os.getenv("OPENAI_API_KEY")),
instruction="Extract product information from cleaned markdown",
)
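Since the three strategies above build identical `LlmConfig` objects, a single instance can be constructed once and shared (a minimal sketch, assuming one config object may safely be passed to multiple strategies; not shown in this diff):

```python
shared_llm = LlmConfig(
    provider="openai/gpt-4o-mini",
    api_token=os.getenv("OPENAI_API_KEY"),
)

markdown_strategy = LLMExtractionStrategy(
    llmConfig=shared_llm,
    instruction="Extract product information including name, price, and description",
)
html_strategy = LLMExtractionStrategy(
    input_format="html",
    llmConfig=shared_llm,
    instruction="Extract product information from HTML including structured data",
)
fit_markdown_strategy = LLMExtractionStrategy(
    input_format="fit_markdown",
    llmConfig=shared_llm,
    instruction="Extract product information from cleaned markdown",
)
```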

View File

@@ -1,3 +1,4 @@
from crawl4ai.async_configs import LlmConfig
from crawl4ai.extraction_strategy import *
from crawl4ai.crawler_strategy import *
import asyncio
@@ -25,8 +26,7 @@ async def main():
word_count_threshold=1,
extraction_strategy=LLMExtractionStrategy(
# provider= "openai/gpt-4o", api_token = os.getenv('OPENAI_API_KEY'),
provider="groq/llama-3.1-70b-versatile",
api_token=os.getenv("GROQ_API_KEY"),
llmConfig=LlmConfig(provider="groq/llama-3.1-70b-versatile", api_token=os.getenv("GROQ_API_KEY")),
schema=OpenAIModelFee.model_json_schema(),
extraction_type="schema",
instruction="From the crawled content, extract all mentioned model names along with their "

View File

@@ -1,6 +1,7 @@
import os
import asyncio
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
from crawl4ai.async_configs import LlmConfig
from crawl4ai.content_filter_strategy import LLMContentFilter
async def test_llm_filter():
@@ -22,8 +23,7 @@ async def test_llm_filter():
# Initialize LLM filter with focused instruction
filter = LLMContentFilter(
provider="openai/gpt-4o",
api_token=os.getenv('OPENAI_API_KEY'),
llmConfig=LlmConfig(provider="openai/gpt-4o", api_token=os.getenv('OPENAI_API_KEY')),
instruction="""
Focus on extracting the core educational content about Python classes.
Include:
@@ -43,8 +43,7 @@ async def test_llm_filter():
)
filter = LLMContentFilter(
provider="openai/gpt-4o",
api_token=os.getenv('OPENAI_API_KEY'),
llmConfig=LlmConfig(provider="openai/gpt-4o",api_token=os.getenv('OPENAI_API_KEY')),
chunk_token_threshold=2 ** 12 * 2, # 4096 * 2
ignore_cache = True,
instruction="""

View File

@@ -1,5 +1,7 @@
import os, sys
from crawl4ai.async_configs import LlmConfig
sys.path.append(
os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
)
@@ -209,8 +211,7 @@ async def extract_structured_data_using_llm(
word_count_threshold=1,
page_timeout=80000,
extraction_strategy=LLMExtractionStrategy(
provider=provider,
api_token=api_token,
llmConfig=LlmConfig(provider=provider,api_token=api_token),
schema=OpenAIModelFee.model_json_schema(),
extraction_type="schema",
instruction="""From the crawled content, extract all mentioned model names along with their fees for input and output tokens.

View File

@@ -1,5 +1,7 @@
import os, sys
from crawl4ai.async_configs import LlmConfig
# append parent directory to system path
sys.path.append(
os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -145,8 +147,7 @@ async def extract_structured_data_using_llm(
url="https://openai.com/api/pricing/",
word_count_threshold=1,
extraction_strategy=LLMExtractionStrategy(
provider=provider,
api_token=api_token,
llmConfig=LlmConfig(provider=provider,api_token=api_token),
schema=OpenAIModelFee.model_json_schema(),
extraction_type="schema",
instruction="""From the crawled content, extract all mentioned model names along with their fees for input and output tokens.
@@ -569,8 +570,7 @@ async def generate_knowledge_graph():
relationships: List[Relationship]
extraction_strategy = LLMExtractionStrategy(
provider="openai/gpt-4o-mini", # Or any other provider, including Ollama and open source models
api_token=os.getenv("OPENAI_API_KEY"), # In case of Ollama just pass "no-token"
llmConfig=LlmConfig(provider="openai/gpt-4o-mini", api_token=os.getenv("OPENAI_API_KEY")), # In case of Ollama just pass "no-token"
schema=KnowledgeGraph.model_json_schema(),
extraction_type="schema",
instruction="""Extract entities and relationships from the given text.""",

View File

@@ -1,5 +1,6 @@
import os
import time
from crawl4ai.async_configs import LlmConfig
from crawl4ai.web_crawler import WebCrawler
from crawl4ai.chunking_strategy import *
from crawl4ai.extraction_strategy import *
@@ -178,7 +179,7 @@ def add_llm_extraction_strategy(crawler):
result = crawler.run(
url="https://www.nbcnews.com/business",
extraction_strategy=LLMExtractionStrategy(
provider="openai/gpt-4o", api_token=os.getenv("OPENAI_API_KEY")
llmConfig = LlmConfig(provider="openai/gpt-4o", api_token=os.getenv("OPENAI_API_KEY"))
),
)
cprint(
@@ -197,8 +198,7 @@ def add_llm_extraction_strategy(crawler):
result = crawler.run(
url="https://www.nbcnews.com/business",
extraction_strategy=LLMExtractionStrategy(
provider="openai/gpt-4o",
api_token=os.getenv("OPENAI_API_KEY"),
llmConfig=LlmConfig(provider="openai/gpt-4o",api_token=os.getenv("OPENAI_API_KEY")),
instruction="I am interested in only financial news",
),
)
@@ -210,8 +210,7 @@ def add_llm_extraction_strategy(crawler):
result = crawler.run(
url="https://www.nbcnews.com/business",
extraction_strategy=LLMExtractionStrategy(
provider="openai/gpt-4o",
api_token=os.getenv("OPENAI_API_KEY"),
llmConfig=LlmConfig(provider="openai/gpt-4o",api_token=os.getenv("OPENAI_API_KEY")),
instruction="Extract only content related to technology",
),
)

View File

@@ -245,11 +245,32 @@ run_config = CrawlerRunConfig(
)
```
## 3. Putting It All Together
## 3. **LlmConfig** - Setting up LLM providers
`LlmConfig` passes LLM provider configuration to the strategies and functions that rely on LLMs for extraction, filtering, schema generation, etc. It is currently accepted by:
1. `LLMExtractionStrategy`
2. `LLMContentFilter`
3. `JsonCssExtractionStrategy.generate_schema`
4. `JsonXPathExtractionStrategy.generate_schema`
## 3.1 Parameters
| **Parameter** | **Type / Default** | **What It Does** |
|---------------|--------------------|------------------|
| **`provider`** | One of `"ollama/llama3","groq/llama3-70b-8192","groq/llama3-8b-8192","openai/gpt-4o-mini","openai/gpt-4o","openai/o1-mini","openai/o1-preview","openai/o3-mini","openai/o3-mini-high","anthropic/claude-3-haiku-20240307","anthropic/claude-3-opus-20240229","anthropic/claude-3-sonnet-20240229","anthropic/claude-3-5-sonnet-20240620","gemini/gemini-pro","gemini/gemini-1.5-pro","gemini/gemini-2.0-flash","gemini/gemini-2.0-flash-exp","gemini/gemini-2.0-flash-lite-preview-02-05","deepseek/deepseek-chat"`<br/>*(default: `"openai/gpt-4o-mini"`)* | Which LLM provider to use. |
| **`api_token`** | Optional. 1. When not provided explicitly, the token is read from an environment variable based on the provider; for example, if a Gemini model is used, `GEMINI_API_KEY` is read from the environment. <br/> 2. The provider's API token, e.g. `api_token = "gsk_1ClHGGJ7Lpn4WGybR7vNWGdyb3FY7zXEw3SCiy0BAVM9lL8CQv"` <br/> 3. An environment variable reference, using the `env:` prefix, e.g. `api_token = "env: GROQ_API_KEY"` | API token to use for the given provider. |
| **`base_url`** | Optional. Custom API endpoint | Use if your provider exposes a custom endpoint. |
## 3.2 Example Usage
```python
llmConfig = LlmConfig(provider="openai/gpt-4o-mini", api_token=os.getenv("OPENAI_API_KEY"))
```
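The same object is then passed straight to any of the strategies listed above; a minimal sketch (the instruction text is illustrative):

```python
import os

from crawl4ai.async_configs import LlmConfig
from crawl4ai.extraction_strategy import LLMExtractionStrategy

llm_config = LlmConfig(provider="openai/gpt-4o-mini", api_token=os.getenv("OPENAI_API_KEY"))

# Hand the config to an LLM-backed strategy instead of separate provider/api_token params.
strategy = LLMExtractionStrategy(
    llmConfig=llm_config,
    instruction="Extract the page's main product name and price",
)
```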
## 4. Putting It All Together
- **Use** `BrowserConfig` for **global** browser settings: engine, headless, proxy, user agent.
- **Use** `CrawlerRunConfig` for each crawls **context**: how to filter content, handle caching, wait for dynamic elements, or run JS.
- **Pass** both configs to `AsyncWebCrawler` (the `BrowserConfig`) and then to `arun()` (the `CrawlerRunConfig`).
- **Use** `LlmConfig` for LLM provider settings shared across extraction, filtering, and schema-generation tasks. It is accepted by `LLMExtractionStrategy`, `LLMContentFilter`, `JsonCssExtractionStrategy.generate_schema`, and `JsonXPathExtractionStrategy.generate_schema`, as shown in the sketch below.
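For example, the schema generators take the same config object in place of the old `provider`/`api_token` parameters (a minimal sketch; the HTML snippet and model choice are illustrative):

```python
from crawl4ai.async_configs import LlmConfig
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy

html = "<div class='product'><h2>Gaming Laptop</h2><span class='price'>$999.99</span></div>"

# One-time schema generation driven by an open-source model via Ollama (no token needed).
schema = JsonCssExtractionStrategy.generate_schema(
    html,
    llmConfig=LlmConfig(provider="ollama/llama3.3", api_token=None),
)
```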
```python
# Create a modified copy with the clone() method

View File

@@ -131,6 +131,7 @@ OverlappingWindowChunking(
```python
from pydantic import BaseModel
from crawl4ai.extraction_strategy import LLMExtractionStrategy
from crawl4ai.async_configs import LlmConfig
# Define schema
class Article(BaseModel):
@@ -140,7 +141,7 @@ class Article(BaseModel):
# Create strategy
strategy = LLMExtractionStrategy(
provider="ollama/llama2",
llmConfig = LlmConfig(provider="ollama/llama2"),
schema=Article.schema(),
instruction="Extract article details"
)
@@ -197,6 +198,7 @@ result = await crawler.arun(
```python
from crawl4ai.chunking_strategy import OverlappingWindowChunking
from crawl4ai.async_configs import LlmConfig
# Create chunking strategy
chunker = OverlappingWindowChunking(
@@ -206,7 +208,7 @@ chunker = OverlappingWindowChunking(
# Use with extraction strategy
strategy = LLMExtractionStrategy(
provider="ollama/llama2",
llmConfig = LlmConfig(provider="ollama/llama2"),
chunking_strategy=chunker
)

View File

@@ -1,9 +1,10 @@
# Browser & Crawler Configuration (Quick Overview)
# Browser, Crawler & LLM Configuration (Quick Overview)
Crawl4AI's flexibility stems from three key classes:
1. **`BrowserConfig`** - Dictates **how** the browser is launched and behaves (e.g., headless or visible, proxy, user agent).
2. **`CrawlerRunConfig`** - Dictates **how** each **crawl** operates (e.g., caching, extraction, timeouts, JavaScript code to run, etc.).
3. **`LlmConfig`** - Dictates **how** LLM providers are configured (model, API token, base URL, temperature, etc.).
In most examples, you create **one** `BrowserConfig` for the entire crawler session, then pass a **fresh** or re-used `CrawlerRunConfig` whenever you call `arun()`. This tutorial shows the most commonly used parameters. If you need advanced or rarely used fields, see the [Configuration Parameters](../api/parameters.md).
@@ -234,13 +235,37 @@ The `clone()` method:
---
## 3. Putting It All Together
In a typical scenario, you define **one** `BrowserConfig` for your crawler session, then create **one or more** `CrawlerRunConfig` depending on each call's needs:
## 3. LlmConfig Essentials
### Key fields to note
1. **`provider`**:
- Which LLM provider to use.
- Possible values are `"ollama/llama3","groq/llama3-70b-8192","groq/llama3-8b-8192","openai/gpt-4o-mini","openai/gpt-4o","openai/o1-mini","openai/o1-preview","openai/o3-mini","openai/o3-mini-high","anthropic/claude-3-haiku-20240307","anthropic/claude-3-opus-20240229","anthropic/claude-3-sonnet-20240229","anthropic/claude-3-5-sonnet-20240620","gemini/gemini-pro","gemini/gemini-1.5-pro","gemini/gemini-2.0-flash","gemini/gemini-2.0-flash-exp","gemini/gemini-2.0-flash-lite-preview-02-05","deepseek/deepseek-chat"`<br/>*(default: `"openai/gpt-4o-mini"`)*
2. **`api_token`**:
- Optional. When not provided explicitly, the token is read from an environment variable based on the provider; for example, if a Gemini model is used, `GEMINI_API_KEY` is read from the environment.
- The provider's API token, e.g. `api_token = "gsk_1ClHGGJ7Lpn4WGybR7vNWGdyb3FY7zXEw3SCiy0BAVM9lL8CQv"`
- An environment variable reference, using the `env:` prefix, e.g. `api_token = "env: GROQ_API_KEY"`
3. **`base_url`**:
- Optional. A custom API endpoint, if your provider exposes one.
```python
llmConfig = LlmConfig(provider="openai/gpt-4o-mini", api_token=os.getenv("OPENAI_API_KEY"))
```
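The `env:` prefix and `base_url` field can be combined when targeting a self-hosted or proxied endpoint (a minimal sketch; the provider, variable name, and URL are placeholders):

```python
llmConfig = LlmConfig(
    provider="deepseek/deepseek-chat",
    api_token="env:DEEPSEEK_API_KEY",  # resolved from the environment at call time
    base_url="https://llm.example.internal/v1",  # placeholder for a custom endpoint
)
```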
## 4. Putting It All Together
In a typical scenario, you define **one** `BrowserConfig` for your crawler session, then create **one or more** `CrawlerRunConfig` & `LlmConfig` depending on each call's needs:
```python
import asyncio
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode, LlmConfig
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
async def main():
@@ -262,8 +287,39 @@ async def main():
}
extraction = JsonCssExtractionStrategy(schema)
# 3) Crawler run config: skip cache, use extraction
# 3) Example LLM content filtering
gemini_config = LlmConfig(
provider="gemini/gemini-1.5-pro"
api_token = "env:GEMINI_API_TOKEN"
)
# Initialize LLM filter with specific instruction
filter = LLMContentFilter(
llmConfig=gemini_config, # or your preferred provider
instruction="""
Focus on extracting the core educational content.
Include:
- Key concepts and explanations
- Important code examples
- Essential technical details
Exclude:
- Navigation elements
- Sidebars
- Footer content
Format the output as clean markdown with proper code blocks and headers.
""",
chunk_token_threshold=500, # Adjust based on your needs
verbose=True
)
md_generator = DefaultMarkdownGenerator(
content_filter=filter,
options={"ignore_links": True}
# 4) Crawler run config: skip cache, use extraction
run_conf = CrawlerRunConfig(
markdown_generator=md_generator,
extraction_strategy=extraction,
cache_mode=CacheMode.BYPASS,
)
@@ -283,11 +339,11 @@ if __name__ == "__main__":
---
## 4. Next Steps
## 5. Next Steps
For a **detailed list** of available parameters (including advanced ones), see:
- [BrowserConfig and CrawlerRunConfig Reference](../api/parameters.md)
- [BrowserConfig, CrawlerRunConfig & LlmConfig Reference](../api/parameters.md)
You can explore topics like:
@@ -298,11 +354,12 @@ You can explore topics like:
---
## 5. Conclusion
## 6. Conclusion
**BrowserConfig** and **CrawlerRunConfig** give you straightforward ways to define:
**BrowserConfig**, **CrawlerRunConfig** and **LlmConfig** give you straightforward ways to define:
- **Which** browser to launch, how it should run, and any proxy or user agent needs.
- **How** each crawl should behave—caching, timeouts, JavaScript code, extraction strategies, etc.
- **Which** LLM provider to use, along with its API token, temperature, and base URL for custom endpoints.
Use them together for **clear, maintainable** code, and when you need more specialized behavior, check out the advanced parameters in the [reference docs](../api/parameters.md). Happy crawling!

View File

@@ -211,7 +211,7 @@ if __name__ == "__main__":
import asyncio
import json
from pydantic import BaseModel, Field
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, LlmConfig
from crawl4ai.extraction_strategy import LLMExtractionStrategy
class ArticleData(BaseModel):
@@ -220,8 +220,7 @@ class ArticleData(BaseModel):
async def main():
llm_strategy = LLMExtractionStrategy(
provider="openai/gpt-4",
api_token="sk-YOUR_API_KEY",
llmConfig = LlmConfig(provider="openai/gpt-4",api_token="sk-YOUR_API_KEY")
schema=ArticleData.schema(),
extraction_type="schema",
instruction="Extract 'headline' and a short 'summary' from the content."

View File

@@ -175,14 +175,13 @@ prune_filter = PruningContentFilter(
For intelligent content filtering and high-quality markdown generation, you can use the **LLMContentFilter**. This filter leverages LLMs to generate relevant markdown while preserving the original content's meaning and structure:
```python
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, LlmConfig
from crawl4ai.content_filter_strategy import LLMContentFilter
async def main():
# Initialize LLM filter with specific instruction
filter = LLMContentFilter(
provider="openai/gpt-4o", # or your preferred provider
api_token="your-api-token", # or use environment variable
llmConfig = LlmConfig(provider="openai/gpt-4o",api_token="your-api-token"), #or use environment variable
instruction="""
Focus on extracting the core educational content.
Include:

View File

@@ -128,6 +128,7 @@ Crawl4AI can also extract structured data (JSON) using CSS or XPath selectors. B
```python
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
from crawl4ai.async_configs import LlmConfig
# Generate a schema (one-time cost)
html = "<div class='product'><h2>Gaming Laptop</h2><span class='price'>$999.99</span></div>"
@@ -135,15 +136,13 @@ html = "<div class='product'><h2>Gaming Laptop</h2><span class='price'>$999.99</
# Using OpenAI (requires API token)
schema = JsonCssExtractionStrategy.generate_schema(
html,
provider="openai/gpt-4o", # Default provider
api_token="your-openai-token" # Required for OpenAI
llmConfig = LlmConfig(provider="openai/gpt-4o",api_token="your-openai-token") # Required for OpenAI
)
# Or using Ollama (open source, no token needed)
schema = JsonCssExtractionStrategy.generate_schema(
html,
provider="ollama/llama3.3", # Open source alternative
api_token=None # Not needed for Ollama
llmConfig = LlmConfig(provider="ollama/llama3.3", api_token=None) # Not needed for Ollama
)
# Use the schema for fast, repeated extractions
@@ -212,7 +211,7 @@ import os
import json
import asyncio
from pydantic import BaseModel, Field
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, LlmConfig
from crawl4ai.extraction_strategy import LLMExtractionStrategy
class OpenAIModelFee(BaseModel):
@@ -242,8 +241,7 @@ async def extract_structured_data_using_llm(
word_count_threshold=1,
page_timeout=80000,
extraction_strategy=LLMExtractionStrategy(
provider=provider,
api_token=api_token,
llmConfig = LlmConfig(provider=provider,api_token=api_token),
schema=OpenAIModelFee.model_json_schema(),
extraction_type="schema",
instruction="""From the crawled content, extract all mentioned model names along with their fees for input and output tokens.
@@ -259,12 +257,6 @@ async def extract_structured_data_using_llm(
print(result.extracted_content)
if __name__ == "__main__":
# Use ollama with llama3.3
# asyncio.run(
# extract_structured_data_using_llm(
# provider="ollama/llama3.3", api_token="no-token"
# )
# )
asyncio.run(
extract_structured_data_using_llm(

View File

@@ -71,8 +71,7 @@ Below is an overview of important LLM extraction parameters. All are typically s
```python
extraction_strategy = LLMExtractionStrategy(
provider="openai/gpt-4",
api_token="YOUR_OPENAI_KEY",
llmConfig = LlmConfig(provider="openai/gpt-4", api_token="YOUR_OPENAI_KEY"),
schema=MyModel.model_json_schema(),
extraction_type="schema",
instruction="Extract a list of items from the text with 'name' and 'price' fields.",
@@ -97,7 +96,7 @@ import asyncio
import json
from pydantic import BaseModel, Field
from typing import List
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode, LlmConfig
from crawl4ai.extraction_strategy import LLMExtractionStrategy
class Product(BaseModel):
@@ -107,9 +106,8 @@ class Product(BaseModel):
async def main():
# 1. Define the LLM extraction strategy
llm_strategy = LLMExtractionStrategy(
provider="openai/gpt-4o-mini", # e.g. "ollama/llama2"
api_token=os.getenv('OPENAI_API_KEY'),
schema=Product.schema_json(), # Or use model_json_schema()
llmConfig = LlmConfig(provider="openai/gpt-4o-mini", api_token=os.getenv('OPENAI_API_KEY')),
schema=Product.schema_json(), # Or use model_json_schema()
extraction_type="schema",
instruction="Extract all product objects with 'name' and 'price' from the content.",
chunk_token_threshold=1000,

View File

@@ -415,6 +415,7 @@ The schema generator is available as a static method on both `JsonCssExtractionS
```python
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy, JsonXPathExtractionStrategy
from crawl4ai.async_configs import LlmConfig
# Sample HTML with product information
html = """
@@ -433,17 +434,15 @@ html = """
# Option 1: Using OpenAI (requires API token)
css_schema = JsonCssExtractionStrategy.generate_schema(
html,
schema_type="css", # This is the default
provider="openai/gpt-4o", # Default provider
api_token="your-openai-token" # Required for OpenAI
schema_type="css",
llmConfig = LlmConfig(provider="openai/gpt-4o",api_token="your-openai-token")
)
# Option 2: Using Ollama (open source, no token needed)
xpath_schema = JsonXPathExtractionStrategy.generate_schema(
html,
schema_type="xpath",
provider="ollama/llama3.3", # Open source alternative
api_token=None # Not needed for Ollama
llmConfig = LlmConfig(provider="ollama/llama3.3", api_token=None) # Not needed for Ollama
)
# Use the generated schema for fast, repeated extractions
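# A follow-up sketch (assumed usage, not shown in this diff): the generated schemas
# can be reused directly for fast, repeated extractions with no further LLM calls.
fast_css_strategy = JsonCssExtractionStrategy(css_schema)
fast_xpath_strategy = JsonXPathExtractionStrategy(xpath_schema)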