Feat/llm config (#724)

* feature: Add LlmConfig to easily configure and pass LLM configs to different strategies * pulled in next branch and resolved conflicts * feat: Add gemini and deepseek providers. Make ignore_cache in llm content filter to true by default to avoid confusions * Refactor: Update LlmConfig in LLMExtractionStrategy class and deprecate old params * updated tests, docs and readme
2025-02-21 13:11:37 +05:30
parent 3cb28875c3
commit 2af958e12c
25 changed files with 420 additions and 240 deletions
--- a/docs/md_v2/core/content-selection.md
+++ b/docs/md_v2/core/content-selection.md
@@ -211,7 +211,7 @@ if __name__ == "__main__":
 import asyncio
 import json
 from pydantic import BaseModel, Field
-from crawl4ai import AsyncWebCrawler, CrawlerRunConfig
+from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, LlmConfig
 from crawl4ai.extraction_strategy import LLMExtractionStrategy

 class ArticleData(BaseModel):
@@ -220,8 +220,7 @@ class ArticleData(BaseModel):

 async def main():
    llm_strategy = LLMExtractionStrategy(
-        provider="openai/gpt-4",
-        api_token="sk-YOUR_API_KEY",
+        llmConfig = LlmConfig(provider="openai/gpt-4",api_token="sk-YOUR_API_KEY")
        schema=ArticleData.schema(),
        extraction_type="schema",
        instruction="Extract 'headline' and a short 'summary' from the content."