Feat/llm config (#724)

* feature: Add LlmConfig to easily configure and pass LLM configs to different strategies

* pulled in next branch and resolved conflicts

* feat: Add gemini and deepseek providers. Set ignore_cache in LLM content filter to true by default to avoid confusion

* Refactor: Update LlmConfig in LLMExtractionStrategy class and deprecate old params

* updated tests, docs and readme
This commit is contained in:
Aravind
2025-02-21 13:11:37 +05:30
committed by GitHub
parent 3cb28875c3
commit 2af958e12c
25 changed files with 420 additions and 240 deletions

View File

@@ -131,6 +131,7 @@ OverlappingWindowChunking(
```python
from pydantic import BaseModel
from crawl4ai.extraction_strategy import LLMExtractionStrategy
from crawl4ai.async_configs import LlmConfig
# Define schema
class Article(BaseModel):
@@ -140,7 +141,7 @@ class Article(BaseModel):
# Create strategy
strategy = LLMExtractionStrategy(
provider="ollama/llama2",
llmConfig = LlmConfig(provider="ollama/llama2"),
schema=Article.schema(),
instruction="Extract article details"
)
@@ -197,6 +198,7 @@ result = await crawler.arun(
```python
from crawl4ai.chunking_strategy import OverlappingWindowChunking
from crawl4ai.async_configs import LlmConfig
# Create chunking strategy
chunker = OverlappingWindowChunking(
@@ -206,7 +208,7 @@ chunker = OverlappingWindowChunking(
# Use with extraction strategy
strategy = LLMExtractionStrategy(
provider="ollama/llama2",
llmConfig = LlmConfig(provider="ollama/llama2"),
chunking_strategy=chunker
)