Merge branch 'next' into 2025-MAR-ALPHA-1

This commit is contained in:
Aravind Karnam
2025-03-13 10:42:22 +05:30
67 changed files with 4194 additions and 729 deletions

View File

@@ -76,7 +76,7 @@ Below is an overview of important LLM extraction parameters. All are typically s
```python
extraction_strategy = LLMExtractionStrategy(
llmConfig = LlmConfig(provider="openai/gpt-4", api_token="YOUR_OPENAI_KEY"),
llm_config = LLMConfig(provider="openai/gpt-4", api_token="YOUR_OPENAI_KEY"),
schema=MyModel.model_json_schema(),
extraction_type="schema",
instruction="Extract a list of items from the text with 'name' and 'price' fields.",
@@ -101,7 +101,7 @@ import asyncio
import json
from pydantic import BaseModel, Field
from typing import List
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode, LlmConfig
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode, LLMConfig
from crawl4ai.extraction_strategy import LLMExtractionStrategy
class Product(BaseModel):
@@ -111,7 +111,7 @@ class Product(BaseModel):
async def main():
# 1. Define the LLM extraction strategy
llm_strategy = LLMExtractionStrategy(
llmConfig = LlmConfig(provider="openai/gpt-4o-mini", api_token=os.getenv('OPENAI_API_KEY')),
llm_config = LLMConfig(provider="openai/gpt-4o-mini", api_token=os.getenv('OPENAI_API_KEY')),
schema=Product.schema_json(), # Or use model_json_schema()
extraction_type="schema",
instruction="Extract all product objects with 'name' and 'price' from the content.",