(fix): Update document about LLM extraction strategy to use LLMConfig. REF #1146

2025-06-03 12:53:59 +02:00
parent cc95d3abd4
commit fcc2abe4db
1 changed files with 7 additions and 3 deletions
--- a/docs/md_v2/extraction/llm-strategies.md
+++ b/docs/md_v2/extraction/llm-strategies.md
@@ -218,7 +218,7 @@ import json
 import asyncio
 from typing import List
 from pydantic import BaseModel, Field
-from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
+from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode, LLMConfig
 from crawl4ai.extraction_strategy import LLMExtractionStrategy

 class Entity(BaseModel):
@@ -238,8 +238,8 @@ class KnowledgeGraph(BaseModel):
 async def main():
    # LLM extraction strategy
    llm_strat = LLMExtractionStrategy(
-        llmConfig = LlmConfig(provider="openai/gpt-4", api_token=os.getenv('OPENAI_API_KEY')),
-        schema=KnowledgeGraph.schema_json(),
+        llmConfig = LLMConfig(provider="openai/gpt-4", api_token=os.getenv('OPENAI_API_KEY')),
+        schema=KnowledgeGraph.model_json_schema(),
        extraction_type="schema",
        instruction="Extract entities and relationships from the content. Return valid JSON.",
        chunk_token_threshold=1400,
@@ -258,6 +258,10 @@ async def main():
        url = "https://www.nbcnews.com/business"
        result = await crawler.arun(url=url, config=crawl_config)

+        print("--- LLM RAW RESPONSE ---")
+        print(result.extracted_content)
+        print("--- END LLM RAW RESPONSE ---")
+
        if result.success:
            with open("kb_result.json", "w", encoding="utf-8") as f:
                f.write(result.extracted_content)