(fix): Update the documentation for the LLM extraction strategy to use LLMConfig. REF #1146

This commit is contained in:
ntohidi
2025-06-03 12:53:59 +02:00
parent cc95d3abd4
commit fcc2abe4db

View File

@@ -218,7 +218,7 @@ import json
 import asyncio
 from typing import List
 from pydantic import BaseModel, Field
-from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
+from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode, LLMConfig
 from crawl4ai.extraction_strategy import LLMExtractionStrategy
 class Entity(BaseModel):
@@ -238,8 +238,8 @@ class KnowledgeGraph(BaseModel):
 async def main():
 # LLM extraction strategy
 llm_strat = LLMExtractionStrategy(
-llmConfig = LlmConfig(provider="openai/gpt-4", api_token=os.getenv('OPENAI_API_KEY')),
-schema=KnowledgeGraph.schema_json(),
+llmConfig = LLMConfig(provider="openai/gpt-4", api_token=os.getenv('OPENAI_API_KEY')),
+schema=KnowledgeGraph.model_json_schema(),
 extraction_type="schema",
 instruction="Extract entities and relationships from the content. Return valid JSON.",
 chunk_token_threshold=1400,
@@ -258,6 +258,10 @@ async def main():
 url = "https://www.nbcnews.com/business"
 result = await crawler.arun(url=url, config=crawl_config)
+print("--- LLM RAW RESPONSE ---")
+print(result.extracted_content)
+print("--- END LLM RAW RESPONSE ---")
 if result.success:
 with open("kb_result.json", "w", encoding="utf-8") as f:
 f.write(result.extracted_content)