(fix): Update document about LLM extraction strategy to use LLMConfig. REF #1146
This commit is contained in:
@@ -218,7 +218,7 @@ import json
|
||||
import asyncio
|
||||
from typing import List
|
||||
from pydantic import BaseModel, Field
|
||||
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
|
||||
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode, LLMConfig
|
||||
from crawl4ai.extraction_strategy import LLMExtractionStrategy
|
||||
|
||||
class Entity(BaseModel):
|
||||
@@ -238,8 +238,8 @@ class KnowledgeGraph(BaseModel):
|
||||
async def main():
|
||||
# LLM extraction strategy
|
||||
llm_strat = LLMExtractionStrategy(
|
||||
llmConfig = LlmConfig(provider="openai/gpt-4", api_token=os.getenv('OPENAI_API_KEY')),
|
||||
schema=KnowledgeGraph.schema_json(),
|
||||
llmConfig = LLMConfig(provider="openai/gpt-4", api_token=os.getenv('OPENAI_API_KEY')),
|
||||
schema=KnowledgeGraph.model_json_schema(),
|
||||
extraction_type="schema",
|
||||
instruction="Extract entities and relationships from the content. Return valid JSON.",
|
||||
chunk_token_threshold=1400,
|
||||
@@ -258,6 +258,10 @@ async def main():
|
||||
url = "https://www.nbcnews.com/business"
|
||||
result = await crawler.arun(url=url, config=crawl_config)
|
||||
|
||||
print("--- LLM RAW RESPONSE ---")
|
||||
print(result.extracted_content)
|
||||
print("--- END LLM RAW RESPONSE ---")
|
||||
|
||||
if result.success:
|
||||
with open("kb_result.json", "w", encoding="utf-8") as f:
|
||||
f.write(result.extracted_content)
|
||||
|
||||
Reference in New Issue
Block a user