(fix): Update the documentation on the LLM extraction strategy to use LLMConfig. REF #1146
This commit is contained in:
@@ -218,7 +218,7 @@ import json
|
|||||||
import asyncio
|
import asyncio
|
||||||
from typing import List
|
from typing import List
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
|
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode, LLMConfig
|
||||||
from crawl4ai.extraction_strategy import LLMExtractionStrategy
|
from crawl4ai.extraction_strategy import LLMExtractionStrategy
|
||||||
|
|
||||||
class Entity(BaseModel):
|
class Entity(BaseModel):
|
||||||
@@ -238,8 +238,8 @@ class KnowledgeGraph(BaseModel):
|
|||||||
async def main():
|
async def main():
|
||||||
# LLM extraction strategy
|
# LLM extraction strategy
|
||||||
llm_strat = LLMExtractionStrategy(
|
llm_strat = LLMExtractionStrategy(
|
||||||
llmConfig = LlmConfig(provider="openai/gpt-4", api_token=os.getenv('OPENAI_API_KEY')),
|
llmConfig = LLMConfig(provider="openai/gpt-4", api_token=os.getenv('OPENAI_API_KEY')),
|
||||||
schema=KnowledgeGraph.schema_json(),
|
schema=KnowledgeGraph.model_json_schema(),
|
||||||
extraction_type="schema",
|
extraction_type="schema",
|
||||||
instruction="Extract entities and relationships from the content. Return valid JSON.",
|
instruction="Extract entities and relationships from the content. Return valid JSON.",
|
||||||
chunk_token_threshold=1400,
|
chunk_token_threshold=1400,
|
||||||
@@ -258,6 +258,10 @@ async def main():
|
|||||||
url = "https://www.nbcnews.com/business"
|
url = "https://www.nbcnews.com/business"
|
||||||
result = await crawler.arun(url=url, config=crawl_config)
|
result = await crawler.arun(url=url, config=crawl_config)
|
||||||
|
|
||||||
|
print("--- LLM RAW RESPONSE ---")
|
||||||
|
print(result.extracted_content)
|
||||||
|
print("--- END LLM RAW RESPONSE ---")
|
||||||
|
|
||||||
if result.success:
|
if result.success:
|
||||||
with open("kb_result.json", "w", encoding="utf-8") as f:
|
with open("kb_result.json", "w", encoding="utf-8") as f:
|
||||||
f.write(result.extracted_content)
|
f.write(result.extracted_content)
|
||||||
|
|||||||
Reference in New Issue
Block a user