diff --git a/docs/md_v2/extraction/llm-strategies.md b/docs/md_v2/extraction/llm-strategies.md index 9f6a6b3e..7c488094 100644 --- a/docs/md_v2/extraction/llm-strategies.md +++ b/docs/md_v2/extraction/llm-strategies.md @@ -218,7 +218,7 @@ import json import asyncio from typing import List from pydantic import BaseModel, Field -from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode +from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode, LLMConfig from crawl4ai.extraction_strategy import LLMExtractionStrategy class Entity(BaseModel): @@ -238,8 +238,8 @@ class KnowledgeGraph(BaseModel): async def main(): # LLM extraction strategy llm_strat = LLMExtractionStrategy( - llmConfig = LlmConfig(provider="openai/gpt-4", api_token=os.getenv('OPENAI_API_KEY')), - schema=KnowledgeGraph.schema_json(), + llmConfig = LLMConfig(provider="openai/gpt-4", api_token=os.getenv('OPENAI_API_KEY')), + schema=KnowledgeGraph.model_json_schema(), extraction_type="schema", instruction="Extract entities and relationships from the content. Return valid JSON.", chunk_token_threshold=1400, @@ -258,6 +258,10 @@ async def main(): url = "https://www.nbcnews.com/business" result = await crawler.arun(url=url, config=crawl_config) + print("--- LLM RAW RESPONSE ---") + print(result.extracted_content) + print("--- END LLM RAW RESPONSE ---") + if result.success: with open("kb_result.json", "w", encoding="utf-8") as f: f.write(result.extracted_content)