From f51b0780423ceadd626f2b747bbf291cd0587dfa Mon Sep 17 00:00:00 2001
From: unclecode <unclecode@kidocode.com>
Date: Mon, 24 Jun 2024 22:54:29 +0800
Subject: [PATCH] Update README example.

---
 README.md | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index ebea270c..39f7cc9c 100644
--- a/README.md
+++ b/README.md
@@ -60,19 +60,30 @@ Crawl all OpenAI models and their fees from the official page.
 import os
 from crawl4ai import WebCrawler
 from crawl4ai.extraction_strategy import LLMExtractionStrategy
+from pydantic import BaseModel, Field
+
+class OpenAIModelFee(BaseModel):
+    model_name: str = Field(..., description="Name of the OpenAI model.")
+    input_fee: str = Field(..., description="Fee for input token for the OpenAI model.")
+    output_fee: str = Field(..., description="Fee for output token for the OpenAI model.")
 
 url = 'https://openai.com/api/pricing/'
 crawler = WebCrawler()
 crawler.warmup()
 
 result = crawler.run(
-    url=url,
-    extraction_strategy=LLMExtractionStrategy(
-        provider="openai/gpt-4",
-        api_token=os.getenv('OPENAI_API_KEY'),
-        instruction="Extract all model names and their fees for input and output tokens."
-    ),
-)
+        url=url,
+        word_count_threshold=1,
+        extraction_strategy= LLMExtractionStrategy(
+            provider= "openai/gpt-4o", api_token = os.getenv('OPENAI_API_KEY'), 
+            schema=OpenAIModelFee.schema(),
+            extraction_type="schema",
+            instruction="""From the crawled content, extract all mentioned model names along with their fees for input and output tokens. 
+            Do not miss any models in the entire content. One extracted model JSON format should look like this: 
+            {"model_name": "GPT-4", "input_fee": "US$10.00 / 1M tokens", "output_fee": "US$30.00 / 1M tokens"}."""
+        ),            
+        bypass_cache=True,
+    )
 
 print(result.extracted_content)
 ```