Fix #340 example llm_extraction (#358)

@Haopeng138 Thank you so much. They are still part of the library; I forgot to update them after I moved to the asynchronous versions years ago. I really appreciate it. I have to admit that documentation is a weak spot of mine, which is why I spent a lot of time on it last week. Now that you mention some of the things in the example folder, I realize I had forgotten about that folder. I'll try to keep it more up to date. If you find anything else, please let me know; your help and support are welcome. Thank you. I will add your name to the contributors list as well.
2024-12-24 12:56:07 +01:00
parent ed7bc1909c
commit bacbeb3ed4
1 changed files with 27 additions and 28 deletions
--- a/docs/examples/llm_extraction_openai_pricing.py
+++ b/docs/examples/llm_extraction_openai_pricing.py
@@ -1,23 +1,21 @@
 import os
 import time
 from crawl4ai.web_crawler import WebCrawler
 from crawl4ai.chunking_strategy import *
 from crawl4ai.extraction_strategy import *
 from crawl4ai.crawler_strategy import *
 import asyncio
 from pydantic import BaseModel, Field
 url = r'https://openai.com/api/pricing/'
 crawler = WebCrawler()
 crawler.warmup()
 from pydantic import BaseModel, Field
 class OpenAIModelFee(BaseModel):
    model_name: str = Field(..., description="Name of the OpenAI model.")
    input_fee: str = Field(..., description="Fee for input token for the OpenAI model.")
    output_fee: str = Field(..., description="Fee for output token for the OpenAI model.")
-result = crawler.run(
+from crawl4ai import AsyncWebCrawler
 async def main():
    # Use AsyncWebCrawler
    async with AsyncWebCrawler() as crawler:
        result = await crawler.arun(
            url=url,
            word_count_threshold=1,
            extraction_strategy= LLMExtractionStrategy(
@@ -30,12 +28,13 @@ result = crawler.run(
                            'One extracted model JSON format should look like this: ' \
                            '{ "model_name": "GPT-4", "input_fee": "US$10.00 / 1M tokens", "output_fee": "US$30.00 / 1M tokens" }'
            ),
-    bypass_cache=True,
+
        )
-
+        print("Success:", result.success)
        model_fees = json.loads(result.extracted_content)
        print(len(model_fees))
        with open(".data/data.json", "w", encoding="utf-8") as f:
            f.write(result.extracted_content)
 asyncio.run(main())