Apply Ruff Corrections

2025-01-13 19:19:58 +08:00
parent c3370ec5da
commit 8ec12d7d68
84 changed files with 6861 additions and 5076 deletions
--- a/docs/examples/summarize_page.py
+++ b/docs/examples/summarize_page.py
@@ -1,39 +1,41 @@
 import os
-import time
 import json
 from crawl4ai.web_crawler import WebCrawler
 from crawl4ai.chunking_strategy import *
 from crawl4ai.extraction_strategy import *
 from crawl4ai.crawler_strategy import *

-url = r'https://marketplace.visualstudio.com/items?itemName=Unclecode.groqopilot'
+url = r"https://marketplace.visualstudio.com/items?itemName=Unclecode.groqopilot"

 crawler = WebCrawler()
 crawler.warmup()

 from pydantic import BaseModel, Field

+
 class PageSummary(BaseModel):
    title: str = Field(..., description="Title of the page.")
    summary: str = Field(..., description="Summary of the page.")
    brief_summary: str = Field(..., description="Brief summary of the page.")
    keywords: list = Field(..., description="Keywords assigned to the page.")

+
 result = crawler.run(
    url=url,
    word_count_threshold=1,
-    extraction_strategy= LLMExtractionStrategy(
-        provider= "openai/gpt-4o", api_token = os.getenv('OPENAI_API_KEY'), 
+    extraction_strategy=LLMExtractionStrategy(
+        provider="openai/gpt-4o",
+        api_token=os.getenv("OPENAI_API_KEY"),
        schema=PageSummary.model_json_schema(),
        extraction_type="schema",
-        apply_chunking =False,
-        instruction="From the crawled content, extract the following details: "\
-            "1. Title of the page "\
-            "2. Summary of the page, which is a detailed summary "\
-            "3. Brief summary of the page, which is a paragraph text "\
-            "4. Keywords assigned to the page, which is a list of keywords. "\
-            'The extracted JSON format should look like this: '\
-            '{ "title": "Page Title", "summary": "Detailed summary of the page.", "brief_summary": "Brief summary in a paragraph.", "keywords": ["keyword1", "keyword2", "keyword3"] }'
+        apply_chunking=False,
+        instruction="From the crawled content, extract the following details: "
+        "1. Title of the page "
+        "2. Summary of the page, which is a detailed summary "
+        "3. Brief summary of the page, which is a paragraph text "
+        "4. Keywords assigned to the page, which is a list of keywords. "
+        "The extracted JSON format should look like this: "
+        '{ "title": "Page Title", "summary": "Detailed summary of the page.", "brief_summary": "Brief summary in a paragraph.", "keywords": ["keyword1", "keyword2", "keyword3"] }',
    ),
    bypass_cache=True,
 )