Apply Ruff Corrections

2025-01-13 19:19:58 +08:00
parent c3370ec5da
commit 8ec12d7d68
84 changed files with 6861 additions and 5076 deletions
--- a/docs/examples/amazon_product_extraction_direct_url.py
+++ b/docs/examples/amazon_product_extraction_direct_url.py
@@ -9,13 +9,11 @@ from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
 from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig
 import json

+
 async def extract_amazon_products():
    # Initialize browser config
-    browser_config = BrowserConfig(
-        browser_type="chromium",
-        headless=True
-    )
-    
+    browser_config = BrowserConfig(browser_type="chromium", headless=True)
+
    # Initialize crawler config with JSON CSS extraction strategy
    crawler_config = CrawlerRunConfig(
        extraction_strategy=JsonCssExtractionStrategy(
@@ -27,74 +25,70 @@ async def extract_amazon_products():
                        "name": "asin",
                        "selector": "",
                        "type": "attribute",
-                        "attribute": "data-asin"
-                    },
-                    {
-                        "name": "title",
-                        "selector": "h2 a span",
-                        "type": "text"
+                        "attribute": "data-asin",
                    },
+                    {"name": "title", "selector": "h2 a span", "type": "text"},
                    {
                        "name": "url",
                        "selector": "h2 a",
                        "type": "attribute",
-                        "attribute": "href"
+                        "attribute": "href",
                    },
                    {
                        "name": "image",
                        "selector": ".s-image",
                        "type": "attribute",
-                        "attribute": "src"
+                        "attribute": "src",
                    },
                    {
                        "name": "rating",
                        "selector": ".a-icon-star-small .a-icon-alt",
-                        "type": "text"
+                        "type": "text",
                    },
                    {
                        "name": "reviews_count",
                        "selector": "[data-csa-c-func-deps='aui-da-a-popover'] ~ span span",
-                        "type": "text"
+                        "type": "text",
                    },
                    {
                        "name": "price",
                        "selector": ".a-price .a-offscreen",
-                        "type": "text"
+                        "type": "text",
                    },
                    {
                        "name": "original_price",
                        "selector": ".a-price.a-text-price .a-offscreen",
-                        "type": "text"
+                        "type": "text",
                    },
                    {
                        "name": "sponsored",
                        "selector": ".puis-sponsored-label-text",
-                        "type": "exists"
+                        "type": "exists",
                    },
                    {
                        "name": "delivery_info",
                        "selector": "[data-cy='delivery-recipe'] .a-color-base",
                        "type": "text",
-                        "multiple": True
-                    }
-                ]
+                        "multiple": True,
+                    },
+                ],
            }
        )
    )

    # Example search URL (you should replace with your actual Amazon URL)
    url = "https://www.amazon.com/s?k=Samsung+Galaxy+Tab"
-    
+
    # Use context manager for proper resource handling
    async with AsyncWebCrawler(config=browser_config) as crawler:
        # Extract the data
        result = await crawler.arun(url=url, config=crawler_config)
-        
+
        # Process and print the results
        if result and result.extracted_content:
            # Parse the JSON string into a list of products
            products = json.loads(result.extracted_content)
-            
+
            # Process each product in the list
            for product in products:
                print("\nProduct Details:")
@@ -105,10 +99,12 @@ async def extract_amazon_products():
                print(f"Rating: {product.get('rating')}")
                print(f"Reviews: {product.get('reviews_count')}")
                print(f"Sponsored: {'Yes' if product.get('sponsored') else 'No'}")
-                if product.get('delivery_info'):
+                if product.get("delivery_info"):
                    print(f"Delivery: {' '.join(product['delivery_info'])}")
                print("-" * 80)

+
 if __name__ == "__main__":
    import asyncio
+
    asyncio.run(extract_amazon_products())