Apply Ruff Corrections

Author: UncleCode
Date: 2025-01-13 19:19:58 +08:00
Parent: c3370ec5da
Commit: 8ec12d7d68
84 changed files with 6861 additions and 5076 deletions
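
The diff below is formatter output: single quotes become double quotes, trailing ("magic") commas are added, short argument lists are collapsed onto one line, and top-level definitions are separated by two blank lines. As a minimal sketch, such a sweep is typically produced with the standard `ruff check --fix` and `ruff format` commands; the exact invocation is an assumption here, since the commit does not record it:

    import subprocess

    # Assumed invocation, not recorded in this commit: lint autofixes first,
    # then the formatter pass that normalizes quotes, commas, and wrapping.
    subprocess.run(["ruff", "check", ".", "--fix"], check=True)
    subprocess.run(["ruff", "format", "."], check=True)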


@@ -1,15 +1,16 @@
 import os, sys
 parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 sys.path.append(parent_dir)
 __location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))

 import asyncio
 from crawl4ai import AsyncWebCrawler, CacheMode
-from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig
+from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig
 from crawl4ai.content_filter_strategy import PruningContentFilter
 from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
 from crawl4ai.chunking_strategy import RegexChunking
 from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator

+
 # Category 1: Browser Configuration Tests
 async def test_browser_config_object():
@@ -21,29 +22,31 @@ async def test_browser_config_object():
         viewport_height=1080,
         use_managed_browser=True,
         user_agent_mode="random",
-        user_agent_generator_config={"device_type": "desktop", "os_type": "windows"}
+        user_agent_generator_config={"device_type": "desktop", "os_type": "windows"},
     )

     async with AsyncWebCrawler(config=browser_config, verbose=True) as crawler:
-        result = await crawler.arun('https://example.com', cache_mode=CacheMode.BYPASS)
+        result = await crawler.arun("https://example.com", cache_mode=CacheMode.BYPASS)
         assert result.success, "Browser config crawl failed"
         assert len(result.html) > 0, "No HTML content retrieved"

+
 async def test_browser_performance_config():
     """Test browser configurations focused on performance"""
     browser_config = BrowserConfig(
         text_mode=True,
         light_mode=True,
-        extra_args=['--disable-gpu', '--disable-software-rasterizer'],
+        extra_args=["--disable-gpu", "--disable-software-rasterizer"],
         ignore_https_errors=True,
-        java_script_enabled=False
+        java_script_enabled=False,
     )

     async with AsyncWebCrawler(config=browser_config) as crawler:
-        result = await crawler.arun('https://example.com')
+        result = await crawler.arun("https://example.com")
         assert result.success, "Performance optimized crawl failed"
         assert result.status_code == 200, "Unexpected status code"

+
 # Category 2: Content Processing Tests
 async def test_content_extraction_config():
     """Test content extraction with various strategies"""
@@ -53,24 +56,20 @@ async def test_content_extraction_config():
             schema={
                 "name": "article",
                 "baseSelector": "div",
-                "fields": [{
-                    "name": "title",
-                    "selector": "h1",
-                    "type": "text"
-                }]
+                "fields": [{"name": "title", "selector": "h1", "type": "text"}],
             }
         ),
         chunking_strategy=RegexChunking(),
-        content_filter=PruningContentFilter()
+        content_filter=PruningContentFilter(),
     )

     async with AsyncWebCrawler() as crawler:
         result = await crawler.arun(
-            'https://example.com/article',
-            config=crawler_config
+            "https://example.com/article", config=crawler_config
         )
         assert result.extracted_content is not None, "Content extraction failed"
-        assert 'title' in result.extracted_content, "Missing expected content field"
+        assert "title" in result.extracted_content, "Missing expected content field"

+
 # Category 3: Cache and Session Management Tests
 async def test_cache_and_session_management():
@@ -79,25 +78,20 @@ async def test_cache_and_session_management():
     crawler_config = CrawlerRunConfig(
         cache_mode=CacheMode.WRITE_ONLY,
         process_iframes=True,
-        remove_overlay_elements=True
+        remove_overlay_elements=True,
     )

     async with AsyncWebCrawler(config=browser_config) as crawler:
         # First request - should write to cache
-        result1 = await crawler.arun(
-            'https://example.com',
-            config=crawler_config
-        )
+        result1 = await crawler.arun("https://example.com", config=crawler_config)

         # Second request - should use fresh fetch due to WRITE_ONLY mode
-        result2 = await crawler.arun(
-            'https://example.com',
-            config=crawler_config
-        )
+        result2 = await crawler.arun("https://example.com", config=crawler_config)

         assert result1.success and result2.success, "Cache mode crawl failed"
         assert result1.html == result2.html, "Inconsistent results between requests"

+
 # Category 4: Media Handling Tests
 async def test_media_handling_config():
     """Test configurations related to media handling"""
@@ -107,24 +101,22 @@ async def test_media_handling_config():
         viewport_width=1920,
         viewport_height=1080,
         accept_downloads=True,
-        downloads_path= os.path.expanduser("~/.crawl4ai/downloads")
+        downloads_path=os.path.expanduser("~/.crawl4ai/downloads"),
     )
     crawler_config = CrawlerRunConfig(
         screenshot=True,
         pdf=True,
         adjust_viewport_to_content=True,
         wait_for_images=True,
-        screenshot_height_threshold=20000
+        screenshot_height_threshold=20000,
     )

     async with AsyncWebCrawler(config=browser_config) as crawler:
-        result = await crawler.arun(
-            'https://example.com',
-            config=crawler_config
-        )
+        result = await crawler.arun("https://example.com", config=crawler_config)
         assert result.screenshot is not None, "Screenshot capture failed"
         assert result.pdf is not None, "PDF generation failed"

+
 # Category 5: Anti-Bot and Site Interaction Tests
 async def test_antibot_config():
     """Test configurations for handling anti-bot measures"""
@@ -135,76 +127,64 @@ async def test_antibot_config():
wait_for="js:()=>document.querySelector('body')",
delay_before_return_html=1.0,
log_console=True,
cache_mode=CacheMode.BYPASS
cache_mode=CacheMode.BYPASS,
)
async with AsyncWebCrawler() as crawler:
result = await crawler.arun(
'https://example.com',
config=crawler_config
)
result = await crawler.arun("https://example.com", config=crawler_config)
assert result.success, "Anti-bot measure handling failed"
# Category 6: Parallel Processing Tests
async def test_parallel_processing():
"""Test parallel processing capabilities"""
crawler_config = CrawlerRunConfig(
mean_delay=0.5,
max_range=1.0,
semaphore_count=5
)
urls = [
'https://example.com/1',
'https://example.com/2',
'https://example.com/3'
]
crawler_config = CrawlerRunConfig(mean_delay=0.5, max_range=1.0, semaphore_count=5)
urls = ["https://example.com/1", "https://example.com/2", "https://example.com/3"]
async with AsyncWebCrawler() as crawler:
results = await crawler.arun_many(
urls,
config=crawler_config
)
results = await crawler.arun_many(urls, config=crawler_config)
assert len(results) == len(urls), "Not all URLs were processed"
assert all(r.success for r in results), "Some parallel requests failed"
# Category 7: Backwards Compatibility Tests
async def test_legacy_parameter_support():
"""Test that legacy parameters still work"""
async with AsyncWebCrawler(
headless=True,
browser_type="chromium",
viewport_width=1024,
viewport_height=768
headless=True, browser_type="chromium", viewport_width=1024, viewport_height=768
) as crawler:
result = await crawler.arun(
'https://example.com',
"https://example.com",
screenshot=True,
word_count_threshold=200,
bypass_cache=True,
css_selector=".main-content"
css_selector=".main-content",
)
assert result.success, "Legacy parameter support failed"
# Category 8: Mixed Configuration Tests
async def test_mixed_config_usage():
"""Test mixing new config objects with legacy parameters"""
browser_config = BrowserConfig(headless=True)
crawler_config = CrawlerRunConfig(screenshot=True)
async with AsyncWebCrawler(
config=browser_config,
verbose=True # legacy parameter
verbose=True, # legacy parameter
) as crawler:
result = await crawler.arun(
'https://example.com',
"https://example.com",
config=crawler_config,
cache_mode=CacheMode.BYPASS, # legacy parameter
css_selector="body" # legacy parameter
css_selector="body", # legacy parameter
)
assert result.success, "Mixed configuration usage failed"
if __name__ == "__main__":
async def run_tests():
test_functions = [
test_browser_config_object,
@@ -217,7 +197,7 @@ if __name__ == "__main__":
             # test_legacy_parameter_support,
             # test_mixed_config_usage
         ]

         for test in test_functions:
             print(f"\nRunning {test.__name__}...")
             try:
@@ -227,5 +207,5 @@ if __name__ == "__main__":
print(f"{test.__name__} failed: {str(e)}")
except Exception as e:
print(f"{test.__name__} error: {str(e)}")
asyncio.run(run_tests())
asyncio.run(run_tests())