Apply Ruff Corrections
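The commit title indicates these edits were generated by Ruff, presumably something like `ruff check --fix` (autofixable lint violations, such as the unused `import time`) followed by `ruff format` (Black-style layout changes: trailing commas on exploded calls, collapsing argument lists that fit within the line limit, and wrapping over-long expressions). The exact invocation and configuration are not recorded in the commit, so treat those commands as an assumption.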
```diff
@@ -4,7 +4,6 @@ import asyncio
 import shutil
 from typing import List
 import tempfile
-import time
 
 # Add the parent directory to the Python path
 parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
@@ -12,28 +11,27 @@ sys.path.append(parent_dir)
 
 from crawl4ai.async_webcrawler import AsyncWebCrawler
 
+
 class TestDownloads:
     def __init__(self):
         self.temp_dir = tempfile.mkdtemp(prefix="crawl4ai_test_")
         self.download_dir = os.path.join(self.temp_dir, "downloads")
         os.makedirs(self.download_dir, exist_ok=True)
         self.results: List[str] = []
 
     def cleanup(self):
         shutil.rmtree(self.temp_dir)
 
     def log_result(self, test_name: str, success: bool, message: str = ""):
         result = f"{'✅' if success else '❌'} {test_name}: {message}"
         self.results.append(result)
         print(result)
 
     async def test_basic_download(self):
         """Test basic file download functionality"""
         try:
             async with AsyncWebCrawler(
-                accept_downloads=True,
-                downloads_path=self.download_dir,
-                verbose=True
+                accept_downloads=True, downloads_path=self.download_dir, verbose=True
             ) as crawler:
                 # Python.org downloads page typically has stable download links
                 result = await crawler.arun(
@@ -42,14 +40,19 @@ class TestDownloads:
                     // Click first download link
                     const downloadLink = document.querySelector('a[href$=".exe"]');
                     if (downloadLink) downloadLink.click();
-                    """
+                    """,
                 )
 
-                success = result.downloaded_files is not None and len(result.downloaded_files) > 0
+                success = (
+                    result.downloaded_files is not None
+                    and len(result.downloaded_files) > 0
+                )
                 self.log_result(
                     "Basic Download",
                     success,
-                    f"Downloaded {len(result.downloaded_files or [])} files" if success else "No files downloaded"
+                    f"Downloaded {len(result.downloaded_files or [])} files"
+                    if success
+                    else "No files downloaded",
                 )
         except Exception as e:
             self.log_result("Basic Download", False, str(e))
@@ -59,27 +62,32 @@ class TestDownloads:
|
||||
try:
|
||||
user_data_dir = os.path.join(self.temp_dir, "user_data")
|
||||
os.makedirs(user_data_dir, exist_ok=True)
|
||||
|
||||
|
||||
async with AsyncWebCrawler(
|
||||
accept_downloads=True,
|
||||
downloads_path=self.download_dir,
|
||||
use_persistent_context=True,
|
||||
user_data_dir=user_data_dir,
|
||||
verbose=True
|
||||
verbose=True,
|
||||
) as crawler:
|
||||
result = await crawler.arun(
|
||||
url="https://www.python.org/downloads/",
|
||||
js_code="""
|
||||
const downloadLink = document.querySelector('a[href$=".exe"]');
|
||||
if (downloadLink) downloadLink.click();
|
||||
"""
|
||||
""",
|
||||
)
|
||||
|
||||
success = (
|
||||
result.downloaded_files is not None
|
||||
and len(result.downloaded_files) > 0
|
||||
)
|
||||
|
||||
success = result.downloaded_files is not None and len(result.downloaded_files) > 0
|
||||
self.log_result(
|
||||
"Persistent Context Download",
|
||||
success,
|
||||
f"Downloaded {len(result.downloaded_files or [])} files" if success else "No files downloaded"
|
||||
f"Downloaded {len(result.downloaded_files or [])} files"
|
||||
if success
|
||||
else "No files downloaded",
|
||||
)
|
||||
except Exception as e:
|
||||
self.log_result("Persistent Context Download", False, str(e))
|
||||
@@ -88,9 +96,7 @@ class TestDownloads:
         """Test multiple simultaneous downloads"""
         try:
             async with AsyncWebCrawler(
-                accept_downloads=True,
-                downloads_path=self.download_dir,
-                verbose=True
+                accept_downloads=True, downloads_path=self.download_dir, verbose=True
             ) as crawler:
                 result = await crawler.arun(
                     url="https://www.python.org/downloads/",
@@ -98,14 +104,19 @@ class TestDownloads:
                     // Click multiple download links
                     const downloadLinks = document.querySelectorAll('a[href$=".exe"]');
                     downloadLinks.forEach(link => link.click());
-                    """
+                    """,
                 )
 
-                success = result.downloaded_files is not None and len(result.downloaded_files) > 1
+                success = (
+                    result.downloaded_files is not None
+                    and len(result.downloaded_files) > 1
+                )
                 self.log_result(
                     "Multiple Downloads",
                     success,
-                    f"Downloaded {len(result.downloaded_files or [])} files" if success else "Not enough files downloaded"
+                    f"Downloaded {len(result.downloaded_files or [])} files"
+                    if success
+                    else "Not enough files downloaded",
                 )
         except Exception as e:
             self.log_result("Multiple Downloads", False, str(e))
@@ -113,49 +124,51 @@ class TestDownloads:
     async def test_different_browsers(self):
         """Test downloads across different browser types"""
         browsers = ["chromium", "firefox", "webkit"]
 
         for browser_type in browsers:
             try:
                 async with AsyncWebCrawler(
                     accept_downloads=True,
                     downloads_path=self.download_dir,
                     browser_type=browser_type,
-                    verbose=True
+                    verbose=True,
                 ) as crawler:
                     result = await crawler.arun(
                         url="https://www.python.org/downloads/",
                         js_code="""
                         const downloadLink = document.querySelector('a[href$=".exe"]');
                         if (downloadLink) downloadLink.click();
-                        """
+                        """,
                     )
 
-                    success = result.downloaded_files is not None and len(result.downloaded_files) > 0
+                    success = (
+                        result.downloaded_files is not None
+                        and len(result.downloaded_files) > 0
+                    )
                     self.log_result(
                         f"{browser_type.title()} Download",
                         success,
-                        f"Downloaded {len(result.downloaded_files or [])} files" if success else "No files downloaded"
+                        f"Downloaded {len(result.downloaded_files or [])} files"
+                        if success
+                        else "No files downloaded",
                     )
             except Exception as e:
                 self.log_result(f"{browser_type.title()} Download", False, str(e))
 
     async def test_edge_cases(self):
         """Test various edge cases"""
 
         # Test 1: Downloads without specifying download path
         try:
-            async with AsyncWebCrawler(
-                accept_downloads=True,
-                verbose=True
-            ) as crawler:
+            async with AsyncWebCrawler(accept_downloads=True, verbose=True) as crawler:
                 result = await crawler.arun(
                     url="https://www.python.org/downloads/",
-                    js_code="document.querySelector('a[href$=\".exe\"]').click()"
+                    js_code="document.querySelector('a[href$=\".exe\"]').click()",
                 )
                 self.log_result(
                     "Default Download Path",
                     True,
-                    f"Downloaded to default path: {result.downloaded_files[0] if result.downloaded_files else 'None'}"
+                    f"Downloaded to default path: {result.downloaded_files[0] if result.downloaded_files else 'None'}",
                 )
         except Exception as e:
             self.log_result("Default Download Path", False, str(e))
@@ -165,31 +178,34 @@ class TestDownloads:
             async with AsyncWebCrawler(
                 accept_downloads=True,
                 downloads_path="/invalid/path/that/doesnt/exist",
-                verbose=True
+                verbose=True,
             ) as crawler:
                 result = await crawler.arun(
                     url="https://www.python.org/downloads/",
-                    js_code="document.querySelector('a[href$=\".exe\"]').click()"
+                    js_code="document.querySelector('a[href$=\".exe\"]').click()",
                 )
-                self.log_result("Invalid Download Path", False, "Should have raised an error")
-        except Exception as e:
-            self.log_result("Invalid Download Path", True, "Correctly handled invalid path")
+                self.log_result(
+                    "Invalid Download Path", False, "Should have raised an error"
+                )
+        except Exception:
+            self.log_result(
+                "Invalid Download Path", True, "Correctly handled invalid path"
+            )
 
         # Test 3: Download with accept_downloads=False
         try:
-            async with AsyncWebCrawler(
-                accept_downloads=False,
-                verbose=True
-            ) as crawler:
+            async with AsyncWebCrawler(accept_downloads=False, verbose=True) as crawler:
                 result = await crawler.arun(
                     url="https://www.python.org/downloads/",
-                    js_code="document.querySelector('a[href$=\".exe\"]').click()"
+                    js_code="document.querySelector('a[href$=\".exe\"]').click()",
                 )
                 success = result.downloaded_files is None
                 self.log_result(
                     "Disabled Downloads",
                     success,
-                    "Correctly ignored downloads" if success else "Unexpectedly downloaded files"
+                    "Correctly ignored downloads"
+                    if success
+                    else "Unexpectedly downloaded files",
                 )
         except Exception as e:
             self.log_result("Disabled Downloads", False, str(e))
@@ -197,33 +213,35 @@ class TestDownloads:
     async def run_all_tests(self):
         """Run all test cases"""
        print("\n🧪 Running Download Tests...\n")
 
        test_methods = [
            self.test_basic_download,
            self.test_persistent_context_download,
            self.test_multiple_downloads,
            self.test_different_browsers,
-            self.test_edge_cases
+            self.test_edge_cases,
        ]
 
        for test in test_methods:
            print(f"\n📝 Running {test.__doc__}...")
            await test()
            await asyncio.sleep(2)  # Brief pause between tests
 
        print("\n📊 Test Results Summary:")
        for result in self.results:
            print(result)
 
-        successes = len([r for r in self.results if '✅' in r])
+        successes = len([r for r in self.results if "✅" in r])
        total = len(self.results)
        print(f"\nTotal: {successes}/{total} tests passed")
 
        self.cleanup()
 
+
async def main():
    tester = TestDownloads()
    await tester.run_all_tests()
 
+
if __name__ == "__main__":
-    asyncio.run(main())
+    asyncio.run(main())
```
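For context, here is a minimal standalone sketch of the download flow these tests exercise. It uses only the names visible in the diff (`AsyncWebCrawler`, `accept_downloads`, `downloads_path`, `verbose`, `arun`, `js_code`, `result.downloaded_files`); the `download_example` wrapper is hypothetical, and the behavior (a list of saved file paths, or `None` when downloads are disabled) is assumed from the tests rather than verified against the library:

```python
import asyncio
import tempfile

from crawl4ai.async_webcrawler import AsyncWebCrawler


async def download_example() -> None:
    # Hypothetical standalone run of the flow the tests above exercise;
    # parameter names are taken from the diff, behavior is assumed.
    downloads_dir = tempfile.mkdtemp(prefix="crawl4ai_example_")
    async with AsyncWebCrawler(
        accept_downloads=True, downloads_path=downloads_dir, verbose=True
    ) as crawler:
        result = await crawler.arun(
            url="https://www.python.org/downloads/",
            js_code="""
            const downloadLink = document.querySelector('a[href$=".exe"]');
            if (downloadLink) downloadLink.click();
            """,
        )
        # Per the tests, downloaded_files is None when downloads are disabled,
        # otherwise a (possibly empty) list of paths to the saved files.
        print(result.downloaded_files or [])


if __name__ == "__main__":
    asyncio.run(download_example())
```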
||||