Apply Ruff Corrections
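The commit title indicates these edits were generated by Ruff, presumably something like `ruff check --fix` (autofixable lint violations, such as the unused `import time`) followed by `ruff format` (Black-style layout changes: trailing commas on exploded calls, collapsing argument lists that fit within the line limit, and wrapping over-long expressions). The exact invocation and configuration are not recorded in the commit, so treat those commands as an assumption.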
```diff
@@ -4,7 +4,6 @@ import asyncio
 import shutil
 from typing import List
 import tempfile
-import time
 
 # Add the parent directory to the Python path
 parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
@@ -12,28 +11,27 @@ sys.path.append(parent_dir)
 
 from crawl4ai.async_webcrawler import AsyncWebCrawler
 
+
 class TestDownloads:
     def __init__(self):
         self.temp_dir = tempfile.mkdtemp(prefix="crawl4ai_test_")
         self.download_dir = os.path.join(self.temp_dir, "downloads")
         os.makedirs(self.download_dir, exist_ok=True)
         self.results: List[str] = []
 
     def cleanup(self):
         shutil.rmtree(self.temp_dir)
 
     def log_result(self, test_name: str, success: bool, message: str = ""):
         result = f"{'✅' if success else '❌'} {test_name}: {message}"
         self.results.append(result)
         print(result)
 
     async def test_basic_download(self):
         """Test basic file download functionality"""
         try:
             async with AsyncWebCrawler(
-                accept_downloads=True,
-                downloads_path=self.download_dir,
-                verbose=True
+                accept_downloads=True, downloads_path=self.download_dir, verbose=True
             ) as crawler:
                 # Python.org downloads page typically has stable download links
                 result = await crawler.arun(
@@ -42,14 +40,19 @@ class TestDownloads:
                     // Click first download link
                     const downloadLink = document.querySelector('a[href$=".exe"]');
                     if (downloadLink) downloadLink.click();
-                    """
+                    """,
                 )
 
-                success = result.downloaded_files is not None and len(result.downloaded_files) > 0
+                success = (
+                    result.downloaded_files is not None
+                    and len(result.downloaded_files) > 0
+                )
                 self.log_result(
                     "Basic Download",
                     success,
-                    f"Downloaded {len(result.downloaded_files or [])} files" if success else "No files downloaded"
+                    f"Downloaded {len(result.downloaded_files or [])} files"
+                    if success
+                    else "No files downloaded",
                 )
         except Exception as e:
             self.log_result("Basic Download", False, str(e))
@@ -59,27 +62,32 @@ class TestDownloads:
|
||||
try:
|
||||
user_data_dir = os.path.join(self.temp_dir, "user_data")
|
||||
os.makedirs(user_data_dir, exist_ok=True)
|
||||
|
||||
|
||||
async with AsyncWebCrawler(
|
||||
accept_downloads=True,
|
||||
downloads_path=self.download_dir,
|
||||
use_persistent_context=True,
|
||||
user_data_dir=user_data_dir,
|
||||
verbose=True
|
||||
verbose=True,
|
||||
) as crawler:
|
||||
result = await crawler.arun(
|
||||
url="https://www.python.org/downloads/",
|
||||
js_code="""
|
||||
const downloadLink = document.querySelector('a[href$=".exe"]');
|
||||
if (downloadLink) downloadLink.click();
|
||||
"""
|
||||
""",
|
||||
)
|
||||
|
||||
success = (
|
||||
result.downloaded_files is not None
|
||||
and len(result.downloaded_files) > 0
|
||||
)
|
||||
|
||||
success = result.downloaded_files is not None and len(result.downloaded_files) > 0
|
||||
self.log_result(
|
||||
"Persistent Context Download",
|
||||
success,
|
||||
f"Downloaded {len(result.downloaded_files or [])} files" if success else "No files downloaded"
|
||||
f"Downloaded {len(result.downloaded_files or [])} files"
|
||||
if success
|
||||
else "No files downloaded",
|
||||
)
|
||||
except Exception as e:
|
||||
self.log_result("Persistent Context Download", False, str(e))
|
||||
@@ -88,9 +96,7 @@ class TestDownloads:
         """Test multiple simultaneous downloads"""
         try:
             async with AsyncWebCrawler(
-                accept_downloads=True,
-                downloads_path=self.download_dir,
-                verbose=True
+                accept_downloads=True, downloads_path=self.download_dir, verbose=True
             ) as crawler:
                 result = await crawler.arun(
                     url="https://www.python.org/downloads/",
@@ -98,14 +104,19 @@ class TestDownloads:
                     // Click multiple download links
                     const downloadLinks = document.querySelectorAll('a[href$=".exe"]');
                     downloadLinks.forEach(link => link.click());
-                    """
+                    """,
                 )
 
-                success = result.downloaded_files is not None and len(result.downloaded_files) > 1
+                success = (
+                    result.downloaded_files is not None
+                    and len(result.downloaded_files) > 1
+                )
                 self.log_result(
                     "Multiple Downloads",
                     success,
-                    f"Downloaded {len(result.downloaded_files or [])} files" if success else "Not enough files downloaded"
+                    f"Downloaded {len(result.downloaded_files or [])} files"
+                    if success
+                    else "Not enough files downloaded",
                 )
         except Exception as e:
             self.log_result("Multiple Downloads", False, str(e))
@@ -113,49 +124,51 @@ class TestDownloads:
     async def test_different_browsers(self):
         """Test downloads across different browser types"""
         browsers = ["chromium", "firefox", "webkit"]
 
         for browser_type in browsers:
             try:
                 async with AsyncWebCrawler(
                     accept_downloads=True,
                     downloads_path=self.download_dir,
                     browser_type=browser_type,
-                    verbose=True
+                    verbose=True,
                 ) as crawler:
                     result = await crawler.arun(
                         url="https://www.python.org/downloads/",
                         js_code="""
                         const downloadLink = document.querySelector('a[href$=".exe"]');
                         if (downloadLink) downloadLink.click();
-                        """
+                        """,
                     )
 
-                    success = result.downloaded_files is not None and len(result.downloaded_files) > 0
+                    success = (
+                        result.downloaded_files is not None
+                        and len(result.downloaded_files) > 0
+                    )
                     self.log_result(
                         f"{browser_type.title()} Download",
                         success,
-                        f"Downloaded {len(result.downloaded_files or [])} files" if success else "No files downloaded"
+                        f"Downloaded {len(result.downloaded_files or [])} files"
+                        if success
+                        else "No files downloaded",
                     )
             except Exception as e:
                 self.log_result(f"{browser_type.title()} Download", False, str(e))
 
     async def test_edge_cases(self):
         """Test various edge cases"""
 
         # Test 1: Downloads without specifying download path
         try:
-            async with AsyncWebCrawler(
-                accept_downloads=True,
-                verbose=True
-            ) as crawler:
+            async with AsyncWebCrawler(accept_downloads=True, verbose=True) as crawler:
                 result = await crawler.arun(
                     url="https://www.python.org/downloads/",
-                    js_code="document.querySelector('a[href$=\".exe\"]').click()"
+                    js_code="document.querySelector('a[href$=\".exe\"]').click()",
                 )
                 self.log_result(
                     "Default Download Path",
                     True,
-                    f"Downloaded to default path: {result.downloaded_files[0] if result.downloaded_files else 'None'}"
+                    f"Downloaded to default path: {result.downloaded_files[0] if result.downloaded_files else 'None'}",
                 )
         except Exception as e:
             self.log_result("Default Download Path", False, str(e))
@@ -165,31 +178,34 @@ class TestDownloads:
             async with AsyncWebCrawler(
                 accept_downloads=True,
                 downloads_path="/invalid/path/that/doesnt/exist",
-                verbose=True
+                verbose=True,
             ) as crawler:
                 result = await crawler.arun(
                     url="https://www.python.org/downloads/",
-                    js_code="document.querySelector('a[href$=\".exe\"]').click()"
+                    js_code="document.querySelector('a[href$=\".exe\"]').click()",
                 )
-                self.log_result("Invalid Download Path", False, "Should have raised an error")
-        except Exception as e:
-            self.log_result("Invalid Download Path", True, "Correctly handled invalid path")
+                self.log_result(
+                    "Invalid Download Path", False, "Should have raised an error"
+                )
+        except Exception:
+            self.log_result(
+                "Invalid Download Path", True, "Correctly handled invalid path"
+            )
 
         # Test 3: Download with accept_downloads=False
         try:
-            async with AsyncWebCrawler(
-                accept_downloads=False,
-                verbose=True
-            ) as crawler:
+            async with AsyncWebCrawler(accept_downloads=False, verbose=True) as crawler:
                 result = await crawler.arun(
                     url="https://www.python.org/downloads/",
-                    js_code="document.querySelector('a[href$=\".exe\"]').click()"
+                    js_code="document.querySelector('a[href$=\".exe\"]').click()",
                 )
                 success = result.downloaded_files is None
                 self.log_result(
                     "Disabled Downloads",
                     success,
-                    "Correctly ignored downloads" if success else "Unexpectedly downloaded files"
+                    "Correctly ignored downloads"
+                    if success
+                    else "Unexpectedly downloaded files",
                 )
         except Exception as e:
             self.log_result("Disabled Downloads", False, str(e))
@@ -197,33 +213,35 @@ class TestDownloads:
     async def run_all_tests(self):
         """Run all test cases"""
        print("\n🧪 Running Download Tests...\n")
 
        test_methods = [
            self.test_basic_download,
            self.test_persistent_context_download,
            self.test_multiple_downloads,
            self.test_different_browsers,
-            self.test_edge_cases
+            self.test_edge_cases,
        ]
 
        for test in test_methods:
            print(f"\n📝 Running {test.__doc__}...")
            await test()
            await asyncio.sleep(2)  # Brief pause between tests
 
        print("\n📊 Test Results Summary:")
        for result in self.results:
            print(result)
 
-        successes = len([r for r in self.results if '✅' in r])
+        successes = len([r for r in self.results if "✅" in r])
        total = len(self.results)
        print(f"\nTotal: {successes}/{total} tests passed")
 
        self.cleanup()
 
+
async def main():
    tester = TestDownloads()
    await tester.run_all_tests()
 
+
if __name__ == "__main__":
-    asyncio.run(main())
+    asyncio.run(main())
```
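For context, here is a minimal standalone sketch of the download flow these tests exercise. It uses only the names visible in the diff (`AsyncWebCrawler`, `accept_downloads`, `downloads_path`, `verbose`, `arun`, `js_code`, `result.downloaded_files`); the `download_example` wrapper is hypothetical, and the behavior (a list of saved file paths, or `None` when downloads are disabled) is assumed from the tests rather than verified against the library:

```python
import asyncio
import tempfile

from crawl4ai.async_webcrawler import AsyncWebCrawler


async def download_example() -> None:
    # Hypothetical standalone run of the flow the tests above exercise;
    # parameter names are taken from the diff, behavior is assumed.
    downloads_dir = tempfile.mkdtemp(prefix="crawl4ai_example_")
    async with AsyncWebCrawler(
        accept_downloads=True, downloads_path=downloads_dir, verbose=True
    ) as crawler:
        result = await crawler.arun(
            url="https://www.python.org/downloads/",
            js_code="""
            const downloadLink = document.querySelector('a[href$=".exe"]');
            if (downloadLink) downloadLink.click();
            """,
        )
        # Per the tests, downloaded_files is None when downloads are disabled,
        # otherwise a (possibly empty) list of paths to the saved files.
        print(result.downloaded_files or [])


if __name__ == "__main__":
    asyncio.run(download_example())
```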
||||