Files
crawl4ai/tests/browser/smoke_test_cdp.py
copilot-swe-agent[bot] c1c5dfc49b Add smoke test and comprehensive documentation
- Created standalone smoke test script for quick validation
- Added detailed CHANGES_CDP_CONCURRENCY.md documentation
- Documented all fixes, testing approach, and migration guide
- Smoke test can run without pytest for easy verification

Co-authored-by: Ahmed-Tawfik94 <106467151+Ahmed-Tawfik94@users.noreply.github.com>
2025-11-06 08:20:39 +00:00

166 lines
5.3 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Simple smoke test for CDP concurrency fixes.
This can be run without pytest to quickly validate the changes.
"""
import asyncio
import sys
import os
# Add the project root to Python path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
async def test_basic_cdp():
"""Basic test that CDP browser works"""
print("Test 1: Basic CDP browser test...")
browser_config = BrowserConfig(
use_managed_browser=True,
headless=True,
verbose=False
)
try:
async with AsyncWebCrawler(config=browser_config) as crawler:
result = await crawler.arun(
url="https://example.com",
config=CrawlerRunConfig(cache_mode=CacheMode.BYPASS)
)
assert result.success, f"Failed: {result.error_message}"
assert len(result.html) > 0, "Empty HTML"
print(" ✓ Basic CDP test passed")
return True
except Exception as e:
print(f" ✗ Basic CDP test failed: {e}")
return False
async def test_arun_many_cdp():
"""Test arun_many with CDP browser - the key concurrency fix"""
print("\nTest 2: arun_many with CDP browser...")
browser_config = BrowserConfig(
use_managed_browser=True,
headless=True,
verbose=False
)
urls = [
"https://example.com",
"https://httpbin.org/html",
"https://www.example.org",
]
try:
async with AsyncWebCrawler(config=browser_config) as crawler:
results = await crawler.arun_many(
urls=urls,
config=CrawlerRunConfig(cache_mode=CacheMode.BYPASS)
)
assert len(results) == len(urls), f"Expected {len(urls)} results, got {len(results)}"
success_count = sum(1 for r in results if r.success)
print(f" ✓ Crawled {success_count}/{len(urls)} URLs successfully")
if success_count >= len(urls) * 0.8: # Allow 20% failure for network issues
print(" ✓ arun_many CDP test passed")
return True
else:
print(f" ✗ Too many failures: {len(urls) - success_count}/{len(urls)}")
return False
except Exception as e:
print(f" ✗ arun_many CDP test failed: {e}")
import traceback
traceback.print_exc()
return False
async def test_concurrent_arun_many():
"""Test concurrent arun_many calls - stress test for page lock"""
print("\nTest 3: Concurrent arun_many calls...")
browser_config = BrowserConfig(
use_managed_browser=True,
headless=True,
verbose=False
)
try:
async with AsyncWebCrawler(config=browser_config) as crawler:
# Run two arun_many calls concurrently
task1 = crawler.arun_many(
urls=["https://example.com", "https://httpbin.org/html"],
config=CrawlerRunConfig(cache_mode=CacheMode.BYPASS)
)
task2 = crawler.arun_many(
urls=["https://www.example.org", "https://example.com"],
config=CrawlerRunConfig(cache_mode=CacheMode.BYPASS)
)
results1, results2 = await asyncio.gather(task1, task2, return_exceptions=True)
# Check for exceptions
if isinstance(results1, Exception):
print(f" ✗ Task 1 raised exception: {results1}")
return False
if isinstance(results2, Exception):
print(f" ✗ Task 2 raised exception: {results2}")
return False
total_success = sum(1 for r in results1 if r.success) + sum(1 for r in results2 if r.success)
total_requests = len(results1) + len(results2)
print(f"{total_success}/{total_requests} concurrent requests succeeded")
if total_success >= total_requests * 0.7: # Allow 30% failure for concurrent stress
print(" ✓ Concurrent arun_many test passed")
return True
else:
print(f" ✗ Too many concurrent failures")
return False
except Exception as e:
print(f" ✗ Concurrent test failed: {e}")
import traceback
traceback.print_exc()
return False
async def main():
"""Run all smoke tests"""
print("=" * 60)
print("CDP Concurrency Smoke Tests")
print("=" * 60)
results = []
# Run tests sequentially
results.append(await test_basic_cdp())
results.append(await test_arun_many_cdp())
results.append(await test_concurrent_arun_many())
print("\n" + "=" * 60)
passed = sum(results)
total = len(results)
if passed == total:
print(f"✓ All {total} smoke tests passed!")
print("=" * 60)
return 0
else:
print(f"{total - passed}/{total} smoke tests failed")
print("=" * 60)
return 1
if __name__ == "__main__":
exit_code = asyncio.run(main())
sys.exit(exit_code)