Add smoke test and comprehensive documentation
- Created standalone smoke test script for quick validation
- Added detailed CHANGES_CDP_CONCURRENCY.md documentation
- Documented all fixes, testing approach, and migration guide
- Smoke test can run without pytest for easy verification

Co-authored-by: Ahmed-Tawfik94 <106467151+Ahmed-Tawfik94@users.noreply.github.com>
This commit is contained in:
165
tests/browser/smoke_test_cdp.py
Executable file
165
tests/browser/smoke_test_cdp.py
Executable file
@@ -0,0 +1,165 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Simple smoke test for CDP concurrency fixes.
|
||||
This can be run without pytest to quickly validate the changes.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Add the project root to Python path
|
||||
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
|
||||
|
||||
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
|
||||
|
||||
|
||||
async def test_basic_cdp():
    """Crawl one page through a managed (CDP) browser and check the result.

    Returns:
        bool: True when the crawl reports success and yields non-empty HTML;
        False when the crawl fails, the HTML is empty/missing, or any
        exception is raised while crawling.
    """
    print("Test 1: Basic CDP browser test...")

    browser_config = BrowserConfig(
        use_managed_browser=True,
        headless=True,
        verbose=False,
    )

    try:
        async with AsyncWebCrawler(config=browser_config) as crawler:
            result = await crawler.arun(
                url="https://example.com",
                config=CrawlerRunConfig(cache_mode=CacheMode.BYPASS),
            )

            # Explicit checks instead of `assert`: this script is meant to be
            # run directly, and `python -O` strips assert statements, which
            # would make the test pass without verifying anything.
            if not result.success:
                print(f" ✗ Basic CDP test failed: Failed: {result.error_message}")
                return False
            # `not result.html` also covers html being None, which would make
            # len() raise an unrelated TypeError.
            if not result.html:
                print(" ✗ Basic CDP test failed: Empty HTML")
                return False

            print(" ✓ Basic CDP test passed")
            return True
    except Exception as e:
        # Top-level boundary of a smoke test: report the error and keep going
        # so the remaining tests still run.
        print(f" ✗ Basic CDP test failed: {e}")
        import traceback

        traceback.print_exc()
        return False
|
||||
|
||||
|
||||
async def test_arun_many_cdp():
    """Crawl several URLs with ``arun_many`` through a managed (CDP) browser.

    Returns:
        bool: True when one result per URL comes back and at least 80% of
        them report success; False otherwise, including when an exception
        is raised while crawling.
    """
    print("\nTest 2: arun_many with CDP browser...")

    browser_config = BrowserConfig(
        use_managed_browser=True,
        headless=True,
        verbose=False,
    )

    urls = [
        "https://example.com",
        "https://httpbin.org/html",
        "https://www.example.org",
    ]

    try:
        async with AsyncWebCrawler(config=browser_config) as crawler:
            results = await crawler.arun_many(
                urls=urls,
                config=CrawlerRunConfig(cache_mode=CacheMode.BYPASS),
            )

            # Explicit check instead of `assert`, which `python -O` strips;
            # a standalone script must not lose its verification that way.
            if len(results) != len(urls):
                print(
                    f" ✗ arun_many CDP test failed: "
                    f"Expected {len(urls)} results, got {len(results)}"
                )
                return False

            success_count = sum(1 for r in results if r.success)
            print(f" ✓ Crawled {success_count}/{len(urls)} URLs successfully")

            # 80% success threshold to tolerate network flakiness.
            # NOTE: with the three URLs above the threshold is 2.4, so in
            # practice every URL must succeed; the tolerance only takes
            # effect once the list has five or more entries.
            if success_count >= len(urls) * 0.8:
                print(" ✓ arun_many CDP test passed")
                return True

            print(f" ✗ Too many failures: {len(urls) - success_count}/{len(urls)}")
            return False

    except Exception as e:
        # Top-level boundary of a smoke test: report, dump the traceback for
        # diagnosis, and let the remaining tests run.
        print(f" ✗ arun_many CDP test failed: {e}")
        import traceback

        traceback.print_exc()
        return False
|
||||
|
||||
|
||||
async def test_concurrent_arun_many():
    """Run two ``arun_many`` calls concurrently on one managed (CDP) browser.

    Returns:
        bool: True when neither call raised and at least 70% of all
        requests report success; False otherwise.
    """
    print("\nTest 3: Concurrent arun_many calls...")

    browser_config = BrowserConfig(
        use_managed_browser=True,
        headless=True,
        verbose=False,
    )

    try:
        async with AsyncWebCrawler(config=browser_config) as crawler:
            # Two arun_many coroutines sharing the same crawler instance;
            # they only start running once gathered below.
            task1 = crawler.arun_many(
                urls=["https://example.com", "https://httpbin.org/html"],
                config=CrawlerRunConfig(cache_mode=CacheMode.BYPASS),
            )
            task2 = crawler.arun_many(
                urls=["https://www.example.org", "https://example.com"],
                config=CrawlerRunConfig(cache_mode=CacheMode.BYPASS),
            )

            outcomes = await asyncio.gather(task1, task2, return_exceptions=True)

            # With return_exceptions=True, gather() hands back exception
            # objects in place of results. Check against BaseException:
            # asyncio.CancelledError is not an Exception subclass, and would
            # otherwise slip through and be iterated as if it were a result
            # list.
            for task_number, outcome in enumerate(outcomes, start=1):
                if isinstance(outcome, BaseException):
                    print(f" ✗ Task {task_number} raised exception: {outcome}")
                    return False

            results1, results2 = outcomes
            total_success = sum(1 for r in results1 if r.success) + sum(
                1 for r in results2 if r.success
            )
            total_requests = len(results1) + len(results2)

            print(f" ✓ {total_success}/{total_requests} concurrent requests succeeded")

            # 70% success threshold: looser than the sequential test because
            # the concurrent calls put more stress on the browser.
            if total_success >= total_requests * 0.7:
                print(" ✓ Concurrent arun_many test passed")
                return True

            print(" ✗ Too many concurrent failures")
            return False

    except Exception as e:
        # Top-level boundary of a smoke test: report, dump the traceback for
        # diagnosis, and signal failure to the caller.
        print(f" ✗ Concurrent test failed: {e}")
        import traceback

        traceback.print_exc()
        return False
|
||||
|
||||
|
||||
async def main():
    """Execute every smoke test in order and summarise the outcome.

    Returns:
        int: 0 when all tests passed, 1 when at least one failed; intended
        to be used as the process exit code.
    """
    banner = "=" * 60

    print(banner)
    print("CDP Concurrency Smoke Tests")
    print(banner)

    # The awaits are evaluated one after another, so the tests never overlap.
    outcomes = [
        await test_basic_cdp(),
        await test_arun_many_cdp(),
        await test_concurrent_arun_many(),
    ]

    print("\n" + banner)

    total = len(outcomes)
    failed = total - sum(outcomes)

    if failed:
        print(f"✗ {failed}/{total} smoke tests failed")
        print(banner)
        return 1

    print(f"✓ All {total} smoke tests passed!")
    print(banner)
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the async test suite and use its return value as the exit status.
    sys.exit(asyncio.run(main()))
|
||||
Reference in New Issue
Block a user