Make fields in the `MediaItem` and `Link` models optional with default values to prevent validation errors when data is incomplete. Also expose `BaseDispatcher` in `__init__` and fix markdown field handling in the database manager. BREAKING CHANGE: `MediaItem` and `Link` model fields are now optional with default values, which may affect existing code that expects required fields.
149 lines
4.1 KiB
Python
149 lines
4.1 KiB
Python
import asyncio
|
|
import pytest
|
|
from typing import List
|
|
from crawl4ai import (
|
|
AsyncWebCrawler,
|
|
BrowserConfig,
|
|
CrawlerRunConfig,
|
|
MemoryAdaptiveDispatcher,
|
|
RateLimiter,
|
|
CacheMode
|
|
)
|
|
|
|
@pytest.mark.asyncio
@pytest.mark.parametrize("viewport", [
    (800, 600),
    (1024, 768),
    (1920, 1080),
])
async def test_viewport_config(viewport):
    """Test different viewport configurations.

    Args:
        viewport: ``(width, height)`` tuple supplied by the parametrize
            decorator; both values are forwarded to ``BrowserConfig``.

    The test only checks that the crawl reports success; it does not
    inspect the rendered page size.
    """
    width, height = viewport
    browser_config = BrowserConfig(
        browser_type="chromium",
        headless=True,
        viewport_width=width,
        viewport_height=height,
    )

    async with AsyncWebCrawler(config=browser_config) as crawler:
        result = await crawler.arun(
            url="https://example.com",
            config=CrawlerRunConfig(
                # NOTE(review): cache bypass was left disabled in the original;
                # confirm whether a cached result is acceptable for this test.
                # cache_mode=CacheMode.BYPASS,
                page_timeout=30000,  # 30 seconds
            ),
        )
        assert result.success
|
|
|
|
@pytest.mark.asyncio
async def test_memory_management():
    """Test memory-adaptive dispatching.

    Crawls three copies of the same URL through a
    ``MemoryAdaptiveDispatcher`` and checks that ``arun_many`` returns one
    result per requested URL. Individual result success is not asserted.
    """
    browser_config = BrowserConfig(
        browser_type="chromium",
        headless=True,
        viewport_width=1024,
        viewport_height=768,
    )

    dispatcher = MemoryAdaptiveDispatcher(
        memory_threshold_percent=70.0,
        check_interval=1.0,
        max_session_permit=5,
    )

    urls = ["https://example.com"] * 3  # Test with multiple identical URLs

    async with AsyncWebCrawler(config=browser_config) as crawler:
        results = await crawler.arun_many(
            urls=urls,
            config=CrawlerRunConfig(page_timeout=30000),
            dispatcher=dispatcher,
        )
        assert len(results) == len(urls)
|
|
|
|
@pytest.mark.asyncio
async def test_rate_limiting():
    """Test rate limiting functionality.

    Runs three distinct URLs through a ``MemoryAdaptiveDispatcher`` that is
    configured with a ``RateLimiter`` and checks that one result comes back
    per URL. The test does not measure the delays actually applied.
    """
    browser_config = BrowserConfig(
        browser_type="chromium",
        headless=True,
    )

    dispatcher = MemoryAdaptiveDispatcher(
        rate_limiter=RateLimiter(
            base_delay=(1.0, 2.0),
            max_delay=5.0,
            max_retries=2,
        ),
        memory_threshold_percent=70.0,
    )

    urls = [
        "https://example.com",
        "https://example.org",
        "https://example.net",
    ]

    async with AsyncWebCrawler(config=browser_config) as crawler:
        results = await crawler.arun_many(
            urls=urls,
            config=CrawlerRunConfig(page_timeout=30000),
            dispatcher=dispatcher,
        )
        assert len(results) == len(urls)
|
|
|
|
@pytest.mark.asyncio
async def test_javascript_execution():
    """Test JavaScript execution capabilities.

    Passes a small script via ``CrawlerRunConfig(js_code=...)`` and checks
    that the crawl reports success. The script's return value is not
    inspected by this test.
    """
    browser_config = BrowserConfig(
        browser_type="chromium",
        headless=True,
        java_script_enabled=True,
    )

    # NOTE(review): the snippet uses a top-level `return`; presumably the
    # crawler wraps js_code in a function before evaluating it — confirm.
    js_code = """
    document.body.style.backgroundColor = 'red';
    return document.body.style.backgroundColor;
    """

    async with AsyncWebCrawler(config=browser_config) as crawler:
        result = await crawler.arun(
            url="https://example.com",
            config=CrawlerRunConfig(
                js_code=js_code,
                page_timeout=30000,
            ),
        )
        assert result.success
|
|
|
|
@pytest.mark.asyncio
@pytest.mark.parametrize("error_url", [
    "https://invalid.domain.test",
    "https://httpbin.org/status/404",
    "https://httpbin.org/status/503",
    "https://httpbin.org/status/403",
])
async def test_error_handling(error_url):
    """Test error handling for various failure scenarios.

    Args:
        error_url: URL supplied by the parametrize decorator that is
            expected to produce a failed crawl.

    Asserts that the result is marked unsuccessful and carries a non-``None``
    ``error_message``.
    """
    browser_config = BrowserConfig(
        browser_type="chromium",
        headless=True,
    )

    async with AsyncWebCrawler(config=browser_config) as crawler:
        result = await crawler.arun(
            url=error_url,
            config=CrawlerRunConfig(
                page_timeout=10000,  # Short timeout for error cases
                cache_mode=CacheMode.BYPASS,
            ),
        )
        assert not result.success
        assert result.error_message is not None
|
|
|
|
if __name__ == "__main__":
    # Allow running a subset of the tests directly without pytest. Each call
    # gets its own event loop via asyncio.run. The parametrized viewport test
    # is invoked with a single fixed size; test_error_handling is not run here.
    asyncio.run(test_viewport_config((1024, 768)))
    asyncio.run(test_memory_management())
    asyncio.run(test_rate_limiting())
    asyncio.run(test_javascript_execution())