- Add tests for device_scale_factor (config + integration)
- Add tests for redirected_status_code (model + redirect + raw HTML)
- Document device_scale_factor in browser config docs and API reference
- Document redirected_status_code in crawler result docs and API reference
- Add TristanDonze and charlaie to CONTRIBUTORS.md
- Update PR-TODOLIST with session results
This commit is contained in:
72
tests/test_pr_1435_redirected_status_code.py
Normal file
72
tests/test_pr_1435_redirected_status_code.py
Normal file
@@ -0,0 +1,72 @@
|
||||
"""Tests for PR #1435: redirected_status_code in CrawlResult."""
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
|
||||
from crawl4ai.models import CrawlResult, AsyncCrawlResponse
|
||||
|
||||
|
||||
class TestRedirectedStatusCodeModel:
    """Verify that `redirected_status_code` exists and defaults correctly on both models."""

    def test_crawl_result_default_none(self):
        # When the field is omitted, it must default to None.
        outcome = CrawlResult(url="http://example.com", html="", success=True)
        assert outcome.redirected_status_code is None

    def test_crawl_result_set_value(self):
        # An explicitly supplied value is stored unchanged.
        outcome = CrawlResult(
            url="http://example.com",
            html="",
            success=True,
            redirected_status_code=200,
        )
        assert outcome.redirected_status_code == 200

    def test_async_crawl_response_default_none(self):
        # Same default-None contract on the low-level response model.
        response = AsyncCrawlResponse(
            html="<html></html>", response_headers={}, status_code=200
        )
        assert response.redirected_status_code is None

    def test_async_crawl_response_set_value(self):
        response = AsyncCrawlResponse(
            html="<html></html>",
            response_headers={},
            status_code=200,
            redirected_status_code=301,
        )
        assert response.redirected_status_code == 301
|
||||
|
||||
@pytest.mark.asyncio
async def test_redirected_status_code_on_direct_request():
    """A non-redirected request should have redirected_status_code equal to the final status."""
    browser_cfg = BrowserConfig(headless=True)
    crawl_cfg = CrawlerRunConfig()

    async with AsyncWebCrawler(config=browser_cfg) as crawler:
        outcome = await crawler.arun("https://httpbin.org/get", config=crawl_cfg)

    assert outcome.success
    # No redirect occurred, so the "redirected" status mirrors the final response (200).
    assert outcome.redirected_status_code == 200
|
||||
|
||||
@pytest.mark.asyncio
async def test_redirected_status_code_on_redirect():
    """A redirected request should capture the final destination's status code."""
    browser_cfg = BrowserConfig(headless=True)
    crawl_cfg = CrawlerRunConfig()

    # httpbin's /redirect/1 issues a 302 pointing at /get, which answers 200.
    async with AsyncWebCrawler(config=browser_cfg) as crawler:
        outcome = await crawler.arun("https://httpbin.org/redirect/1", config=crawl_cfg)

    assert outcome.success
    # The first hop of the chain lands in status_code (302), per crawl4ai's
    # redirect chain walking...
    assert outcome.status_code == 302
    # ...while redirected_status_code carries the final destination's status (200).
    assert outcome.redirected_status_code == 200
    # redirected_url should name that final destination.
    assert "/get" in (outcome.redirected_url or "")
|
||||
|
||||
@pytest.mark.asyncio
async def test_redirected_status_code_on_raw_html():
    """Raw HTML input should have redirected_status_code = None (no network request)."""
    browser_cfg = BrowserConfig(headless=True)
    crawl_cfg = CrawlerRunConfig()

    async with AsyncWebCrawler(config=browser_cfg) as crawler:
        outcome = await crawler.arun(
            "raw:<html><body>test</body></html>", config=crawl_cfg
        )

    assert outcome.success
    # No HTTP exchange happened, so there is no redirect status to report.
    assert outcome.redirected_status_code is None
|
||||
62
tests/test_pr_1463_device_scale_factor.py
Normal file
62
tests/test_pr_1463_device_scale_factor.py
Normal file
@@ -0,0 +1,62 @@
|
||||
"""Tests for PR #1463: configurable device_scale_factor in BrowserConfig."""
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
import base64
|
||||
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
|
||||
|
||||
|
||||
class TestDeviceScaleFactorConfig:
    """Verify device_scale_factor flows correctly through every BrowserConfig pathway."""

    def test_default_value(self):
        # Unset configs fall back to a 1.0 scale factor.
        assert BrowserConfig().device_scale_factor == 1.0

    def test_custom_value(self):
        # Constructor keyword is stored as given.
        assert BrowserConfig(device_scale_factor=2.0).device_scale_factor == 2.0

    def test_to_dict_includes_field(self):
        serialized = BrowserConfig(device_scale_factor=3.0).to_dict()
        assert serialized["device_scale_factor"] == 3.0

    def test_clone_preserves(self):
        original = BrowserConfig(device_scale_factor=2.5)
        duplicate = original.clone()
        assert duplicate.device_scale_factor == 2.5

    def test_from_kwargs(self):
        built = BrowserConfig.from_kwargs({"device_scale_factor": 1.5})
        assert built.device_scale_factor == 1.5

    def test_from_kwargs_default(self):
        # Missing key in kwargs also resolves to the 1.0 default.
        assert BrowserConfig.from_kwargs({}).device_scale_factor == 1.0
|
||||
|
||||
@pytest.mark.asyncio
async def test_device_scale_factor_produces_larger_screenshot():
    """Integration test: higher device_scale_factor should produce a larger screenshot.

    Improvement over the original byte-count comparison: compressed PNG size is
    a fragile proxy for rendered resolution (compression can in principle make
    a larger bitmap encode smaller).  When the capture is a PNG we instead parse
    the IHDR chunk and compare actual pixel dimensions; the byte-size heuristic
    is kept only as a fallback for non-PNG captures.
    """
    import struct  # local import: only needed here for PNG header parsing

    def _png_dimensions(data):
        """Return (width, height) from a PNG's IHDR chunk, or None if not a PNG."""
        # PNG layout: 8-byte signature, 4-byte length, b"IHDR", then
        # big-endian 4-byte width and height at offsets 16 and 20.
        if len(data) >= 24 and data[:8] == b"\x89PNG\r\n\x1a\n":
            return struct.unpack_from(">II", data, 16)
        return None

    html = "<html><body><h1>Scale Test</h1></body></html>"
    raw_url = f"raw:{html}"
    run_config = CrawlerRunConfig(screenshot=True)

    # Take screenshot at scale 1.0
    browser_1x = BrowserConfig(headless=True, device_scale_factor=1.0, viewport_width=800, viewport_height=600)
    async with AsyncWebCrawler(config=browser_1x) as crawler:
        result_1x = await crawler.arun(raw_url, config=run_config)

    # Take screenshot at scale 2.0
    browser_2x = BrowserConfig(headless=True, device_scale_factor=2.0, viewport_width=800, viewport_height=600)
    async with AsyncWebCrawler(config=browser_2x) as crawler:
        result_2x = await crawler.arun(raw_url, config=run_config)

    assert result_1x.screenshot is not None
    assert result_2x.screenshot is not None

    raw_1x = base64.b64decode(result_1x.screenshot)
    raw_2x = base64.b64decode(result_2x.screenshot)

    dims_1x = _png_dimensions(raw_1x)
    dims_2x = _png_dimensions(raw_2x)
    if dims_1x and dims_2x:
        # With the viewport fixed at 800x600, doubling the device pixel ratio
        # should exactly double the rendered bitmap's dimensions.
        assert dims_2x[0] == 2 * dims_1x[0], f"expected doubled width: 1x={dims_1x}, 2x={dims_2x}"
        assert dims_2x[1] == 2 * dims_1x[1], f"expected doubled height: 1x={dims_1x}, 2x={dims_2x}"
    else:
        # Non-PNG capture format — fall back to the weaker size heuristic:
        # 2x scale should produce more pixel data (larger encoded payload).
        assert len(raw_2x) > len(raw_1x), f"2x screenshot ({len(raw_2x)} bytes) should be larger than 1x ({len(raw_1x)} bytes)"
|
||||
Reference in New Issue
Block a user