Add tests, docs, and contributors for PRs #1463 and #1435

- Add tests for device_scale_factor (config + integration)
- Add tests for redirected_status_code (model + redirect + raw HTML)
- Document device_scale_factor in browser config docs and API reference
- Document redirected_status_code in crawler result docs and API reference
- Add TristanDonze and charlaie to CONTRIBUTORS.md
- Update PR-TODOLIST with session results
This commit is contained in:
unclecode
2026-02-06 09:30:19 +00:00
parent 37a49c5315
commit fbc52813a4
8 changed files with 164 additions and 8 deletions

View File

@@ -0,0 +1,72 @@
"""Tests for PR #1435: redirected_status_code in CrawlResult."""
import pytest
import pytest_asyncio
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
from crawl4ai.models import CrawlResult, AsyncCrawlResponse
class TestRedirectedStatusCodeModel:
    """Verify redirected_status_code exists and defaults to None on both models."""

    def test_crawl_result_default_none(self):
        # When the field is omitted it must default to None.
        crawl_result = CrawlResult(url="http://example.com", html="", success=True)
        assert crawl_result.redirected_status_code is None

    def test_crawl_result_set_value(self):
        # An explicitly supplied value must round-trip unchanged.
        crawl_result = CrawlResult(
            url="http://example.com",
            html="",
            success=True,
            redirected_status_code=200,
        )
        assert crawl_result.redirected_status_code == 200

    def test_async_crawl_response_default_none(self):
        # Same default-to-None contract on the async response model.
        response = AsyncCrawlResponse(
            html="<html></html>", response_headers={}, status_code=200
        )
        assert response.redirected_status_code is None

    def test_async_crawl_response_set_value(self):
        # Explicit value on the async response model round-trips as well.
        response = AsyncCrawlResponse(
            html="<html></html>",
            response_headers={},
            status_code=200,
            redirected_status_code=301,
        )
        assert response.redirected_status_code == 301
@pytest.mark.asyncio
async def test_redirected_status_code_on_direct_request():
    """A non-redirected request should have redirected_status_code equal to the final status."""
    crawler_config = BrowserConfig(headless=True)
    crawl_opts = CrawlerRunConfig()
    async with AsyncWebCrawler(config=crawler_config) as crawler:
        result = await crawler.arun("https://httpbin.org/get", config=crawl_opts)
        assert result.success
        # No redirect hop occurred, so redirected_status_code mirrors the
        # final response status (200).
        assert result.redirected_status_code == 200
@pytest.mark.asyncio
async def test_redirected_status_code_on_redirect():
    """A redirected request should capture the final destination's status code."""
    crawler_config = BrowserConfig(headless=True)
    crawl_opts = CrawlerRunConfig()
    # httpbin's /redirect/1 issues a single 302 hop to /get (which serves 200).
    async with AsyncWebCrawler(config=crawler_config) as crawler:
        result = await crawler.arun(
            "https://httpbin.org/redirect/1", config=crawl_opts
        )
        assert result.success
        # status_code should be 302 (the first hop, per crawl4ai's redirect chain walking)
        assert result.status_code == 302
        # redirected_status_code should be 200 (the final destination)
        assert result.redirected_status_code == 200
        # redirected_url should land on the final destination.
        assert "/get" in (result.redirected_url or "")
@pytest.mark.asyncio
async def test_redirected_status_code_on_raw_html():
    """Raw HTML input should have redirected_status_code = None (no network request)."""
    crawler_config = BrowserConfig(headless=True)
    crawl_opts = CrawlerRunConfig()
    async with AsyncWebCrawler(config=crawler_config) as crawler:
        result = await crawler.arun(
            "raw:<html><body>test</body></html>", config=crawl_opts
        )
        assert result.success
        # No HTTP exchange happened, so the field stays None.
        assert result.redirected_status_code is None

View File

@@ -0,0 +1,62 @@
"""Tests for PR #1463: configurable device_scale_factor in BrowserConfig."""
import pytest
import pytest_asyncio
import base64
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
class TestDeviceScaleFactorConfig:
    """Verify device_scale_factor flows correctly through BrowserConfig."""

    def test_default_value(self):
        # Unspecified, the factor defaults to 1.0.
        cfg = BrowserConfig()
        assert cfg.device_scale_factor == 1.0

    def test_custom_value(self):
        # A caller-supplied factor is stored verbatim.
        cfg = BrowserConfig(device_scale_factor=2.0)
        assert cfg.device_scale_factor == 2.0

    def test_to_dict_includes_field(self):
        # Serialization must carry the field through.
        serialized = BrowserConfig(device_scale_factor=3.0).to_dict()
        assert serialized["device_scale_factor"] == 3.0

    def test_clone_preserves(self):
        # clone() must not drop or reset the field.
        original = BrowserConfig(device_scale_factor=2.5)
        assert original.clone().device_scale_factor == 2.5

    def test_from_kwargs(self):
        # Construction via kwargs dict honors the field.
        cfg = BrowserConfig.from_kwargs({"device_scale_factor": 1.5})
        assert cfg.device_scale_factor == 1.5

    def test_from_kwargs_default(self):
        # An empty kwargs dict falls back to the 1.0 default.
        cfg = BrowserConfig.from_kwargs({})
        assert cfg.device_scale_factor == 1.0
@pytest.mark.asyncio
async def test_device_scale_factor_produces_larger_screenshot():
    """Integration test: higher device_scale_factor should produce a larger screenshot."""
    html = "<html><body><h1>Scale Test</h1></body></html>"
    raw_url = f"raw:{html}"
    run_config = CrawlerRunConfig(screenshot=True)

    async def capture_at_scale(scale):
        # Fresh browser per scale so viewport + scale settings apply cleanly.
        browser_cfg = BrowserConfig(
            headless=True,
            device_scale_factor=scale,
            viewport_width=800,
            viewport_height=600,
        )
        async with AsyncWebCrawler(config=browser_cfg) as crawler:
            return await crawler.arun(raw_url, config=run_config)

    result_1x = await capture_at_scale(1.0)
    result_2x = await capture_at_scale(2.0)

    assert result_1x.screenshot is not None
    assert result_2x.screenshot is not None
    # 2x scale renders more pixels, so the decoded image payload grows.
    size_1x = len(base64.b64decode(result_1x.screenshot))
    size_2x = len(base64.b64decode(result_2x.screenshot))
    assert size_2x > size_1x, f"2x screenshot ({size_2x} bytes) should be larger than 1x ({size_1x} bytes)"