feat: add force viewport screenshot

This commit is contained in:
TheRedRad
2026-01-06 21:12:17 +01:00
parent c85f56b085
commit cee79a8129
2 changed files with 15 additions and 1 deletions

View File

@@ -1081,6 +1081,9 @@ class CrawlerRunConfig():
Default: None.
screenshot_height_threshold (int): Threshold for page height to decide screenshot strategy.
Default: SCREENSHOT_HEIGHT_TRESHOLD (from config, e.g. 20000).
force_viewport_screenshot (bool): If True, always capture only the viewport, regardless of page height.
If False, the screenshot strategy is chosen automatically (viewport for short pages, full page for long pages).
Default: False.
pdf (bool): Whether to generate a PDF of the page.
Default: False.
image_description_min_word_threshold (int): Minimum words for image description extraction.
@@ -1220,6 +1223,7 @@ class CrawlerRunConfig():
screenshot: bool = False,
screenshot_wait_for: float = None,
screenshot_height_threshold: int = SCREENSHOT_HEIGHT_TRESHOLD,
force_viewport_screenshot: bool = False,
pdf: bool = False,
capture_mhtml: bool = False,
image_description_min_word_threshold: int = IMAGE_DESCRIPTION_MIN_WORD_THRESHOLD,
@@ -1336,6 +1340,7 @@ class CrawlerRunConfig():
self.screenshot = screenshot
self.screenshot_wait_for = screenshot_wait_for
self.screenshot_height_threshold = screenshot_height_threshold
self.force_viewport_screenshot = force_viewport_screenshot
self.pdf = pdf
self.capture_mhtml = capture_mhtml
self.image_description_min_word_threshold = image_description_min_word_threshold

View File

@@ -998,7 +998,9 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
if config.screenshot_wait_for:
await asyncio.sleep(config.screenshot_wait_for)
screenshot_data = await self.take_screenshot(
page, screenshot_height_threshold=config.screenshot_height_threshold
page,
screenshot_height_threshold=config.screenshot_height_threshold,
force_viewport_screenshot=config.force_viewport_screenshot
)
if screenshot_data or pdf_data or mhtml_data:
@@ -1536,6 +1538,13 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
Returns:
str: The base64-encoded screenshot data
"""
# Check if viewport-only screenshot is forced
force_viewport = kwargs.get('force_viewport_screenshot', False)
if force_viewport:
# Use viewport-only screenshot
return await self.take_screenshot_naive(page)
need_scroll = await self.page_need_scroll(page)
if not need_scroll: