Merge PR #1694: feat: add force viewport screenshot

This commit is contained in:
unclecode
2026-02-01 01:05:52 +00:00
2 changed files with 15 additions and 1 deletion

View File

@@ -1271,6 +1271,9 @@ class CrawlerRunConfig():
Default: None. Default: None.
screenshot_height_threshold (int): Threshold for page height to decide screenshot strategy. screenshot_height_threshold (int): Threshold for page height to decide screenshot strategy.
Default: SCREENSHOT_HEIGHT_TRESHOLD (from config, e.g. 20000). Default: SCREENSHOT_HEIGHT_TRESHOLD (from config, e.g. 20000).
force_viewport_screenshot (bool): If True, always take viewport-only screenshots regardless of page height.
When False, uses automatic decision (viewport for short pages, full-page for long pages).
Default: False.
pdf (bool): Whether to generate a PDF of the page. pdf (bool): Whether to generate a PDF of the page.
Default: False. Default: False.
image_description_min_word_threshold (int): Minimum words for image description extraction. image_description_min_word_threshold (int): Minimum words for image description extraction.
@@ -1423,6 +1426,7 @@ class CrawlerRunConfig():
screenshot: bool = False, screenshot: bool = False,
screenshot_wait_for: float = None, screenshot_wait_for: float = None,
screenshot_height_threshold: int = SCREENSHOT_HEIGHT_TRESHOLD, screenshot_height_threshold: int = SCREENSHOT_HEIGHT_TRESHOLD,
force_viewport_screenshot: bool = False,
pdf: bool = False, pdf: bool = False,
capture_mhtml: bool = False, capture_mhtml: bool = False,
image_description_min_word_threshold: int = IMAGE_DESCRIPTION_MIN_WORD_THRESHOLD, image_description_min_word_threshold: int = IMAGE_DESCRIPTION_MIN_WORD_THRESHOLD,
@@ -1551,6 +1555,7 @@ class CrawlerRunConfig():
self.screenshot = screenshot self.screenshot = screenshot
self.screenshot_wait_for = screenshot_wait_for self.screenshot_wait_for = screenshot_wait_for
self.screenshot_height_threshold = screenshot_height_threshold self.screenshot_height_threshold = screenshot_height_threshold
self.force_viewport_screenshot = force_viewport_screenshot
self.pdf = pdf self.pdf = pdf
self.capture_mhtml = capture_mhtml self.capture_mhtml = capture_mhtml
self.image_description_min_word_threshold = image_description_min_word_threshold self.image_description_min_word_threshold = image_description_min_word_threshold

View File

@@ -1019,7 +1019,9 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
if config.screenshot_wait_for: if config.screenshot_wait_for:
await asyncio.sleep(config.screenshot_wait_for) await asyncio.sleep(config.screenshot_wait_for)
screenshot_data = await self.take_screenshot( screenshot_data = await self.take_screenshot(
page, screenshot_height_threshold=config.screenshot_height_threshold page,
screenshot_height_threshold=config.screenshot_height_threshold,
force_viewport_screenshot=config.force_viewport_screenshot
) )
if screenshot_data or pdf_data or mhtml_data: if screenshot_data or pdf_data or mhtml_data:
@@ -1637,6 +1639,13 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
Returns: Returns:
str: The base64-encoded screenshot data str: The base64-encoded screenshot data
""" """
# Check if viewport-only screenshot is forced
force_viewport = kwargs.get('force_viewport_screenshot', False)
if force_viewport:
# Use viewport-only screenshot
return await self.take_screenshot_naive(page)
need_scroll = await self.page_need_scroll(page) need_scroll = await self.page_need_scroll(page)
if not need_scroll: if not need_scroll: