Merge PR #1694: feat: add force viewport screenshot
This commit is contained in:
@@ -1271,6 +1271,9 @@ class CrawlerRunConfig():
|
||||
Default: None.
|
||||
screenshot_height_threshold (int): Threshold for page height to decide screenshot strategy.
|
||||
Default: SCREENSHOT_HEIGHT_TRESHOLD (from config, e.g. 20000).
|
||||
force_viewport_screenshot (bool): If True, always take viewport-only screenshots regardless of page height.
|
||||
When False, uses automatic decision (viewport for short pages, full-page for long pages).
|
||||
Default: False.
|
||||
pdf (bool): Whether to generate a PDF of the page.
|
||||
Default: False.
|
||||
image_description_min_word_threshold (int): Minimum words for image description extraction.
|
||||
@@ -1423,6 +1426,7 @@ class CrawlerRunConfig():
|
||||
screenshot: bool = False,
|
||||
screenshot_wait_for: float = None,
|
||||
screenshot_height_threshold: int = SCREENSHOT_HEIGHT_TRESHOLD,
|
||||
force_viewport_screenshot: bool = False,
|
||||
pdf: bool = False,
|
||||
capture_mhtml: bool = False,
|
||||
image_description_min_word_threshold: int = IMAGE_DESCRIPTION_MIN_WORD_THRESHOLD,
|
||||
@@ -1551,6 +1555,7 @@ class CrawlerRunConfig():
|
||||
self.screenshot = screenshot
|
||||
self.screenshot_wait_for = screenshot_wait_for
|
||||
self.screenshot_height_threshold = screenshot_height_threshold
|
||||
self.force_viewport_screenshot = force_viewport_screenshot
|
||||
self.pdf = pdf
|
||||
self.capture_mhtml = capture_mhtml
|
||||
self.image_description_min_word_threshold = image_description_min_word_threshold
|
||||
|
||||
@@ -1019,7 +1019,9 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
|
||||
if config.screenshot_wait_for:
|
||||
await asyncio.sleep(config.screenshot_wait_for)
|
||||
screenshot_data = await self.take_screenshot(
|
||||
page, screenshot_height_threshold=config.screenshot_height_threshold
|
||||
page,
|
||||
screenshot_height_threshold=config.screenshot_height_threshold,
|
||||
force_viewport_screenshot=config.force_viewport_screenshot
|
||||
)
|
||||
|
||||
if screenshot_data or pdf_data or mhtml_data:
|
||||
@@ -1637,6 +1639,13 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
|
||||
Returns:
|
||||
str: The base64-encoded screenshot data
|
||||
"""
|
||||
# Check if viewport-only screenshot is forced
|
||||
force_viewport = kwargs.get('force_viewport_screenshot', False)
|
||||
|
||||
if force_viewport:
|
||||
# Use viewport-only screenshot
|
||||
return await self.take_screenshot_naive(page)
|
||||
|
||||
need_scroll = await self.page_need_scroll(page)
|
||||
|
||||
if not need_scroll:
|
||||
|
||||
Reference in New Issue
Block a user