Merge PR #1694: feat: add force viewport screenshot
This commit is contained in:
@@ -1271,6 +1271,9 @@ class CrawlerRunConfig():
|
|||||||
Default: None.
|
Default: None.
|
||||||
screenshot_height_threshold (int): Threshold for page height to decide screenshot strategy.
|
screenshot_height_threshold (int): Threshold for page height to decide screenshot strategy.
|
||||||
Default: SCREENSHOT_HEIGHT_TRESHOLD (from config, e.g. 20000).
|
Default: SCREENSHOT_HEIGHT_TRESHOLD (from config, e.g. 20000).
|
||||||
|
force_viewport_screenshot (bool): If True, always take viewport-only screenshots regardless of page height.
|
||||||
|
When False, the screenshot strategy is chosen automatically (viewport for short pages, full-page for long pages).
|
||||||
|
Default: False.
|
||||||
pdf (bool): Whether to generate a PDF of the page.
|
pdf (bool): Whether to generate a PDF of the page.
|
||||||
Default: False.
|
Default: False.
|
||||||
image_description_min_word_threshold (int): Minimum words for image description extraction.
|
image_description_min_word_threshold (int): Minimum words for image description extraction.
|
||||||
@@ -1423,6 +1426,7 @@ class CrawlerRunConfig():
|
|||||||
screenshot: bool = False,
|
screenshot: bool = False,
|
||||||
screenshot_wait_for: float = None,
|
screenshot_wait_for: float = None,
|
||||||
screenshot_height_threshold: int = SCREENSHOT_HEIGHT_TRESHOLD,
|
screenshot_height_threshold: int = SCREENSHOT_HEIGHT_TRESHOLD,
|
||||||
|
force_viewport_screenshot: bool = False,
|
||||||
pdf: bool = False,
|
pdf: bool = False,
|
||||||
capture_mhtml: bool = False,
|
capture_mhtml: bool = False,
|
||||||
image_description_min_word_threshold: int = IMAGE_DESCRIPTION_MIN_WORD_THRESHOLD,
|
image_description_min_word_threshold: int = IMAGE_DESCRIPTION_MIN_WORD_THRESHOLD,
|
||||||
@@ -1551,6 +1555,7 @@ class CrawlerRunConfig():
|
|||||||
self.screenshot = screenshot
|
self.screenshot = screenshot
|
||||||
self.screenshot_wait_for = screenshot_wait_for
|
self.screenshot_wait_for = screenshot_wait_for
|
||||||
self.screenshot_height_threshold = screenshot_height_threshold
|
self.screenshot_height_threshold = screenshot_height_threshold
|
||||||
|
self.force_viewport_screenshot = force_viewport_screenshot
|
||||||
self.pdf = pdf
|
self.pdf = pdf
|
||||||
self.capture_mhtml = capture_mhtml
|
self.capture_mhtml = capture_mhtml
|
||||||
self.image_description_min_word_threshold = image_description_min_word_threshold
|
self.image_description_min_word_threshold = image_description_min_word_threshold
|
||||||
|
|||||||
@@ -1019,7 +1019,9 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
|
|||||||
if config.screenshot_wait_for:
|
if config.screenshot_wait_for:
|
||||||
await asyncio.sleep(config.screenshot_wait_for)
|
await asyncio.sleep(config.screenshot_wait_for)
|
||||||
screenshot_data = await self.take_screenshot(
|
screenshot_data = await self.take_screenshot(
|
||||||
page, screenshot_height_threshold=config.screenshot_height_threshold
|
page,
|
||||||
|
screenshot_height_threshold=config.screenshot_height_threshold,
|
||||||
|
force_viewport_screenshot=config.force_viewport_screenshot
|
||||||
)
|
)
|
||||||
|
|
||||||
if screenshot_data or pdf_data or mhtml_data:
|
if screenshot_data or pdf_data or mhtml_data:
|
||||||
@@ -1637,6 +1639,13 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
|
|||||||
Returns:
|
Returns:
|
||||||
str: The base64-encoded screenshot data
|
str: The base64-encoded screenshot data
|
||||||
"""
|
"""
|
||||||
|
# Check if viewport-only screenshot is forced
|
||||||
|
force_viewport = kwargs.get('force_viewport_screenshot', False)
|
||||||
|
|
||||||
|
if force_viewport:
|
||||||
|
# Use viewport-only screenshot
|
||||||
|
return await self.take_screenshot_naive(page)
|
||||||
|
|
||||||
need_scroll = await self.page_need_scroll(page)
|
need_scroll = await self.page_need_scroll(page)
|
||||||
|
|
||||||
if not need_scroll:
|
if not need_scroll:
|
||||||
|
|||||||
Reference in New Issue
Block a user