fix(async_playwright_crawler): improve segment handling and viewport adjustments during screenshot capture (Fixed bug: Capturing Screenshot Twice and Increasing Image Size)
This commit is contained in:
@@ -1162,12 +1162,32 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
|
|||||||
num_segments = (page_height // viewport_height) + 1
|
num_segments = (page_height // viewport_height) + 1
|
||||||
for i in range(num_segments):
|
for i in range(num_segments):
|
||||||
y_offset = i * viewport_height
|
y_offset = i * viewport_height
|
||||||
|
# Special handling for the last segment
|
||||||
|
if i == num_segments - 1:
|
||||||
|
last_part_height = page_height % viewport_height
|
||||||
|
|
||||||
|
# If page_height is an exact multiple of viewport_height,
|
||||||
|
# we don't need an extra segment
|
||||||
|
if last_part_height == 0:
|
||||||
|
# Skip last segment if page height is exact multiple of viewport
|
||||||
|
break
|
||||||
|
|
||||||
|
# Adjust viewport to exactly match the remaining content height
|
||||||
|
await page.set_viewport_size({"width": page_width, "height": last_part_height})
|
||||||
|
|
||||||
await page.evaluate(f"window.scrollTo(0, {y_offset})")
|
await page.evaluate(f"window.scrollTo(0, {y_offset})")
|
||||||
await asyncio.sleep(0.01) # wait for render
|
await asyncio.sleep(0.01) # wait for render
|
||||||
seg_shot = await page.screenshot(full_page=False)
|
|
||||||
|
# Capture the current segment
|
||||||
|
# Note: compression options (type="jpeg", quality=85) are applied in the screenshot call below
|
||||||
|
seg_shot = await page.screenshot(full_page=False, type="jpeg", quality=85)
|
||||||
|
# seg_shot = await page.screenshot(full_page=False)
|
||||||
img = Image.open(BytesIO(seg_shot)).convert("RGB")
|
img = Image.open(BytesIO(seg_shot)).convert("RGB")
|
||||||
segments.append(img)
|
segments.append(img)
|
||||||
|
|
||||||
|
# Reset viewport to original size after capturing segments
|
||||||
|
await page.set_viewport_size({"width": page_width, "height": viewport_height})
|
||||||
|
|
||||||
total_height = sum(img.height for img in segments)
|
total_height = sum(img.height for img in segments)
|
||||||
stitched = Image.new("RGB", (segments[0].width, total_height))
|
stitched = Image.new("RGB", (segments[0].width, total_height))
|
||||||
offset = 0
|
offset = 0
|
||||||
|
|||||||
Reference in New Issue
Block a user