Merge branch 'next' into 2025-MAY-2

This commit is contained in:
ntohidi
2025-06-02 20:26:40 +02:00
68 changed files with 104093 additions and 153 deletions

View File

@@ -971,8 +971,10 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
if config.wait_for:
try:
# Use wait_for_timeout if specified, otherwise fall back to page_timeout
timeout = config.wait_for_timeout if config.wait_for_timeout is not None else config.page_timeout
await self.smart_wait(
page, config.wait_for, timeout=config.page_timeout
page, config.wait_for, timeout=timeout
)
except Exception as e:
raise RuntimeError(f"Wait condition failed: {str(e)}")
@@ -1097,7 +1099,13 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
finally:
# If no session_id is given we should close the page
if not config.session_id:
all_contexts = page.context.browser.contexts
total_pages = sum(len(context.pages) for context in all_contexts)
if config.session_id:
pass
elif total_pages <= 1 and (self.browser_config.use_managed_browser or self.browser_config.headless):
pass
else:
# Detach listeners before closing to prevent potential errors during close
if config.capture_network_requests:
page.remove_listener("request", handle_request_capture)
@@ -1107,6 +1115,7 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
page.remove_listener("console", handle_console_capture)
page.remove_listener("pageerror", handle_pageerror_capture)
# Close the page
await page.close()
async def _handle_full_page_scan(self, page: Page, scroll_delay: float = 0.1):