fix: prevent memory leaks by ensuring proper closure of Playwright pages
- Fixes a critical memory leak where browser pages remained open
- Ensures proper cleanup of Playwright resources after page operations
- Improves resource management in the browser farm implementation

This is an urgent fix to address resource leakage that could impact system stability.
This commit is contained in:
@@ -1475,8 +1475,13 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
|
||||
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
||||
finally:
|
||||
# If no session_id is given we should close the page
|
||||
if not config.session_id:
|
||||
await page.close()
|
||||
|
||||
async def _handle_full_page_scan(self, page: Page, scroll_delay: float):
|
||||
async def _handle_full_page_scan(self, page: Page, scroll_delay: float = 0.1):
|
||||
"""
|
||||
Helper method to handle full page scanning.
|
||||
|
||||
@@ -1500,7 +1505,7 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
|
||||
current_position = viewport_height
|
||||
|
||||
# await page.evaluate(f"window.scrollTo(0, {current_position})")
|
||||
await self.safe_scroll(page, 0, current_position)
|
||||
await self.safe_scroll(page, 0, current_position, delay=scroll_delay)
|
||||
# await self.csp_scroll_to(page, 0, current_position)
|
||||
# await asyncio.sleep(scroll_delay)
|
||||
|
||||
@@ -1510,7 +1515,7 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
|
||||
|
||||
while current_position < total_height:
|
||||
current_position = min(current_position + viewport_height, total_height)
|
||||
await self.safe_scroll(page, 0, current_position)
|
||||
await self.safe_scroll(page, 0, current_position, delay=scroll_delay)
|
||||
# await page.evaluate(f"window.scrollTo(0, {current_position})")
|
||||
# await asyncio.sleep(scroll_delay)
|
||||
|
||||
@@ -2066,7 +2071,7 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
|
||||
}
|
||||
""")
|
||||
|
||||
async def safe_scroll(self, page: Page, x: int, y: int):
|
||||
async def safe_scroll(self, page: Page, x: int, y: int, delay: float = 0.1):
|
||||
"""
|
||||
Safely scroll the page with rendering time.
|
||||
|
||||
@@ -2077,7 +2082,7 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
|
||||
"""
|
||||
result = await self.csp_scroll_to(page, x, y)
|
||||
if result['success']:
|
||||
await page.wait_for_timeout(100) # Allow for rendering
|
||||
await page.wait_for_timeout(delay * 1000)
|
||||
return result
|
||||
|
||||
async def csp_scroll_to(self, page: Page, x: int, y: int) -> Dict[str, Any]:
|
||||
|
||||
Reference in New Issue
Block a user