fix: prevent memory leaks by ensuring proper closure of Playwright pages

- Fixes a critical memory leak where browser pages remained open when no session_id was provided
- Ensures proper cleanup of Playwright resources after page operations
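- Improves resource management in browser farm implementation
- Also makes the scroll delay configurable: `_handle_full_page_scan` now passes `scroll_delay` (default 0.1 s) to `safe_scroll`, replacing the hard-coded 100 ms wait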

This is an urgent fix to address resource leakage that could impact system stability.
This commit is contained in:
UncleCode
2025-01-03 21:17:23 +08:00
parent 24b3da717a
commit 196dc79ec7
2 changed files with 11 additions and 5 deletions

1
.gitignore vendored
View File

@@ -225,3 +225,4 @@ tree.md
.scripts .scripts
.local .local
.do .do
/plans

View File

@@ -1475,8 +1475,13 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
except Exception as e: except Exception as e:
raise e raise e
finally:
# If no session_id is given we should close the page
if not config.session_id:
await page.close()
async def _handle_full_page_scan(self, page: Page, scroll_delay: float): async def _handle_full_page_scan(self, page: Page, scroll_delay: float = 0.1):
""" """
Helper method to handle full page scanning. Helper method to handle full page scanning.
@@ -1500,7 +1505,7 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
current_position = viewport_height current_position = viewport_height
# await page.evaluate(f"window.scrollTo(0, {current_position})") # await page.evaluate(f"window.scrollTo(0, {current_position})")
await self.safe_scroll(page, 0, current_position) await self.safe_scroll(page, 0, current_position, delay=scroll_delay)
# await self.csp_scroll_to(page, 0, current_position) # await self.csp_scroll_to(page, 0, current_position)
# await asyncio.sleep(scroll_delay) # await asyncio.sleep(scroll_delay)
@@ -1510,7 +1515,7 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
while current_position < total_height: while current_position < total_height:
current_position = min(current_position + viewport_height, total_height) current_position = min(current_position + viewport_height, total_height)
await self.safe_scroll(page, 0, current_position) await self.safe_scroll(page, 0, current_position, delay=scroll_delay)
# await page.evaluate(f"window.scrollTo(0, {current_position})") # await page.evaluate(f"window.scrollTo(0, {current_position})")
# await asyncio.sleep(scroll_delay) # await asyncio.sleep(scroll_delay)
@@ -2066,7 +2071,7 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
} }
""") """)
async def safe_scroll(self, page: Page, x: int, y: int): async def safe_scroll(self, page: Page, x: int, y: int, delay: float = 0.1):
""" """
Safely scroll the page with rendering time. Safely scroll the page with rendering time.
@@ -2077,7 +2082,7 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
""" """
result = await self.csp_scroll_to(page, x, y) result = await self.csp_scroll_to(page, x, y)
if result['success']: if result['success']:
await page.wait_for_timeout(100) # Allow for rendering await page.wait_for_timeout(delay * 1000)
return result return result
async def csp_scroll_to(self, page: Page, x: int, y: int) -> Dict[str, Any]: async def csp_scroll_to(self, page: Page, x: int, y: int) -> Dict[str, Any]: