fix: prevent memory leaks by ensuring proper closure of Playwright pages
- Fixes a critical memory leak in which browser pages remained open after use.
- Ensures Playwright resources are properly cleaned up after page operations.
- Improves resource management in the browser farm implementation.

This is an urgent fix for a resource leak that could impact system stability.
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -225,3 +225,4 @@ tree.md
|
|||||||
.scripts
|
.scripts
|
||||||
.local
|
.local
|
||||||
.do
|
.do
|
||||||
|
/plans
|
||||||
@@ -1475,8 +1475,13 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
|
|||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
|
finally:
|
||||||
|
# If no session_id is given we should close the page
|
||||||
|
if not config.session_id:
|
||||||
|
await page.close()
|
||||||
|
|
||||||
async def _handle_full_page_scan(self, page: Page, scroll_delay: float):
|
async def _handle_full_page_scan(self, page: Page, scroll_delay: float = 0.1):
|
||||||
"""
|
"""
|
||||||
Helper method to handle full page scanning.
|
Helper method to handle full page scanning.
|
||||||
|
|
||||||
@@ -1500,7 +1505,7 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
|
|||||||
current_position = viewport_height
|
current_position = viewport_height
|
||||||
|
|
||||||
# await page.evaluate(f"window.scrollTo(0, {current_position})")
|
# await page.evaluate(f"window.scrollTo(0, {current_position})")
|
||||||
await self.safe_scroll(page, 0, current_position)
|
await self.safe_scroll(page, 0, current_position, delay=scroll_delay)
|
||||||
# await self.csp_scroll_to(page, 0, current_position)
|
# await self.csp_scroll_to(page, 0, current_position)
|
||||||
# await asyncio.sleep(scroll_delay)
|
# await asyncio.sleep(scroll_delay)
|
||||||
|
|
||||||
@@ -1510,7 +1515,7 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
|
|||||||
|
|
||||||
while current_position < total_height:
|
while current_position < total_height:
|
||||||
current_position = min(current_position + viewport_height, total_height)
|
current_position = min(current_position + viewport_height, total_height)
|
||||||
await self.safe_scroll(page, 0, current_position)
|
await self.safe_scroll(page, 0, current_position, delay=scroll_delay)
|
||||||
# await page.evaluate(f"window.scrollTo(0, {current_position})")
|
# await page.evaluate(f"window.scrollTo(0, {current_position})")
|
||||||
# await asyncio.sleep(scroll_delay)
|
# await asyncio.sleep(scroll_delay)
|
||||||
|
|
||||||
@@ -2066,7 +2071,7 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
|
|||||||
}
|
}
|
||||||
""")
|
""")
|
||||||
|
|
||||||
async def safe_scroll(self, page: Page, x: int, y: int):
|
async def safe_scroll(self, page: Page, x: int, y: int, delay: float = 0.1):
|
||||||
"""
|
"""
|
||||||
Safely scroll the page with rendering time.
|
Safely scroll the page with rendering time.
|
||||||
|
|
||||||
@@ -2077,7 +2082,7 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
|
|||||||
"""
|
"""
|
||||||
result = await self.csp_scroll_to(page, x, y)
|
result = await self.csp_scroll_to(page, x, y)
|
||||||
if result['success']:
|
if result['success']:
|
||||||
await page.wait_for_timeout(100) # Allow for rendering
|
await page.wait_for_timeout(delay * 1000)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
async def csp_scroll_to(self, page: Page, x: int, y: int) -> Dict[str, Any]:
|
async def csp_scroll_to(self, page: Page, x: int, y: int) -> Dict[str, Any]:
|
||||||
|
|||||||
Reference in New Issue
Block a user