Fixed issues with the managed browser, including its inability to connect to the user data directory and to create new pages within the managed browser context; all of these issues are now resolved.
This commit is contained in:
@@ -187,6 +187,7 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
|
|||||||
self.use_managed_browser = kwargs.get("use_managed_browser", False)
|
self.use_managed_browser = kwargs.get("use_managed_browser", False)
|
||||||
self.user_data_dir = kwargs.get("user_data_dir", None)
|
self.user_data_dir = kwargs.get("user_data_dir", None)
|
||||||
self.managed_browser = None
|
self.managed_browser = None
|
||||||
|
self.default_context = None
|
||||||
self.hooks = {
|
self.hooks = {
|
||||||
'on_browser_created': None,
|
'on_browser_created': None,
|
||||||
'on_user_agent_updated': None,
|
'on_user_agent_updated': None,
|
||||||
@@ -217,6 +218,25 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
|
|||||||
)
|
)
|
||||||
cdp_url = await self.managed_browser.start()
|
cdp_url = await self.managed_browser.start()
|
||||||
self.browser = await self.playwright.chromium.connect_over_cdp(cdp_url)
|
self.browser = await self.playwright.chromium.connect_over_cdp(cdp_url)
|
||||||
|
|
||||||
|
# Get the default context that maintains the user profile
|
||||||
|
contexts = self.browser.contexts
|
||||||
|
if contexts:
|
||||||
|
self.default_context = contexts[0]
|
||||||
|
else:
|
||||||
|
# If no default context exists, create one
|
||||||
|
self.default_context = await self.browser.new_context(
|
||||||
|
viewport={"width": 1920, "height": 1080}
|
||||||
|
)
|
||||||
|
|
||||||
|
# Set up the default context
|
||||||
|
if self.default_context:
|
||||||
|
await self.default_context.set_extra_http_headers(self.headers)
|
||||||
|
|
||||||
|
if self.user_agent:
|
||||||
|
await self.default_context.set_extra_http_headers({
|
||||||
|
"User-Agent": self.user_agent
|
||||||
|
})
|
||||||
else:
|
else:
|
||||||
browser_args = {
|
browser_args = {
|
||||||
"headless": self.headless,
|
"headless": self.headless,
|
||||||
@@ -254,12 +274,20 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
|
|||||||
async def close(self):
|
async def close(self):
|
||||||
if self.sleep_on_close:
|
if self.sleep_on_close:
|
||||||
await asyncio.sleep(0.5)
|
await asyncio.sleep(0.5)
|
||||||
|
|
||||||
|
# Close all active sessions
|
||||||
|
session_ids = list(self.sessions.keys())
|
||||||
|
for session_id in session_ids:
|
||||||
|
await self.kill_session(session_id)
|
||||||
|
|
||||||
if self.browser:
|
if self.browser:
|
||||||
await self.browser.close()
|
await self.browser.close()
|
||||||
self.browser = None
|
self.browser = None
|
||||||
|
|
||||||
if self.managed_browser:
|
if self.managed_browser:
|
||||||
await self.managed_browser.cleanup()
|
await self.managed_browser.cleanup()
|
||||||
self.managed_browser = None
|
self.managed_browser = None
|
||||||
|
|
||||||
if self.playwright:
|
if self.playwright:
|
||||||
await self.playwright.stop()
|
await self.playwright.stop()
|
||||||
self.playwright = None
|
self.playwright = None
|
||||||
@@ -293,6 +321,7 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
|
|||||||
if session_id in self.sessions:
|
if session_id in self.sessions:
|
||||||
context, page, _ = self.sessions[session_id]
|
context, page, _ = self.sessions[session_id]
|
||||||
await page.close()
|
await page.close()
|
||||||
|
if not self.use_managed_browser:
|
||||||
await context.close()
|
await context.close()
|
||||||
del self.sessions[session_id]
|
del self.sessions[session_id]
|
||||||
|
|
||||||
@@ -415,6 +444,20 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
|
|||||||
|
|
||||||
self._cleanup_expired_sessions()
|
self._cleanup_expired_sessions()
|
||||||
session_id = kwargs.get("session_id")
|
session_id = kwargs.get("session_id")
|
||||||
|
|
||||||
|
# Handle page creation differently for managed browser
|
||||||
|
if self.use_managed_browser:
|
||||||
|
if session_id:
|
||||||
|
# Reuse existing session if available
|
||||||
|
context, page, _ = self.sessions.get(session_id, (None, None, None))
|
||||||
|
if not page:
|
||||||
|
# Create new page in default context if session doesn't exist
|
||||||
|
page = await self.default_context.new_page()
|
||||||
|
self.sessions[session_id] = (self.default_context, page, time.time())
|
||||||
|
else:
|
||||||
|
# Create new page in default context for non-session requests
|
||||||
|
page = await self.default_context.new_page()
|
||||||
|
else:
|
||||||
if session_id:
|
if session_id:
|
||||||
context, page, _ = self.sessions.get(session_id, (None, None, None))
|
context, page, _ = self.sessions.get(session_id, (None, None, None))
|
||||||
if not context:
|
if not context:
|
||||||
|
|||||||
Reference in New Issue
Block a user