Revert context matching attempts - Playwright cannot see CDP-created contexts
This commit is contained in:
@@ -676,78 +676,12 @@ class BrowserManager:
|
|||||||
f"Using pre-existing browser context: {self.config.browser_context_id}",
|
f"Using pre-existing browser context: {self.config.browser_context_id}",
|
||||||
tag="BROWSER"
|
tag="BROWSER"
|
||||||
)
|
)
|
||||||
|
# When connecting to a pre-created context, it should be in contexts
|
||||||
# Find the specific context by matching browserContextId
|
if contexts:
|
||||||
# Each Playwright context has pages, and we need to find which one
|
|
||||||
# corresponds to our CDP browserContextId
|
|
||||||
found_context = None
|
|
||||||
if contexts and len(contexts) == 1:
|
|
||||||
# Only one context - use it directly
|
|
||||||
found_context = contexts[0]
|
|
||||||
elif contexts and len(contexts) > 1:
|
|
||||||
# Multiple contexts - need to find the right one
|
|
||||||
# Use CDP to query which context owns our target
|
|
||||||
try:
|
|
||||||
# Get first page from any context to create CDP session
|
|
||||||
any_page = None
|
|
||||||
for ctx in contexts:
|
|
||||||
if ctx.pages:
|
|
||||||
any_page = ctx.pages[0]
|
|
||||||
break
|
|
||||||
|
|
||||||
if any_page:
|
|
||||||
cdp = await any_page.context.new_cdp_session(any_page)
|
|
||||||
try:
|
|
||||||
result = await cdp.send("Target.getTargets")
|
|
||||||
targets = result.get("targetInfos", [])
|
|
||||||
|
|
||||||
# Find our target and its browserContextId
|
|
||||||
for target in targets:
|
|
||||||
if target.get("targetId") == self.config.target_id:
|
|
||||||
target_browser_context_id = target.get("browserContextId")
|
|
||||||
if target_browser_context_id == self.config.browser_context_id:
|
|
||||||
# Found it - now find which Playwright context has a page matching
|
|
||||||
for ctx in contexts:
|
|
||||||
for page in ctx.pages:
|
|
||||||
# Check if this page's context matches
|
|
||||||
# by checking if the page is in the right context
|
|
||||||
page_cdp = await ctx.new_cdp_session(page)
|
|
||||||
try:
|
|
||||||
page_targets = await page_cdp.send("Target.getTargets")
|
|
||||||
for pt in page_targets.get("targetInfos", []):
|
|
||||||
if pt.get("browserContextId") == self.config.browser_context_id:
|
|
||||||
found_context = ctx
|
|
||||||
break
|
|
||||||
finally:
|
|
||||||
await page_cdp.detach()
|
|
||||||
if found_context:
|
|
||||||
break
|
|
||||||
if found_context:
|
|
||||||
break
|
|
||||||
break
|
|
||||||
finally:
|
|
||||||
await cdp.detach()
|
|
||||||
except Exception as e:
|
|
||||||
if self.logger:
|
|
||||||
self.logger.warning(
|
|
||||||
f"Failed to find context via CDP: {e}",
|
|
||||||
tag="BROWSER"
|
|
||||||
)
|
|
||||||
|
|
||||||
if found_context:
|
|
||||||
self.default_context = found_context
|
|
||||||
if self.logger:
|
|
||||||
self.logger.debug(
|
|
||||||
f"Found context for browserContextId: {self.config.browser_context_id}",
|
|
||||||
tag="BROWSER"
|
|
||||||
)
|
|
||||||
elif contexts:
|
|
||||||
# Fallback to first context if we can't find the specific one
|
|
||||||
self.default_context = contexts[0]
|
self.default_context = contexts[0]
|
||||||
if self.logger:
|
if self.logger:
|
||||||
self.logger.warning(
|
self.logger.debug(
|
||||||
f"Could not find context for browserContextId {self.config.browser_context_id}, "
|
f"Found {len(contexts)} existing context(s), using first one",
|
||||||
f"using first of {len(contexts)} context(s)",
|
|
||||||
tag="BROWSER"
|
tag="BROWSER"
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
@@ -1226,33 +1160,29 @@ class BrowserManager:
|
|||||||
await self._apply_stealth_to_page(page)
|
await self._apply_stealth_to_page(page)
|
||||||
else:
|
else:
|
||||||
context = self.default_context
|
context = self.default_context
|
||||||
|
pages = context.pages
|
||||||
# When target_id is provided, use it to find the specific page
|
page = next((p for p in pages if p.url == crawlerRunConfig.url), None)
|
||||||
# This is critical for concurrent requests sharing a browser
|
if not page:
|
||||||
if self.config.browser_context_id and self.config.target_id:
|
if pages:
|
||||||
page = await self._get_page_by_target_id(context, self.config.target_id)
|
page = pages[0]
|
||||||
if not page:
|
else:
|
||||||
# Fallback: create new page in existing context
|
# Double-check under lock to avoid TOCTOU and ensure only
|
||||||
|
# one task calls new_page when pages=[] concurrently
|
||||||
async with self._page_lock:
|
async with self._page_lock:
|
||||||
page = await context.new_page()
|
pages = context.pages
|
||||||
await self._apply_stealth_to_page(page)
|
if pages:
|
||||||
else:
|
page = pages[0]
|
||||||
# Original logic for cases without pre-created target
|
elif self.config.browser_context_id and self.config.target_id:
|
||||||
pages = context.pages
|
# Pre-existing context/target provided - use CDP to get the page
|
||||||
page = next((p for p in pages if p.url == crawlerRunConfig.url), None)
|
# This handles the case where Playwright doesn't see the target yet
|
||||||
if not page:
|
page = await self._get_page_by_target_id(context, self.config.target_id)
|
||||||
if pages:
|
if not page:
|
||||||
page = pages[0]
|
# Fallback: create new page in existing context
|
||||||
else:
|
|
||||||
# Double-check under lock to avoid TOCTOU and ensure only
|
|
||||||
# one task calls new_page when pages=[] concurrently
|
|
||||||
async with self._page_lock:
|
|
||||||
pages = context.pages
|
|
||||||
if pages:
|
|
||||||
page = pages[0]
|
|
||||||
else:
|
|
||||||
page = await context.new_page()
|
page = await context.new_page()
|
||||||
await self._apply_stealth_to_page(page)
|
await self._apply_stealth_to_page(page)
|
||||||
|
else:
|
||||||
|
page = await context.new_page()
|
||||||
|
await self._apply_stealth_to_page(page)
|
||||||
else:
|
else:
|
||||||
# Otherwise, check if we have an existing context for this config
|
# Otherwise, check if we have an existing context for this config
|
||||||
config_signature = self._make_config_signature(crawlerRunConfig)
|
config_signature = self._make_config_signature(crawlerRunConfig)
|
||||||
|
|||||||
Reference in New Issue
Block a user