From 6185d3cb327422196bbd143e86a7ac7d7cf8ad7c Mon Sep 17 00:00:00 2001
From: unclecode <unclecode@kidocode.com>
Date: Sat, 13 Dec 2025 07:57:29 +0000
Subject: [PATCH] Revert context matching attempts - Playwright cannot see
 CDP-created contexts

---
 crawl4ai/browser_manager.py | 118 ++++++++----------------------------
 1 file changed, 24 insertions(+), 94 deletions(-)

diff --git a/crawl4ai/browser_manager.py b/crawl4ai/browser_manager.py
index 89e7f829..45d7b223 100644
--- a/crawl4ai/browser_manager.py
+++ b/crawl4ai/browser_manager.py
@@ -676,78 +676,12 @@ class BrowserManager:
                         f"Using pre-existing browser context: {self.config.browser_context_id}",
                         tag="BROWSER"
                     )
-
-                # Find the specific context by matching browserContextId
-                # Each Playwright context has pages, and we need to find which one
-                # corresponds to our CDP browserContextId
-                found_context = None
-                if contexts and len(contexts) == 1:
-                    # Only one context - use it directly
-                    found_context = contexts[0]
-                elif contexts and len(contexts) > 1:
-                    # Multiple contexts - need to find the right one
-                    # Use CDP to query which context owns our target
-                    try:
-                        # Get first page from any context to create CDP session
-                        any_page = None
-                        for ctx in contexts:
-                            if ctx.pages:
-                                any_page = ctx.pages[0]
-                                break
-
-                        if any_page:
-                            cdp = await any_page.context.new_cdp_session(any_page)
-                            try:
-                                result = await cdp.send("Target.getTargets")
-                                targets = result.get("targetInfos", [])
-
-                                # Find our target and its browserContextId
-                                for target in targets:
-                                    if target.get("targetId") == self.config.target_id:
-                                        target_browser_context_id = target.get("browserContextId")
-                                        if target_browser_context_id == self.config.browser_context_id:
-                                            # Found it - now find which Playwright context has a page matching
-                                            for ctx in contexts:
-                                                for page in ctx.pages:
-                                                    # Check if this page's context matches
-                                                    # by checking if the page is in the right context
-                                                    page_cdp = await ctx.new_cdp_session(page)
-                                                    try:
-                                                        page_targets = await page_cdp.send("Target.getTargets")
-                                                        for pt in page_targets.get("targetInfos", []):
-                                                            if pt.get("browserContextId") == self.config.browser_context_id:
-                                                                found_context = ctx
-                                                                break
-                                                    finally:
-                                                        await page_cdp.detach()
-                                                    if found_context:
-                                                        break
-                                                if found_context:
-                                                    break
-                                        break
-                            finally:
-                                await cdp.detach()
-                    except Exception as e:
-                        if self.logger:
-                            self.logger.warning(
-                                f"Failed to find context via CDP: {e}",
-                                tag="BROWSER"
-                            )
-
-                if found_context:
-                    self.default_context = found_context
-                    if self.logger:
-                        self.logger.debug(
-                            f"Found context for browserContextId: {self.config.browser_context_id}",
-                            tag="BROWSER"
-                        )
-                elif contexts:
-                    # Fallback to first context if we can't find the specific one
+                # When connecting to a pre-created context, it should be in contexts
+                if contexts:
                     self.default_context = contexts[0]
                     if self.logger:
-                        self.logger.warning(
-                            f"Could not find context for browserContextId {self.config.browser_context_id}, "
-                            f"using first of {len(contexts)} context(s)",
+                        self.logger.debug(
+                            f"Found {len(contexts)} existing context(s), using first one",
                             tag="BROWSER"
                         )
                 else:
@@ -1226,33 +1160,29 @@ class BrowserManager:
                 await self._apply_stealth_to_page(page)
             else:
                 context = self.default_context
-
-                # When target_id is provided, use it to find the specific page
-                # This is critical for concurrent requests sharing a browser
-                if self.config.browser_context_id and self.config.target_id:
-                    page = await self._get_page_by_target_id(context, self.config.target_id)
-                    if not page:
-                        # Fallback: create new page in existing context
+                pages = context.pages
+                page = next((p for p in pages if p.url == crawlerRunConfig.url), None)
+                if not page:
+                    if pages:
+                        page = pages[0]
+                    else:
+                        # Double-check under lock to avoid TOCTOU and ensure only
+                        # one task calls new_page when pages=[] concurrently
                         async with self._page_lock:
-                            page = await context.new_page()
-                            await self._apply_stealth_to_page(page)
-                else:
-                    # Original logic for cases without pre-created target
-                    pages = context.pages
-                    page = next((p for p in pages if p.url == crawlerRunConfig.url), None)
-                    if not page:
-                        if pages:
-                            page = pages[0]
-                        else:
-                            # Double-check under lock to avoid TOCTOU and ensure only
-                            # one task calls new_page when pages=[] concurrently
-                            async with self._page_lock:
-                                pages = context.pages
-                                if pages:
-                                    page = pages[0]
-                                else:
+                            pages = context.pages
+                            if pages:
+                                page = pages[0]
+                            elif self.config.browser_context_id and self.config.target_id:
+                                # Pre-existing context/target provided - use CDP to get the page
+                                # This handles the case where Playwright doesn't see the target yet
+                                page = await self._get_page_by_target_id(context, self.config.target_id)
+                                if not page:
+                                    # Fallback: create new page in existing context
                                     page = await context.new_page()
                                     await self._apply_stealth_to_page(page)
+                            else:
+                                page = await context.new_page()
+                                await self._apply_stealth_to_page(page)
         else:
             # Otherwise, check if we have an existing context for this config
             config_signature = self._make_config_signature(crawlerRunConfig)