From b2e4a1f2e30528e0ea0078c124e3ca58b990aa8d Mon Sep 17 00:00:00 2001 From: unclecode Date: Sat, 13 Dec 2025 06:41:13 +0000 Subject: [PATCH] Fix: find context by target_id for concurrent CDP connections --- crawl4ai/browser_manager.py | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/crawl4ai/browser_manager.py b/crawl4ai/browser_manager.py index 45d7b223..5cf5cd8a 100644 --- a/crawl4ai/browser_manager.py +++ b/crawl4ai/browser_manager.py @@ -676,12 +676,36 @@ class BrowserManager: f"Using pre-existing browser context: {self.config.browser_context_id}", tag="BROWSER" ) - # When connecting to a pre-created context, it should be in contexts - if contexts: + + # Find the specific context by matching target_id to pages + # This is critical for concurrent crawls sharing a browser + found_context = None + if self.config.target_id and contexts: + for ctx in contexts: + for page in ctx.pages: + # Playwright stores target ID in internal implementation + page_impl = getattr(page, '_impl_obj', None) + page_target_id = getattr(page_impl, '_target_id', None) if page_impl else None + if page_target_id == self.config.target_id: + found_context = ctx + if self.logger: + self.logger.debug( + f"Found context by target_id: {self.config.target_id}", + tag="BROWSER" + ) + break + if found_context: + break + + if found_context: + self.default_context = found_context + elif contexts: + # Fallback to first context if we can't find the specific one self.default_context = contexts[0] if self.logger: - self.logger.debug( - f"Found {len(contexts)} existing context(s), using first one", + self.logger.warning( + f"Could not find context for target_id {self.config.target_id}, " + f"using first of {len(contexts)} context(s)", tag="BROWSER" ) else: