Add create_isolated_context flag for concurrent CDP crawls

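When create_isolated_context is True, a new browser context is created for
each crawl instead of reusing the default context. This is essential for
concurrent crawls on the same browser, where crawls that share the default
context (and therefore the same page) cause navigation conflicts.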
This commit is contained in:
unclecode
2025-12-13 08:29:05 +00:00
parent 6185d3cb32
commit 55eb968a8d
3 changed files with 1788 additions and 2067 deletions

View File

@@ -384,6 +384,9 @@ class BrowserConfig:
the local Playwright client resources. Useful for cloud/server scenarios
where you don't own the remote browser but need to prevent memory leaks
from accumulated Playwright instances. Default: False.
create_isolated_context (bool): When True and using cdp_url, forces creation of a new browser context
instead of reusing the default context. Essential for concurrent crawls
on the same browser to prevent navigation conflicts. Default: False.
debugging_port (int): Port for the browser debugging protocol. Default: 9222.
use_persistent_context (bool): Use a persistent browser context (like a persistent profile).
Automatically sets use_managed_browser=True. Default: False.
@@ -441,6 +444,7 @@ class BrowserConfig:
browser_context_id: str = None,
target_id: str = None,
cdp_cleanup_on_close: bool = False,
create_isolated_context: bool = False,
use_persistent_context: bool = False,
user_data_dir: str = None,
chrome_channel: str = "chromium",
@@ -483,6 +487,7 @@ class BrowserConfig:
self.browser_context_id = browser_context_id
self.target_id = target_id
self.cdp_cleanup_on_close = cdp_cleanup_on_close
self.create_isolated_context = create_isolated_context
self.use_persistent_context = use_persistent_context
self.user_data_dir = user_data_dir
self.chrome_channel = chrome_channel or self.browser_type or "chromium"
@@ -581,6 +586,7 @@ class BrowserConfig:
browser_context_id=kwargs.get("browser_context_id"),
target_id=kwargs.get("target_id"),
cdp_cleanup_on_close=kwargs.get("cdp_cleanup_on_close", False),
create_isolated_context=kwargs.get("create_isolated_context", False),
use_persistent_context=kwargs.get("use_persistent_context", False),
user_data_dir=kwargs.get("user_data_dir"),
chrome_channel=kwargs.get("chrome_channel", "chromium"),
@@ -620,6 +626,8 @@ class BrowserConfig:
"cdp_url": self.cdp_url,
"browser_context_id": self.browser_context_id,
"target_id": self.target_id,
"cdp_cleanup_on_close": self.cdp_cleanup_on_close,
"create_isolated_context": self.create_isolated_context,
"use_persistent_context": self.use_persistent_context,
"user_data_dir": self.user_data_dir,
"chrome_channel": self.chrome_channel,

View File

@@ -1149,7 +1149,15 @@ class BrowserManager:
# If using a managed browser, reuse the shared default_context unless an isolated context is requested
if self.config.use_managed_browser:
if self.config.storage_state:
# If create_isolated_context is True, create a fresh context for this crawl
# This is essential for concurrent crawls on the same browser to prevent
# navigation conflicts (multiple crawls sharing the same page)
if self.config.create_isolated_context:
context = await self.create_browser_context(crawlerRunConfig)
await self.setup_context(context, crawlerRunConfig)
page = await context.new_page()
await self._apply_stealth_to_page(page)
elif self.config.storage_state:
context = await self.create_browser_context(crawlerRunConfig)
ctx = self.default_context # default context, one window only
ctx = await clone_runtime_state(context, ctx, crawlerRunConfig, self.config)

3837
uv.lock generated

File diff suppressed because it is too large Load Diff