Add create_isolated_context flag for concurrent CDP crawls

When create_isolated_context is True, a new browser context is created for
each crawl instead of reusing the shared default context. This is essential
for concurrent crawls on the same browser, where sharing the default context
causes navigation conflicts.
This commit is contained in:
unclecode
2025-12-13 08:29:05 +00:00
parent 6185d3cb32
commit 55eb968a8d
3 changed files with 1788 additions and 2067 deletions

View File

@@ -384,6 +384,9 @@ class BrowserConfig:
the local Playwright client resources. Useful for cloud/server scenarios
where you don't own the remote browser but need to prevent memory leaks
from accumulated Playwright instances. Default: False.
create_isolated_context (bool): When True and using cdp_url, forces creation of a new browser context
instead of reusing the default context. Essential for concurrent crawls
on the same browser to prevent navigation conflicts. Default: False.
debugging_port (int): Port for the browser debugging protocol. Default: 9222.
use_persistent_context (bool): Use a persistent browser context (like a persistent profile).
Automatically sets use_managed_browser=True. Default: False.
@@ -441,6 +444,7 @@ class BrowserConfig:
browser_context_id: str = None, browser_context_id: str = None,
target_id: str = None, target_id: str = None,
cdp_cleanup_on_close: bool = False, cdp_cleanup_on_close: bool = False,
create_isolated_context: bool = False,
use_persistent_context: bool = False, use_persistent_context: bool = False,
user_data_dir: str = None, user_data_dir: str = None,
chrome_channel: str = "chromium", chrome_channel: str = "chromium",
@@ -483,6 +487,7 @@ class BrowserConfig:
self.browser_context_id = browser_context_id self.browser_context_id = browser_context_id
self.target_id = target_id self.target_id = target_id
self.cdp_cleanup_on_close = cdp_cleanup_on_close self.cdp_cleanup_on_close = cdp_cleanup_on_close
self.create_isolated_context = create_isolated_context
self.use_persistent_context = use_persistent_context self.use_persistent_context = use_persistent_context
self.user_data_dir = user_data_dir self.user_data_dir = user_data_dir
self.chrome_channel = chrome_channel or self.browser_type or "chromium" self.chrome_channel = chrome_channel or self.browser_type or "chromium"
@@ -581,6 +586,7 @@ class BrowserConfig:
browser_context_id=kwargs.get("browser_context_id"), browser_context_id=kwargs.get("browser_context_id"),
target_id=kwargs.get("target_id"), target_id=kwargs.get("target_id"),
cdp_cleanup_on_close=kwargs.get("cdp_cleanup_on_close", False), cdp_cleanup_on_close=kwargs.get("cdp_cleanup_on_close", False),
create_isolated_context=kwargs.get("create_isolated_context", False),
use_persistent_context=kwargs.get("use_persistent_context", False), use_persistent_context=kwargs.get("use_persistent_context", False),
user_data_dir=kwargs.get("user_data_dir"), user_data_dir=kwargs.get("user_data_dir"),
chrome_channel=kwargs.get("chrome_channel", "chromium"), chrome_channel=kwargs.get("chrome_channel", "chromium"),
@@ -620,6 +626,8 @@ class BrowserConfig:
"cdp_url": self.cdp_url, "cdp_url": self.cdp_url,
"browser_context_id": self.browser_context_id, "browser_context_id": self.browser_context_id,
"target_id": self.target_id, "target_id": self.target_id,
"cdp_cleanup_on_close": self.cdp_cleanup_on_close,
"create_isolated_context": self.create_isolated_context,
"use_persistent_context": self.use_persistent_context, "use_persistent_context": self.use_persistent_context,
"user_data_dir": self.user_data_dir, "user_data_dir": self.user_data_dir,
"chrome_channel": self.chrome_channel, "chrome_channel": self.chrome_channel,

View File

@@ -1149,7 +1149,15 @@ class BrowserManager:
# If using a managed browser, just grab the shared default_context # If using a managed browser, just grab the shared default_context
if self.config.use_managed_browser: if self.config.use_managed_browser:
if self.config.storage_state: # If create_isolated_context is True, create a fresh context for this crawl
# This is essential for concurrent crawls on the same browser to prevent
# navigation conflicts (multiple crawls sharing the same page)
if self.config.create_isolated_context:
context = await self.create_browser_context(crawlerRunConfig)
await self.setup_context(context, crawlerRunConfig)
page = await context.new_page()
await self._apply_stealth_to_page(page)
elif self.config.storage_state:
context = await self.create_browser_context(crawlerRunConfig) context = await self.create_browser_context(crawlerRunConfig)
ctx = self.default_context # default context, one window only ctx = self.default_context # default context, one window only
ctx = await clone_runtime_state(context, ctx, crawlerRunConfig, self.config) ctx = await clone_runtime_state(context, ctx, crawlerRunConfig, self.config)

3837
uv.lock generated

File diff suppressed because it is too large Load Diff