Add create_isolated_context flag for concurrent CDP crawls
When True and connecting via cdp_url, forces creation of a new browser context instead of reusing the default context. Essential for concurrent crawls on the same browser: without it, multiple crawls share the same page and their navigations conflict.
This commit is contained in:
@@ -384,6 +384,9 @@ class BrowserConfig:
|
||||
the local Playwright client resources. Useful for cloud/server scenarios
|
||||
where you don't own the remote browser but need to prevent memory leaks
|
||||
from accumulated Playwright instances. Default: False.
|
||||
create_isolated_context (bool): When True and using cdp_url, forces creation of a new browser context
|
||||
instead of reusing the default context. Essential for concurrent crawls
|
||||
on the same browser to prevent navigation conflicts. Default: False.
|
||||
debugging_port (int): Port for the browser debugging protocol. Default: 9222.
|
||||
use_persistent_context (bool): Use a persistent browser context (like a persistent profile).
|
||||
Automatically sets use_managed_browser=True. Default: False.
|
||||
@@ -441,6 +444,7 @@ class BrowserConfig:
|
||||
browser_context_id: str = None,
|
||||
target_id: str = None,
|
||||
cdp_cleanup_on_close: bool = False,
|
||||
create_isolated_context: bool = False,
|
||||
use_persistent_context: bool = False,
|
||||
user_data_dir: str = None,
|
||||
chrome_channel: str = "chromium",
|
||||
@@ -483,6 +487,7 @@ class BrowserConfig:
|
||||
self.browser_context_id = browser_context_id
|
||||
self.target_id = target_id
|
||||
self.cdp_cleanup_on_close = cdp_cleanup_on_close
|
||||
self.create_isolated_context = create_isolated_context
|
||||
self.use_persistent_context = use_persistent_context
|
||||
self.user_data_dir = user_data_dir
|
||||
self.chrome_channel = chrome_channel or self.browser_type or "chromium"
|
||||
@@ -581,6 +586,7 @@ class BrowserConfig:
|
||||
browser_context_id=kwargs.get("browser_context_id"),
|
||||
target_id=kwargs.get("target_id"),
|
||||
cdp_cleanup_on_close=kwargs.get("cdp_cleanup_on_close", False),
|
||||
create_isolated_context=kwargs.get("create_isolated_context", False),
|
||||
use_persistent_context=kwargs.get("use_persistent_context", False),
|
||||
user_data_dir=kwargs.get("user_data_dir"),
|
||||
chrome_channel=kwargs.get("chrome_channel", "chromium"),
|
||||
@@ -620,6 +626,8 @@ class BrowserConfig:
|
||||
"cdp_url": self.cdp_url,
|
||||
"browser_context_id": self.browser_context_id,
|
||||
"target_id": self.target_id,
|
||||
"cdp_cleanup_on_close": self.cdp_cleanup_on_close,
|
||||
"create_isolated_context": self.create_isolated_context,
|
||||
"use_persistent_context": self.use_persistent_context,
|
||||
"user_data_dir": self.user_data_dir,
|
||||
"chrome_channel": self.chrome_channel,
|
||||
|
||||
@@ -1149,7 +1149,15 @@ class BrowserManager:
|
||||
|
||||
# If using a managed browser, reuse the shared default_context unless create_isolated_context requests a fresh one
|
||||
if self.config.use_managed_browser:
|
||||
if self.config.storage_state:
|
||||
# If create_isolated_context is True, create a fresh context for this crawl
|
||||
# This is essential for concurrent crawls on the same browser to prevent
|
||||
# navigation conflicts (multiple crawls sharing the same page)
|
||||
if self.config.create_isolated_context:
|
||||
context = await self.create_browser_context(crawlerRunConfig)
|
||||
await self.setup_context(context, crawlerRunConfig)
|
||||
page = await context.new_page()
|
||||
await self._apply_stealth_to_page(page)
|
||||
elif self.config.storage_state:
|
||||
context = await self.create_browser_context(crawlerRunConfig)
|
||||
ctx = self.default_context # default context, one window only
|
||||
ctx = await clone_runtime_state(context, ctx, crawlerRunConfig, self.config)
|
||||
|
||||
Reference in New Issue
Block a user