Add create_isolated_context flag for concurrent CDP crawls
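When True, forces creation of a new browser context (with its own page) instead of reusing the shared default context when using a managed browser (e.g. connecting via cdp_url). Essential for concurrent crawls on the same browser to prevent navigation conflicts, where multiple crawls would otherwise share the same page. Defaults to False. This commit also adds the previously missing cdp_cleanup_on_close key to the BrowserConfig dictionary output, alongside the new flag.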
This commit is contained in:
@@ -384,6 +384,9 @@ class BrowserConfig:
|
|||||||
the local Playwright client resources. Useful for cloud/server scenarios
|
the local Playwright client resources. Useful for cloud/server scenarios
|
||||||
where you don't own the remote browser but need to prevent memory leaks
|
where you don't own the remote browser but need to prevent memory leaks
|
||||||
from accumulated Playwright instances. Default: False.
|
from accumulated Playwright instances. Default: False.
|
||||||
|
create_isolated_context (bool): When True and using cdp_url, forces creation of a new browser context
|
||||||
|
instead of reusing the default context. Essential for concurrent crawls
|
||||||
|
on the same browser to prevent navigation conflicts. Default: False.
|
||||||
debugging_port (int): Port for the browser debugging protocol. Default: 9222.
|
debugging_port (int): Port for the browser debugging protocol. Default: 9222.
|
||||||
use_persistent_context (bool): Use a persistent browser context (like a persistent profile).
|
use_persistent_context (bool): Use a persistent browser context (like a persistent profile).
|
||||||
Automatically sets use_managed_browser=True. Default: False.
|
Automatically sets use_managed_browser=True. Default: False.
|
||||||
@@ -441,6 +444,7 @@ class BrowserConfig:
|
|||||||
browser_context_id: str = None,
|
browser_context_id: str = None,
|
||||||
target_id: str = None,
|
target_id: str = None,
|
||||||
cdp_cleanup_on_close: bool = False,
|
cdp_cleanup_on_close: bool = False,
|
||||||
|
create_isolated_context: bool = False,
|
||||||
use_persistent_context: bool = False,
|
use_persistent_context: bool = False,
|
||||||
user_data_dir: str = None,
|
user_data_dir: str = None,
|
||||||
chrome_channel: str = "chromium",
|
chrome_channel: str = "chromium",
|
||||||
@@ -483,6 +487,7 @@ class BrowserConfig:
|
|||||||
self.browser_context_id = browser_context_id
|
self.browser_context_id = browser_context_id
|
||||||
self.target_id = target_id
|
self.target_id = target_id
|
||||||
self.cdp_cleanup_on_close = cdp_cleanup_on_close
|
self.cdp_cleanup_on_close = cdp_cleanup_on_close
|
||||||
|
self.create_isolated_context = create_isolated_context
|
||||||
self.use_persistent_context = use_persistent_context
|
self.use_persistent_context = use_persistent_context
|
||||||
self.user_data_dir = user_data_dir
|
self.user_data_dir = user_data_dir
|
||||||
self.chrome_channel = chrome_channel or self.browser_type or "chromium"
|
self.chrome_channel = chrome_channel or self.browser_type or "chromium"
|
||||||
@@ -581,6 +586,7 @@ class BrowserConfig:
|
|||||||
browser_context_id=kwargs.get("browser_context_id"),
|
browser_context_id=kwargs.get("browser_context_id"),
|
||||||
target_id=kwargs.get("target_id"),
|
target_id=kwargs.get("target_id"),
|
||||||
cdp_cleanup_on_close=kwargs.get("cdp_cleanup_on_close", False),
|
cdp_cleanup_on_close=kwargs.get("cdp_cleanup_on_close", False),
|
||||||
|
create_isolated_context=kwargs.get("create_isolated_context", False),
|
||||||
use_persistent_context=kwargs.get("use_persistent_context", False),
|
use_persistent_context=kwargs.get("use_persistent_context", False),
|
||||||
user_data_dir=kwargs.get("user_data_dir"),
|
user_data_dir=kwargs.get("user_data_dir"),
|
||||||
chrome_channel=kwargs.get("chrome_channel", "chromium"),
|
chrome_channel=kwargs.get("chrome_channel", "chromium"),
|
||||||
@@ -620,6 +626,8 @@ class BrowserConfig:
|
|||||||
"cdp_url": self.cdp_url,
|
"cdp_url": self.cdp_url,
|
||||||
"browser_context_id": self.browser_context_id,
|
"browser_context_id": self.browser_context_id,
|
||||||
"target_id": self.target_id,
|
"target_id": self.target_id,
|
||||||
|
"cdp_cleanup_on_close": self.cdp_cleanup_on_close,
|
||||||
|
"create_isolated_context": self.create_isolated_context,
|
||||||
"use_persistent_context": self.use_persistent_context,
|
"use_persistent_context": self.use_persistent_context,
|
||||||
"user_data_dir": self.user_data_dir,
|
"user_data_dir": self.user_data_dir,
|
||||||
"chrome_channel": self.chrome_channel,
|
"chrome_channel": self.chrome_channel,
|
||||||
|
|||||||
@@ -1149,7 +1149,15 @@ class BrowserManager:
|
|||||||
|
|
||||||
# If using a managed browser, just grab the shared default_context
|
# If using a managed browser, just grab the shared default_context
|
||||||
if self.config.use_managed_browser:
|
if self.config.use_managed_browser:
|
||||||
if self.config.storage_state:
|
# If create_isolated_context is True, create a fresh context for this crawl
|
||||||
|
# This is essential for concurrent crawls on the same browser to prevent
|
||||||
|
# navigation conflicts (multiple crawls sharing the same page)
|
||||||
|
if self.config.create_isolated_context:
|
||||||
|
context = await self.create_browser_context(crawlerRunConfig)
|
||||||
|
await self.setup_context(context, crawlerRunConfig)
|
||||||
|
page = await context.new_page()
|
||||||
|
await self._apply_stealth_to_page(page)
|
||||||
|
elif self.config.storage_state:
|
||||||
context = await self.create_browser_context(crawlerRunConfig)
|
context = await self.create_browser_context(crawlerRunConfig)
|
||||||
ctx = self.default_context # default context, one window only
|
ctx = self.default_context # default context, one window only
|
||||||
ctx = await clone_runtime_state(context, ctx, crawlerRunConfig, self.config)
|
ctx = await clone_runtime_state(context, ctx, crawlerRunConfig, self.config)
|
||||||
|
|||||||
Reference in New Issue
Block a user