feat(proxy): add proxy configuration support to CrawlerRunConfig

Add a proxy_config parameter to CrawlerRunConfig to support dynamic, per-request proxy configuration. This lets users specify different proxy settings for each crawl operation without modifying the browser config.

- Added proxy_config parameter to CrawlerRunConfig
- Updated BrowserManager to apply proxy settings from CrawlerRunConfig
- Updated proxy-security documentation with new usage examples
This commit is contained in:
UncleCode
2025-01-20 22:14:05 +08:00
parent 2cec527a22
commit 9247877037
4 changed files with 43 additions and 13 deletions

View File

@@ -270,6 +270,8 @@ class CrawlerRunConfig:
Default: "lxml".
scraping_strategy (ContentScrapingStrategy): Scraping strategy to use.
Default: WebScrapingStrategy.
proxy_config (dict or None): Detailed proxy configuration, e.g. {"server": "...", "username": "...", "password": "..."}.
If None, no additional proxy configuration is applied. Default: None.
# Caching Parameters
cache_mode (CacheMode or None): Defines how caching is handled.
@@ -389,6 +391,7 @@ class CrawlerRunConfig:
prettiify: bool = False,
parser_type: str = "lxml",
scraping_strategy: ContentScrapingStrategy = None,
proxy_config: dict = None,
# SSL Parameters
fetch_ssl_certificate: bool = False,
# Caching Parameters
@@ -457,6 +460,7 @@ class CrawlerRunConfig:
self.prettiify = prettiify
self.parser_type = parser_type
self.scraping_strategy = scraping_strategy or WebScrapingStrategy()
self.proxy_config = proxy_config
# SSL Parameters
self.fetch_ssl_certificate = fetch_ssl_certificate
@@ -553,6 +557,7 @@ class CrawlerRunConfig:
prettiify=kwargs.get("prettiify", False),
parser_type=kwargs.get("parser_type", "lxml"),
scraping_strategy=kwargs.get("scraping_strategy"),
proxy_config=kwargs.get("proxy_config"),
# SSL Parameters
fetch_ssl_certificate=kwargs.get("fetch_ssl_certificate", False),
# Caching Parameters
@@ -631,6 +636,7 @@ class CrawlerRunConfig:
"prettiify": self.prettiify,
"parser_type": self.parser_type,
"scraping_strategy": self.scraping_strategy,
"proxy_config": self.proxy_config,
"fetch_ssl_certificate": self.fetch_ssl_certificate,
"cache_mode": self.cache_mode,
"session_id": self.session_id,

View File

@@ -543,9 +543,9 @@ class BrowserManager:
or crawlerRunConfig.simulate_user
or crawlerRunConfig.magic
):
await context.add_init_script(load_js_script("navigator_overrider"))
await context.add_init_script(load_js_script("navigator_overrider"))
async def create_browser_context(self):
async def create_browser_context(self, crawlerRunConfig: CrawlerRunConfig = None):
"""
Creates and returns a new browser context with configured settings.
Applies text-only mode settings if text_mode is enabled in config.
@@ -627,6 +627,16 @@ class BrowserManager:
"device_scale_factor": 1.0,
"java_script_enabled": self.config.java_script_enabled,
}
if crawlerRunConfig:
# If crawlerRunConfig.proxy_config is set, add its proxy settings to the context settings
if crawlerRunConfig.proxy_config:
proxy_settings = {
"server": crawlerRunConfig.proxy_config.get("server"),
"username": crawlerRunConfig.proxy_config.get("username"),
"password": crawlerRunConfig.proxy_config.get("password"),
}
context_settings["proxy"] = proxy_settings
if self.config.text_mode:
text_mode_settings = {
@@ -710,7 +720,7 @@ class BrowserManager:
context = self.contexts_by_config[config_signature]
else:
# Create and setup a new context
context = await self.create_browser_context()
context = await self.create_browser_context(crawlerRunConfig)
await self.setup_context(context, crawlerRunConfig)
self.contexts_by_config[config_signature] = context