From 9247877037395dbf9b2fca67241a134724ec0155 Mon Sep 17 00:00:00 2001 From: UncleCode Date: Mon, 20 Jan 2025 22:14:05 +0800 Subject: [PATCH] feat(proxy): add proxy configuration support to CrawlerRunConfig Add proxy_config parameter to CrawlerRunConfig to support dynamic proxy configuration per crawl request. This enables users to specify different proxy settings for each crawl operation without modifying the browser config. - Added proxy_config parameter to CrawlerRunConfig - Updated BrowserManager to apply proxy settings from CrawlerRunConfig - Updated proxy-security documentation with new usage examples --- CHANGELOG.md | 4 ++++ crawl4ai/async_configs.py | 6 ++++++ crawl4ai/async_crawler_strategy.py | 16 +++++++++++--- docs/md_v2/advanced/proxy-security.md | 30 ++++++++++++++++++--------- 4 files changed, 43 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3bea14df..93bd9bdc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +### [Added] 2025-01-20 +- Added proxy configuration support to CrawlerRunConfig allowing dynamic proxy settings per crawl request +- Updated documentation with examples for using proxy configuration in crawl operations + ### [Added] 2025-01-20 - New LLM-powered schema generation utility for JsonElementExtractionStrategy - Support for automatic CSS and XPath schema generation using OpenAI or Ollama diff --git a/crawl4ai/async_configs.py b/crawl4ai/async_configs.py index f4914726..fbcb6e70 100644 --- a/crawl4ai/async_configs.py +++ b/crawl4ai/async_configs.py @@ -270,6 +270,8 @@ class CrawlerRunConfig: Default: "lxml". scraping_strategy (ContentScrapingStrategy): Scraping strategy to use. Default: WebScrapingStrategy. + proxy_config (dict or None): Detailed proxy configuration, e.g. {"server": "...", "username": "..."}. + If None, no additional proxy config. Default: None. # Caching Parameters cache_mode (CacheMode or None): Defines how caching is handled. 
@@ -389,6 +391,7 @@ class CrawlerRunConfig: prettiify: bool = False, parser_type: str = "lxml", scraping_strategy: ContentScrapingStrategy = None, + proxy_config: dict = None, # SSL Parameters fetch_ssl_certificate: bool = False, # Caching Parameters @@ -457,6 +460,7 @@ class CrawlerRunConfig: self.prettiify = prettiify self.parser_type = parser_type self.scraping_strategy = scraping_strategy or WebScrapingStrategy() + self.proxy_config = proxy_config # SSL Parameters self.fetch_ssl_certificate = fetch_ssl_certificate @@ -553,6 +557,7 @@ class CrawlerRunConfig: prettiify=kwargs.get("prettiify", False), parser_type=kwargs.get("parser_type", "lxml"), scraping_strategy=kwargs.get("scraping_strategy"), + proxy_config=kwargs.get("proxy_config"), # SSL Parameters fetch_ssl_certificate=kwargs.get("fetch_ssl_certificate", False), # Caching Parameters @@ -631,6 +636,7 @@ class CrawlerRunConfig: "prettiify": self.prettiify, "parser_type": self.parser_type, "scraping_strategy": self.scraping_strategy, + "proxy_config": self.proxy_config, "fetch_ssl_certificate": self.fetch_ssl_certificate, "cache_mode": self.cache_mode, "session_id": self.session_id, diff --git a/crawl4ai/async_crawler_strategy.py b/crawl4ai/async_crawler_strategy.py index 786d2fb9..ae1788f1 100644 --- a/crawl4ai/async_crawler_strategy.py +++ b/crawl4ai/async_crawler_strategy.py @@ -543,9 +543,9 @@ class BrowserManager: or crawlerRunConfig.simulate_user or crawlerRunConfig.magic ): - await context.add_init_script(load_js_script("navigator_overrider")) + await context.add_init_script(load_js_script("navigator_overrider")) - async def create_browser_context(self): + async def create_browser_context(self, crawlerRunConfig: CrawlerRunConfig = None): """ Creates and returns a new browser context with configured settings. Applies text-only mode settings if text_mode is enabled in config. 
@@ -627,6 +627,16 @@ class BrowserManager: "device_scale_factor": 1.0, "java_script_enabled": self.config.java_script_enabled, } + + if crawlerRunConfig: + # If crawlerRunConfig.proxy_config is set, apply it to the context settings + if crawlerRunConfig.proxy_config: + proxy_settings = { + "server": crawlerRunConfig.proxy_config.get("server"), + "username": crawlerRunConfig.proxy_config.get("username"), + "password": crawlerRunConfig.proxy_config.get("password"), + } + context_settings["proxy"] = proxy_settings if self.config.text_mode: text_mode_settings = { @@ -710,7 +720,7 @@ class BrowserManager: context = self.contexts_by_config[config_signature] else: # Create and setup a new context - context = await self.create_browser_context() + context = await self.create_browser_context(crawlerRunConfig) await self.setup_context(context, crawlerRunConfig) self.contexts_by_config[config_signature] = context diff --git a/docs/md_v2/advanced/proxy-security.md b/docs/md_v2/advanced/proxy-security.md index b98c17e5..9b64fd84 100644 --- a/docs/md_v2/advanced/proxy-security.md +++ b/docs/md_v2/advanced/proxy-security.md @@ -36,23 +36,33 @@ async with AsyncWebCrawler(config=browser_config) as crawler: result = await crawler.arun(url="https://example.com") ``` -## Rotating Proxies + -Example using a proxy rotation service and updating `BrowserConfig` dynamically: +## Rotating Proxies [COMING SOON] + +Example using a proxy rotation service dynamically: ```python -from crawl4ai.async_configs import BrowserConfig +from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig async def get_next_proxy(): # Your proxy rotation logic here return {"server": "http://next.proxy.com:8080"} -browser_config = BrowserConfig() -async with AsyncWebCrawler(config=browser_config) as crawler: - # Update proxy for each request - for url in urls: - proxy = await get_next_proxy() - browser_config.proxy_config = proxy - result = await 
crawler.arun(url=url, config=browser_config) +async def main(): + browser_config = BrowserConfig() + run_config = CrawlerRunConfig() + + async with AsyncWebCrawler(config=browser_config) as crawler: + # For each URL, create a new run config with different proxy + for url in urls: + proxy = await get_next_proxy() + # Clone the config and update proxy - this creates a new browser context + current_config = run_config.clone(proxy_config=proxy) + result = await crawler.arun(url=url, config=current_config) + +if __name__ == "__main__": + import asyncio + asyncio.run(main()) ```