refactor(BrowserConfig): change deprecation warning for 'proxy' parameter to UserWarning

Remove deprecated test for 'proxy' parameter in BrowserConfig and update .gitignore to include test_scripts directory.
#1375 : refactor(proxy) Deprecate 'proxy' parameter in BrowserConfig and enhance proxy string parsing
2025-09-12 11:10:38 +08:00 · 2025-08-28 17:26:10 +08:00 · 2025-08-28 17:21:49 +08:00 · 2025-08-27 11:08:42 +08:00 · 2025-08-26 16:53:47 +08:00 · 2025-08-26 16:48:17 +08:00
16 changed files with 371 additions and 94 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -265,7 +265,7 @@ CLAUDE.md
 tests/**/test_site
 tests/**/reports
 tests/**/benchmark_reports
-
+test_scripts/
 docs/**/data
 .codecat/

--- a/crawl4ai/async_configs.py
+++ b/crawl4ai/async_configs.py
@@ -1,5 +1,6 @@
 import os
 from typing import Union
+import warnings
 from .config import (
    DEFAULT_PROVIDER,
    DEFAULT_PROVIDER_API_KEY,
@@ -97,13 +98,16 @@ def to_serializable_dict(obj: Any, ignore_default_value : bool = False) -> Dict:
                if value != param.default and not ignore_default_value:
                    current_values[name] = to_serializable_dict(value)
        
-        if hasattr(obj, '__slots__'):
-            for slot in obj.__slots__:
-                if slot.startswith('_'):  # Handle private slots
-                    attr_name = slot[1:]  # Remove leading '_'
-                    value = getattr(obj, slot, None)
-                    if value is not None:
-                        current_values[attr_name] = to_serializable_dict(value)
+        # Don't serialize private __slots__ - they're internal implementation details
+        # not constructor parameters. This was causing URLPatternFilter to fail
+        # because _simple_suffixes was being serialized as 'simple_suffixes'
+        # if hasattr(obj, '__slots__'):
+        #     for slot in obj.__slots__:
+        #         if slot.startswith('_'):  # Handle private slots
+        #             attr_name = slot[1:]  # Remove leading '_'
+        #             value = getattr(obj, slot, None)
+        #             if value is not None:
+        #                 current_values[attr_name] = to_serializable_dict(value)

            
        
@@ -254,24 +258,39 @@ class ProxyConfig:
    
    @staticmethod
    def from_string(proxy_str: str) -> "ProxyConfig":
-        """Create a ProxyConfig from a string in the format 'ip:port:username:password'."""
-        parts = proxy_str.split(":")
-        if len(parts) == 4:  # ip:port:username:password
+        """Create a ProxyConfig from a string; raise ValueError if the format is unsupported.
+
+        Supported formats:
+        - 'http://username:password@ip:port'
+        - 'http://ip:port'
+        - 'socks5://ip:port'
+        - 'ip:port:username:password'
+        - 'ip:port'
+        """
+        s = (proxy_str or "").strip()
+        # URL with credentials: split on the LAST '@' so a password may itself contain '@'
+        if "@" in s and "://" in s:
+            auth_part, server_part = s.rsplit("@", 1)
+            protocol, credentials = auth_part.split("://", 1)
+            if ":" in credentials:
+                username, password = credentials.split(":", 1)
+                return ProxyConfig(
+                    server=f"{protocol}://{server_part}",
+                    username=username,
+                    password=password,
+                )
+        # URL without credentials (keep scheme)
+        if "://" in s and "@" not in s:
+            return ProxyConfig(server=s)
+        # Colon separated forms
+        parts = s.split(":")
+        if len(parts) == 4:
            ip, port, username, password = parts
-            return ProxyConfig(
-                server=f"http://{ip}:{port}",
-                username=username,
-                password=password,
-                ip=ip
-            )
-        elif len(parts) == 2:  # ip:port only
+            return ProxyConfig(server=f"http://{ip}:{port}", username=username, password=password)
+        if len(parts) == 2:
            ip, port = parts
-            return ProxyConfig(
-                server=f"http://{ip}:{port}",
-                ip=ip
-            )
-        else:
-            raise ValueError(f"Invalid proxy string format: {proxy_str}")
+            return ProxyConfig(server=f"http://{ip}:{port}")
+        raise ValueError(f"Invalid proxy string format: {proxy_str}")
    
    @staticmethod
    def from_dict(proxy_dict: Dict) -> "ProxyConfig":
@@ -435,6 +454,7 @@ class BrowserConfig:
        host: str = "localhost",
        enable_stealth: bool = False,
    ):
+        
        self.browser_type = browser_type
        self.headless = headless 
        self.browser_mode = browser_mode
@@ -447,13 +467,22 @@ class BrowserConfig:
        if self.browser_type in ["firefox", "webkit"]:
            self.channel = ""
            self.chrome_channel = ""
+        if proxy:
+            warnings.warn("The 'proxy' parameter is deprecated and will be removed in a future release. Use 'proxy_config' instead.", UserWarning)
        self.proxy = proxy
        self.proxy_config = proxy_config
        if isinstance(self.proxy_config, dict):
            self.proxy_config = ProxyConfig.from_dict(self.proxy_config)
        if isinstance(self.proxy_config, str):
            self.proxy_config = ProxyConfig.from_string(self.proxy_config)
-
+        
+        if self.proxy and self.proxy_config:
+            warnings.warn("Both 'proxy' and 'proxy_config' are provided. 'proxy_config' will take precedence.", UserWarning)
+            self.proxy = None
+        elif self.proxy:
+            # Convert proxy string to ProxyConfig if proxy_config is not provided
+            self.proxy_config = ProxyConfig.from_string(self.proxy)
+            self.proxy = None

        self.viewport_width = viewport_width
        self.viewport_height = viewport_height
--- a/crawl4ai/browser_manager.py
+++ b/crawl4ai/browser_manager.py
@@ -15,6 +15,7 @@ from .js_snippet import load_js_script
 from .config import DOWNLOAD_PAGE_TIMEOUT
 from .async_configs import BrowserConfig, CrawlerRunConfig
 from .utils import get_chromium_path
+import warnings


 BROWSER_DISABLE_OPTIONS = [
@@ -741,17 +742,18 @@ class BrowserManager:
            )
            os.makedirs(browser_args["downloads_path"], exist_ok=True)

-        if self.config.proxy or self.config.proxy_config:
+        if self.config.proxy:
+            warnings.warn(
+                "BrowserConfig.proxy is deprecated and ignored. Use proxy_config instead.",
+                DeprecationWarning,
+            )
+        if self.config.proxy_config:
            from playwright.async_api import ProxySettings

-            proxy_settings = (
-                ProxySettings(server=self.config.proxy)
-                if self.config.proxy
-                else ProxySettings(
-                    server=self.config.proxy_config.server,
-                    username=self.config.proxy_config.username,
-                    password=self.config.proxy_config.password,
-                )
+            proxy_settings = ProxySettings(
+                server=self.config.proxy_config.server,
+                username=self.config.proxy_config.username,
+                password=self.config.proxy_config.password,
            )
            browser_args["proxy"] = proxy_settings

--- a/crawl4ai/deep_crawling/bff_strategy.py
+++ b/crawl4ai/deep_crawling/bff_strategy.py
@@ -47,7 +47,13 @@ class BestFirstCrawlingStrategy(DeepCrawlStrategy):
        self.url_scorer = url_scorer
        self.include_external = include_external
        self.max_pages = max_pages
-        self.logger = logger or logging.getLogger(__name__)
+        # self.logger = logger or logging.getLogger(__name__)
+        # Ensure logger is always a Logger instance, not a dict from serialization
+        if isinstance(logger, logging.Logger):
+            self.logger = logger
+        else:
+            # Create a new logger if logger is None, dict, or any other non-Logger type
+            self.logger = logging.getLogger(__name__)
        self.stats = TraversalStats(start_time=datetime.now())
        self._cancel_event = asyncio.Event()
        self._pages_crawled = 0
--- a/crawl4ai/deep_crawling/bfs_strategy.py
+++ b/crawl4ai/deep_crawling/bfs_strategy.py
@@ -38,7 +38,13 @@ class BFSDeepCrawlStrategy(DeepCrawlStrategy):
        self.include_external = include_external
        self.score_threshold = score_threshold
        self.max_pages = max_pages
-        self.logger = logger or logging.getLogger(__name__)
+        # self.logger = logger or logging.getLogger(__name__)
+        # Ensure logger is always a Logger instance, not a dict from serialization
+        if isinstance(logger, logging.Logger):
+            self.logger = logger
+        else:
+            # Create a new logger if logger is None, dict, or any other non-Logger type
+            self.logger = logging.getLogger(__name__)
        self.stats = TraversalStats(start_time=datetime.now())
        self._cancel_event = asyncio.Event()
        self._pages_crawled = 0
--- a/crawl4ai/deep_crawling/filters.py
+++ b/crawl4ai/deep_crawling/filters.py
@@ -120,6 +120,9 @@ class URLPatternFilter(URLFilter):
    """Pattern filter balancing speed and completeness"""

    __slots__ = (
+        "patterns",  # Store original patterns for serialization
+        "use_glob",  # Store original use_glob for serialization  
+        "reverse",   # Store original reverse for serialization
        "_simple_suffixes",
        "_simple_prefixes",
        "_domain_patterns",
@@ -142,6 +145,11 @@ class URLPatternFilter(URLFilter):
        reverse: bool = False,
    ):
        super().__init__()
+        # Store original constructor params for serialization
+        self.patterns = patterns
+        self.use_glob = use_glob
+        self.reverse = reverse
+        
        self._reverse = reverse
        patterns = [patterns] if isinstance(patterns, (str, Pattern)) else patterns

--- a/crawl4ai/models.py
+++ b/crawl4ai/models.py
@@ -253,6 +253,16 @@ class CrawlResult(BaseModel):
        requirements change, this is where you would update the logic.
        """
        result = super().model_dump(*args, **kwargs)
+        
+        # Remove any property descriptors that might have been included
+        # These deprecated properties should not be in the serialized output
+        for key in ['fit_html', 'fit_markdown', 'markdown_v2']:
+            if key in result and isinstance(result[key], property):
+                # del result[key]
+                # Nasrin: I decided to convert it to string instead of removing it.
+                result[key] = str(result[key])
+        
+        # Add the markdown field properly
        if self._markdown is not None:
            result["markdown"] = self._markdown.model_dump() 
        return result
--- a/deploy/docker/c4ai-code-context.md
+++ b/deploy/docker/c4ai-code-context.md
@@ -7520,17 +7520,18 @@ class BrowserManager:
            )
            os.makedirs(browser_args["downloads_path"], exist_ok=True)

-        if self.config.proxy or self.config.proxy_config:
+        if self.config.proxy:
+            warnings.warn(
+                "BrowserConfig.proxy is deprecated and ignored. Use proxy_config instead.",
+                DeprecationWarning,
+            )
+        if self.config.proxy_config:
            from playwright.async_api import ProxySettings

-            proxy_settings = (
-                ProxySettings(server=self.config.proxy)
-                if self.config.proxy
-                else ProxySettings(
-                    server=self.config.proxy_config.server,
-                    username=self.config.proxy_config.username,
-                    password=self.config.proxy_config.password,
-                )
+            proxy_settings = ProxySettings(
+                server=self.config.proxy_config.server,
+                username=self.config.proxy_config.username,
+                password=self.config.proxy_config.password,
            )
            browser_args["proxy"] = proxy_settings

--- a/docs/md_v2/advanced/adaptive-strategies.md
+++ b/docs/md_v2/advanced/adaptive-strategies.md
@@ -126,30 +126,6 @@ Factors:
 - URL depth (fewer slashes = higher authority)
 - Clean URL structure

-### Custom Link Scoring
-
-```python
-class CustomLinkScorer:
-    def score(self, link: Link, query: str, state: CrawlState) -> float:
-        # Prioritize specific URL patterns
-        if "/api/reference/" in link.href:
-            return 2.0  # Double the score
-        
-        # Deprioritize certain sections
-        if "/archive/" in link.href:
-            return 0.1  # Reduce score by 90%
-        
-        # Default scoring
-        return 1.0
-
-# Use with adaptive crawler
-adaptive = AdaptiveCrawler(
-    crawler,
-    config=config,
-    link_scorer=CustomLinkScorer()
-)
-```
-
 ## Domain-Specific Configurations

 ### Technical Documentation
@@ -230,8 +206,12 @@ config = AdaptiveConfig(

 # Periodically clean state
 if len(state.knowledge_base) > 1000:
-    # Keep only most relevant
-    state.knowledge_base = get_top_relevant(state.knowledge_base, 500)
+    # Keep only the top 500 most relevant docs
+    top_content = adaptive.get_relevant_content(top_k=500)
+    keep_indices = {d["index"] for d in top_content}
+    state.knowledge_base = [
+        doc for i, doc in enumerate(state.knowledge_base) if i in keep_indices
+    ]
 ```

 ### Parallel Processing
@@ -252,18 +232,6 @@ tasks = [
 results = await asyncio.gather(*tasks)
 ```

-### Caching Strategy
-
-```python
-# Enable caching for repeated crawls
-async with AsyncWebCrawler(
-    config=BrowserConfig(
-        cache_mode=CacheMode.ENABLED
-    )
-) as crawler:
-    adaptive = AdaptiveCrawler(crawler, config)
-```
-
 ## Debugging & Analysis

 ### Enable Verbose Logging
@@ -322,9 +290,9 @@ with open("crawl_analysis.json", "w") as f:
 ### Implementing a Custom Strategy

 ```python
-from crawl4ai.adaptive_crawler import BaseStrategy
+from crawl4ai.adaptive_crawler import CrawlStrategy

-class DomainSpecificStrategy(BaseStrategy):
+class DomainSpecificStrategy(CrawlStrategy):
    def calculate_coverage(self, state: CrawlState) -> float:
        # Custom coverage calculation
        # e.g., weight certain terms more heavily
@@ -351,7 +319,7 @@ adaptive = AdaptiveCrawler(
 ### Combining Strategies

 ```python
-class HybridStrategy(BaseStrategy):
+class HybridStrategy(CrawlStrategy):
    def __init__(self):
        self.strategies = [
            TechnicalDocStrategy(),
--- a/docs/md_v2/advanced/proxy-security.md
+++ b/docs/md_v2/advanced/proxy-security.md
@@ -7,13 +7,13 @@ Simple proxy configuration with `BrowserConfig`:
 ```python
 from crawl4ai.async_configs import BrowserConfig

-# Using proxy URL
-browser_config = BrowserConfig(proxy="http://proxy.example.com:8080")
+# Using HTTP proxy
+browser_config = BrowserConfig(proxy_config={"server": "http://proxy.example.com:8080"})
 async with AsyncWebCrawler(config=browser_config) as crawler:
    result = await crawler.arun(url="https://example.com")

 # Using SOCKS proxy
-browser_config = BrowserConfig(proxy="socks5://proxy.example.com:1080")
+browser_config = BrowserConfig(proxy_config={"server": "socks5://proxy.example.com:1080"})
 async with AsyncWebCrawler(config=browser_config) as crawler:
    result = await crawler.arun(url="https://example.com")
 ```
@@ -25,7 +25,11 @@ Use an authenticated proxy with `BrowserConfig`:
 ```python
 from crawl4ai.async_configs import BrowserConfig

-browser_config = BrowserConfig(proxy="http://[username]:[password]@[host]:[port]")
+browser_config = BrowserConfig(proxy_config={
+    "server": "http://[host]:[port]",
+    "username": "[username]",
+    "password": "[password]",
+})
 async with AsyncWebCrawler(config=browser_config) as crawler:
    result = await crawler.arun(url="https://example.com")
 ```
--- a/docs/md_v2/api/parameters.md
+++ b/docs/md_v2/api/parameters.md
@@ -23,7 +23,7 @@ browser_cfg = BrowserConfig(
 | **`headless`**        | `bool` (default: `True`)               | Headless means no visible UI. `False` is handy for debugging.                                                                         |
 | **`viewport_width`**  | `int` (default: `1080`)                | Initial page width (in px). Useful for testing responsive layouts.                                                                    |
 | **`viewport_height`** | `int` (default: `600`)                 | Initial page height (in px).                                                                                                          |
-| **`proxy`**           | `str` (default: `None`)                | Single-proxy URL if you want all traffic to go through it, e.g. `"http://user:pass@proxy:8080"`.                                      |
+| **`proxy`**           | `str` (deprecated)                      | Deprecated; use `proxy_config` instead. If set alone, it is parsed into a `ProxyConfig`; if `proxy_config` is also set, `proxy` is ignored with a warning. |
 | **`proxy_config`**    | `dict` (default: `None`)               | For advanced or multi-proxy needs, specify details like `{"server": "...", "username": "...", ...}`.                                  |
 | **`use_persistent_context`** | `bool` (default: `False`)       | If `True`, uses a **persistent** browser context (keep cookies, sessions across runs). Also sets `use_managed_browser=True`.          |
 | **`user_data_dir`**   | `str or None` (default: `None`)        | Directory to store user data (profiles, cookies). Must be set if you want permanent sessions.                                         |
--- a/docs/md_v2/core/quickstart.md
+++ b/docs/md_v2/core/quickstart.md
@@ -79,7 +79,7 @@ if __name__ == "__main__":
    asyncio.run(main())
 ```

-> IMPORTANT: By default cache mode is set to `CacheMode.ENABLED`. So to have fresh content, you need to set it to `CacheMode.BYPASS`
+> IMPORTANT: By default, the cache mode is `CacheMode.BYPASS`, so every crawl fetches fresh content. Set it to `CacheMode.ENABLED` to enable caching.

 We’ll explore more advanced config in later tutorials (like enabling proxies, PDF output, multi-tab sessions, etc.). For now, just note how you pass these objects to manage crawling.

--- a/tests/async/test_0.4.2_browser_manager.py
+++ b/tests/async/test_0.4.2_browser_manager.py
@@ -112,7 +112,7 @@ async def test_proxy_settings():
        headless=True,
        verbose=False,
        user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36",
-        proxy="http://127.0.0.1:8080",  # Assuming local proxy server for test
+        proxy_config={"server": "http://127.0.0.1:8080"},  # Assuming local proxy server for test
        use_managed_browser=False,
        use_persistent_context=False,
    ) as crawler:
--- a/tests/docker/test_filter_deep_crawl.py
+++ b/tests/docker/test_filter_deep_crawl.py
@@ -0,0 +1,201 @@
+"""
+Test the complete fix for both the filter serialization and JSON serialization issues.
+"""
+
+import asyncio
+import httpx
+
+from crawl4ai import BrowserConfig, CacheMode, CrawlerRunConfig
+from crawl4ai.deep_crawling import BFSDeepCrawlStrategy, FilterChain, URLPatternFilter
+
+BASE_URL = "http://localhost:11234/"  # Adjust port as needed
+
+async def test_with_docker_client():
+    """Deep-crawl through Crawl4aiDockerClient with a URLPatternFilter; return True on success, else False (same as 1419.py)."""
+    from crawl4ai.docker_client import Crawl4aiDockerClient
+    
+    print("=" * 60)
+    print("Testing with Docker Client")
+    print("=" * 60)
+    
+    try:
+        async with Crawl4aiDockerClient(
+            base_url=BASE_URL,
+            verbose=True,
+        ) as client:
+            
+            # Filters wrapped in FilterChain below - exercises the URLPatternFilter serialization fix
+            filter_chain = [
+                URLPatternFilter(
+                    # patterns=["*about*", "*privacy*", "*terms*"],
+                    patterns=["*advanced*"],
+                    reverse=True
+                ),
+            ]
+            
+            crawler_config = CrawlerRunConfig(
+                deep_crawl_strategy=BFSDeepCrawlStrategy(
+                    max_depth=2,  # Keep it shallow for testing
+                    # max_pages=5,  # Limit pages for testing
+                    filter_chain=FilterChain(filter_chain)
+                ),
+                cache_mode=CacheMode.BYPASS,
+            )
+            
+            print("\n1. Testing crawl with filters...")
+            results = await client.crawl(
+                ["https://docs.crawl4ai.com"],  # Seed URL for the deep crawl
+                browser_config=BrowserConfig(headless=True),
+                crawler_config=crawler_config,
+            )
+            
+            if results:
+                print(f"✅ Crawl succeeded! Type: {type(results)}")
+                if hasattr(results, 'success'):
+                    print(f"✅ Results success: {results.success}")
+                    # Test that we can iterate results without JSON errors
+                    if hasattr(results, '__iter__'):
+                        for i, result in enumerate(results):
+                            if hasattr(result, 'url'):
+                                print(f"   Result {i}: {result.url[:50]}...")
+                            else:
+                                print(f"   Result {i}: {str(result)[:50]}...")
+                else:
+                    # No 'success' attribute: treat results as a plain list
+                    print(f"✅ Got {len(results)} results")
+                    for i, result in enumerate(results[:3]):  # Show first 3
+                        print(f"   Result {i}: {result.url[:50]}...")
+            else:
+                print("❌ Crawl failed - no results returned")
+                return False
+                
+        print("\n✅ Docker client test completed successfully!")
+        return True
+        
+    except Exception as e:
+        print(f"❌ Docker client test failed: {e}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+
+async def test_with_rest_api():
+    """POST the deep-crawl payload directly to the REST `crawl` endpoint; return True on a successful response, else False."""
+    print("\n" + "=" * 60)
+    print("Testing with REST API")
+    print("=" * 60)
+    
+    # Serialized {"type", "params"} form of a BFSDeepCrawlStrategy with a FilterChain holding one URLPatternFilter
+    deep_crawl_strategy_payload = {
+        "type": "BFSDeepCrawlStrategy",
+        "params": {
+            "max_depth": 2,
+            # "max_pages": 5,
+            "filter_chain": {
+                "type": "FilterChain",
+                "params": {
+                    "filters": [
+                        {
+                            "type": "URLPatternFilter",
+                            "params": {
+                                "patterns": ["*advanced*"],
+                                "reverse": True
+                            }
+                        }
+                    ]
+                }
+            }
+        }
+    }
+    
+    crawl_payload = {
+        "urls": ["https://docs.crawl4ai.com"],
+        "browser_config": {"type": "BrowserConfig", "params": {"headless": True}},
+        "crawler_config": {
+            "type": "CrawlerRunConfig",
+            "params": {
+                "deep_crawl_strategy": deep_crawl_strategy_payload,
+                "cache_mode": "bypass"
+            }
+        }
+    }
+    
+    try:
+        async with httpx.AsyncClient() as client:
+            print("\n1. Sending crawl request to REST API...")
+            response = await client.post(
+                f"{BASE_URL}crawl",
+                json=crawl_payload,
+                timeout=30
+            )
+            
+            if response.status_code == 200:
+                print(f"✅ REST API returned 200 OK")
+                data = response.json()
+                if data.get("success"):
+                    results = data.get("results", [])
+                    print(f"✅ Got {len(results)} results")
+                    for i, result in enumerate(results[:3]):
+                        print(f"   Result {i}: {result.get('url', 'unknown')[:50]}...")
+                else:
+                    print(f"❌ Crawl not successful: {data}")
+                    return False
+            else:
+                print(f"❌ REST API returned {response.status_code}")
+                print(f"   Response: {response.text[:500]}")
+                return False
+                
+        print("\n✅ REST API test completed successfully!")
+        return True
+        
+    except Exception as e:
+        print(f"❌ REST API test failed: {e}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+
+async def main():
+    """Run both checks one after the other, print a summary, and return 0 if all passed, else 1."""
+    print("\n🧪 TESTING COMPLETE FIX FOR DOCKER FILTER AND JSON ISSUES")
+    print("=" * 60)
+    print("Make sure the server is running with the updated code!")
+    print("=" * 60)
+    
+    results = []
+    
+    # Test 1: Docker client
+    docker_passed = await test_with_docker_client()
+    results.append(("Docker Client", docker_passed))
+    
+    # Test 2: REST API
+    rest_passed = await test_with_rest_api()
+    results.append(("REST API", rest_passed))
+    
+    # Summary
+    print("\n" + "=" * 60)
+    print("FINAL TEST SUMMARY")
+    print("=" * 60)
+    
+    # Print one aligned status line per check.
+    for test_name, passed in results:
+        status = "✅ PASSED" if passed else "❌ FAILED"
+        print(f"{test_name:20} {status}")
+    # The run succeeds only if every check returned a truthy result.
+    all_passed = all(passed for _, passed in results)
+    
+    print("=" * 60)
+    if all_passed:
+        print("🎉 ALL TESTS PASSED! Both issues are fully resolved!")
+        print("\nThe fixes:")
+        print("1. Filter serialization: Fixed by not serializing private __slots__")
+        print("2. JSON serialization: Fixed by converting property descriptors to strings in model_dump()")
+    else:
+        print("⚠️ Some tests failed. Please check the server logs for details.")
+    
+    return 0 if all_passed else 1
+
+
+if __name__ == "__main__":
+    import sys
+    sys.exit(asyncio.run(main()))  # process exit status is main()'s return value (0 or 1)
--- a/tests/memory/test_docker_config_gen.py
+++ b/tests/memory/test_docker_config_gen.py
@@ -24,7 +24,7 @@ CASES = [
    # --- BrowserConfig variants ---
    "BrowserConfig()",
    "BrowserConfig(headless=False, extra_args=['--disable-gpu'])",
-    "BrowserConfig(browser_mode='builtin', proxy='http://1.2.3.4:8080')",
+    "BrowserConfig(browser_mode='builtin', proxy_config={'server': 'http://1.2.3.4:8080'})",
 ]

 for code in CASES:
--- a/tests/proxy/test_proxy_deprecation.py
+++ b/tests/proxy/test_proxy_deprecation.py
@@ -0,0 +1,42 @@
+import warnings
+
+import pytest
+
+from crawl4ai.async_configs import BrowserConfig, ProxyConfig
+
+
+def test_browser_config_proxy_string_emits_deprecation_and_autoconverts():
+    """BrowserConfig(proxy=...) must emit the UserWarning deprecation notice and convert to ProxyConfig."""
+    proxy_str = "23.95.150.145:6114:username:password"
+    with warnings.catch_warnings(record=True) as caught:
+        warnings.simplefilter("always")  # set inside the context so global filters are restored on exit
+        cfg = BrowserConfig(proxy=proxy_str, headless=True)
+
+    # BrowserConfig warns with UserWarning (not DeprecationWarning) for the deprecated 'proxy' parameter.
+    dep_warnings = [w for w in caught if issubclass(w.category, UserWarning) and "deprecated" in str(w.message)]
+    assert dep_warnings, "Expected a UserWarning deprecation notice when using BrowserConfig(proxy=...)"
+    assert cfg.proxy is None, "cfg.proxy should be None after auto-conversion"
+    assert isinstance(cfg.proxy_config, ProxyConfig), "cfg.proxy_config should be ProxyConfig instance"
+    assert cfg.proxy_config.username == "username"
+    assert cfg.proxy_config.password == "password"
+    assert cfg.proxy_config.server.startswith("http://")
+    assert cfg.proxy_config.server.endswith(":6114")
+
+
+def test_browser_config_with_proxy_config_emits_no_deprecation():
+    """BrowserConfig(proxy_config=...) must not trigger the 'proxy' deprecation warning, whatever its category."""
+    with warnings.catch_warnings(record=True) as caught:
+        warnings.simplefilter("always")  # set inside the context so global filters are restored on exit
+        cfg = BrowserConfig(
+            headless=True,
+            proxy_config={
+                "server": "http://127.0.0.1:8080",
+                "username": "u",
+                "password": "p",
+            },
+        )
+
+    dep_warnings = [w for w in caught if "'proxy' parameter is deprecated" in str(w.message)]
+    assert not dep_warnings, "Did not expect the 'proxy' deprecation warning when using proxy_config"
+    assert cfg.proxy is None
+    assert isinstance(cfg.proxy_config, ProxyConfig)
Author	SHA1	Message	Date
AHMET YILMAZ	1717827732	refactor(BrowserConfig): change deprecation warning for 'proxy' parameter to UserWarning	2025-09-12 11:10:38 +08:00
AHMET YILMAZ	4ed33fce9e	Remove deprecated test for 'proxy' parameter in BrowserConfig and update .gitignore to include test_scripts directory.	2025-08-28 17:26:10 +08:00
AHMET YILMAZ	f7a3366f72	#1375 : refactor(proxy) Deprecate 'proxy' parameter in BrowserConfig and enhance proxy string parsing - Updated ProxyConfig.from_string to support multiple proxy formats, including URLs with credentials. - Deprecated the 'proxy' parameter in BrowserConfig, replacing it with 'proxy_config' for better flexibility. - Added warnings for deprecated usage and clarified behavior when both parameters are provided. - Updated documentation and tests to reflect changes in proxy configuration handling.	2025-08-28 17:21:49 +08:00
Nasrin	4e1c4bd24e	Merge pull request #1436 from unclecode/fix/docker-filter fix(docker): resolve filter serialization and JSON encoding errors in deep crawl strategy	2025-08-27 11:08:42 +08:00
Nasrin	cce3390a2d	Merge pull request #1426 from unclecode/fix/update-quickstart-and-adaptive-strategies-docs Update Quickstart and Adaptive Strategies documentation	2025-08-26 16:53:47 +08:00
Nasrin	4fe2d01361	Merge pull request #1440 from unclecode/feature/docker-llm-parameters feat(docker): Add temperature and base_url parameters for LLM configuration	2025-08-26 16:48:17 +08:00
ntohidi	38f3ea42a7	fix(logger): ensure logger is a Logger instance in crawling strategies. ref #1437	2025-08-26 12:06:56 +08:00
ntohidi	102352eac4	fix(docker): resolve filter serialization and JSON encoding errors in deep crawl strategy (ref #1419 ) - Fix URLPatternFilter serialization by preventing private __slots__ from being serialized as constructor params - Add public attributes to URLPatternFilter to store original constructor parameters for proper serialization - Handle property descriptors in CrawlResult.model_dump() to prevent JSON serialization errors - Ensure filter chains work correctly with Docker client and REST API The issue occurred because: 1. Private implementation details (_simple_suffixes, etc.) were being serialized and passed as constructor arguments during deserialization 2. Property descriptors were being included in the serialized output, causing "Object of type property is not JSON serializable" errors Changes: - async_configs.py: Comment out __slots__ serialization logic (lines 100-109) - filters.py: Add patterns, use_glob, reverse to URLPatternFilter __slots__ and store as public attributes - models.py: Convert property descriptors to strings in model_dump() instead of including them directly	2025-08-25 14:04:08 +08:00
Soham Kukreti	c09a57644f	docs: update adaptive crawler docs and cache defaults; remove deprecated examples (#1330 ) - Replace BaseStrategy with CrawlStrategy in custom strategy examples (DomainSpecificStrategy, HybridStrategy) - Remove “Custom Link Scoring” and “Caching Strategy” sections no longer aligned with current library - Revise memory pruning example to use adaptive.get_relevant_content and index-based retention of top 500 docs - Correct Quickstart note: default cache mode is CacheMode.BYPASS; instruct enabling with CacheMode.ENABLED	2025-08-21 19:11:31 +05:30