#1490 feat(ManagedBrowser): add viewport size configuration for browser launch

Merge pull request #1464 from unclecode/fix/proxy_deprecation
Fix/proxy deprecation
2025-09-17 17:40:38 +08:00 · 2025-09-16 15:48:45 +08:00 · 2025-09-16 15:45:54 +08:00 · 2025-09-12 11:10:38 +08:00 · 2025-09-11 17:40:43 +08:00 · 2025-09-09 12:56:33 +08:00
14 changed files with 314 additions and 63 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -265,7 +265,7 @@ CLAUDE.md
 tests/**/test_site
 tests/**/reports
 tests/**/benchmark_reports
-
+test_scripts/
 docs/**/data
 .codecat/

--- a/crawl4ai/async_configs.py
+++ b/crawl4ai/async_configs.py
@@ -1,5 +1,6 @@
 import os
 from typing import Union
+import warnings
 from .config import (
    DEFAULT_PROVIDER,
    DEFAULT_PROVIDER_API_KEY,
@@ -257,24 +258,39 @@ class ProxyConfig:
    
    @staticmethod
    def from_string(proxy_str: str) -> "ProxyConfig":
-        """Create a ProxyConfig from a string in the format 'ip:port:username:password'."""
-        parts = proxy_str.split(":")
-        if len(parts) == 4:  # ip:port:username:password
+        """Create a ProxyConfig from a proxy string; raise ValueError if no supported format matches.
+
+        Supported formats:
+        - 'http://username:password@ip:port'
+        - 'http://ip:port'
+        - 'socks5://ip:port'
+        - 'ip:port:username:password'
+        - 'ip:port'
+        """
+        s = (proxy_str or "").strip()
+        # URL with credentials: split on the LAST '@' so a password containing '@' stays intact
+        if "@" in s and "://" in s:
+            auth_part, server_part = s.rsplit("@", 1)
+            protocol, credentials = auth_part.split("://", 1)
+            if ":" in credentials:
+                username, password = credentials.split(":", 1)
+                return ProxyConfig(
+                    server=f"{protocol}://{server_part}",
+                    username=username,
+                    password=password,
+                )
+        # URL without credentials (keep scheme)
+        if "://" in s and "@" not in s:
+            return ProxyConfig(server=s)
+        # Colon separated forms (no scheme given, so http:// is assumed)
+        parts = s.split(":")
+        if len(parts) == 4:
            ip, port, username, password = parts
-            return ProxyConfig(
-                server=f"http://{ip}:{port}",
-                username=username,
-                password=password,
-                ip=ip
-            )
-        elif len(parts) == 2:  # ip:port only
+            return ProxyConfig(server=f"http://{ip}:{port}", username=username, password=password)
+        if len(parts) == 2:
            ip, port = parts
-            return ProxyConfig(
-                server=f"http://{ip}:{port}",
-                ip=ip
-            )
-        else:
-            raise ValueError(f"Invalid proxy string format: {proxy_str}")
+            return ProxyConfig(server=f"http://{ip}:{port}")
+        raise ValueError(f"Invalid proxy string format: {proxy_str}")
    
    @staticmethod
    def from_dict(proxy_dict: Dict) -> "ProxyConfig":
@@ -438,6 +454,7 @@ class BrowserConfig:
        host: str = "localhost",
        enable_stealth: bool = False,
    ):
+        
        self.browser_type = browser_type
        self.headless = headless 
        self.browser_mode = browser_mode
@@ -450,13 +467,22 @@ class BrowserConfig:
        if self.browser_type in ["firefox", "webkit"]:
            self.channel = ""
            self.chrome_channel = ""
+        if proxy:
+            warnings.warn("The 'proxy' parameter is deprecated and will be removed in a future release. Use 'proxy_config' instead.", UserWarning)
        self.proxy = proxy
        self.proxy_config = proxy_config
        if isinstance(self.proxy_config, dict):
            self.proxy_config = ProxyConfig.from_dict(self.proxy_config)
        if isinstance(self.proxy_config, str):
            self.proxy_config = ProxyConfig.from_string(self.proxy_config)
-
+        
+        if self.proxy and self.proxy_config:
+            warnings.warn("Both 'proxy' and 'proxy_config' are provided. 'proxy_config' will take precedence.", UserWarning)
+            self.proxy = None
+        elif self.proxy:
+            # Convert proxy string to ProxyConfig if proxy_config is not provided
+            self.proxy_config = ProxyConfig.from_string(self.proxy)
+            self.proxy = None

        self.viewport_width = viewport_width
        self.viewport_height = viewport_height
--- a/crawl4ai/browser_manager.py
+++ b/crawl4ai/browser_manager.py
@@ -15,6 +15,7 @@ from .js_snippet import load_js_script
 from .config import DOWNLOAD_PAGE_TIMEOUT
 from .async_configs import BrowserConfig, CrawlerRunConfig
 from .utils import get_chromium_path
+import warnings


 BROWSER_DISABLE_OPTIONS = [
@@ -368,6 +369,9 @@ class ManagedBrowser:
            ]
            if self.headless:
                flags.append("--headless=new")
+            # Add viewport flag if specified in config
+            if self.browser_config.viewport_height and self.browser_config.viewport_width:
+                flags.append(f"--window-size={self.browser_config.viewport_width},{self.browser_config.viewport_height}")
            # merge common launch flags
            flags.extend(self.build_browser_flags(self.browser_config))
        elif self.browser_type == "firefox":
@@ -741,17 +745,18 @@ class BrowserManager:
            )
            os.makedirs(browser_args["downloads_path"], exist_ok=True)

-        if self.config.proxy or self.config.proxy_config:
+        if self.config.proxy:
+            warnings.warn(
+                "BrowserConfig.proxy is deprecated and ignored. Use proxy_config instead.",
+                DeprecationWarning,
+            )
+        if self.config.proxy_config:
            from playwright.async_api import ProxySettings

-            proxy_settings = (
-                ProxySettings(server=self.config.proxy)
-                if self.config.proxy
-                else ProxySettings(
-                    server=self.config.proxy_config.server,
-                    username=self.config.proxy_config.username,
-                    password=self.config.proxy_config.password,
-                )
+            proxy_settings = ProxySettings(
+                server=self.config.proxy_config.server,
+                username=self.config.proxy_config.username,
+                password=self.config.proxy_config.password,
            )
            browser_args["proxy"] = proxy_settings

--- a/crawl4ai/deep_crawling/bff_strategy.py
+++ b/crawl4ai/deep_crawling/bff_strategy.py
@@ -122,11 +122,6 @@ class BestFirstCrawlingStrategy(DeepCrawlStrategy):
                
            valid_links.append(base_url)
            
-        # If we have more valid links than capacity, limit them
-        if len(valid_links) > remaining_capacity:
-            valid_links = valid_links[:remaining_capacity]
-            self.logger.info(f"Limiting to {remaining_capacity} URLs due to max_pages limit")
-            
        # Record the new depths and add to next_links
        for url in valid_links:
            depths[url] = new_depth
@@ -146,7 +141,8 @@ class BestFirstCrawlingStrategy(DeepCrawlStrategy):
        """
        queue: asyncio.PriorityQueue = asyncio.PriorityQueue()
        # Push the initial URL with score 0 and depth 0.
-        await queue.put((0, 0, start_url, None))
+        initial_score = self.url_scorer.score(start_url) if self.url_scorer else 0
+        await queue.put((-initial_score, 0, start_url, None))
        visited: Set[str] = set()
        depths: Dict[str, int] = {start_url: 0}

@@ -193,7 +189,7 @@ class BestFirstCrawlingStrategy(DeepCrawlStrategy):
                result.metadata = result.metadata or {}
                result.metadata["depth"] = depth
                result.metadata["parent_url"] = parent_url
-                result.metadata["score"] = score
+                result.metadata["score"] = -score
                
                # Count only successful crawls toward max_pages limit
                if result.success:
@@ -214,7 +210,7 @@ class BestFirstCrawlingStrategy(DeepCrawlStrategy):
                    for new_url, new_parent in new_links:
                        new_depth = depths.get(new_url, depth + 1)
                        new_score = self.url_scorer.score(new_url) if self.url_scorer else 0
-                        await queue.put((new_score, new_depth, new_url, new_parent))
+                        await queue.put((-new_score, new_depth, new_url, new_parent))

        # End of crawl.

--- a/deploy/docker/c4ai-code-context.md
+++ b/deploy/docker/c4ai-code-context.md
@@ -7520,17 +7520,18 @@ class BrowserManager:
            )
            os.makedirs(browser_args["downloads_path"], exist_ok=True)

-        if self.config.proxy or self.config.proxy_config:
+        if self.config.proxy:
+            warnings.warn(
+                "BrowserConfig.proxy is deprecated and ignored. Use proxy_config instead.",
+                DeprecationWarning,
+            )
+        if self.config.proxy_config:
            from playwright.async_api import ProxySettings

-            proxy_settings = (
-                ProxySettings(server=self.config.proxy)
-                if self.config.proxy
-                else ProxySettings(
-                    server=self.config.proxy_config.server,
-                    username=self.config.proxy_config.username,
-                    password=self.config.proxy_config.password,
-                )
+            proxy_settings = ProxySettings(
+                server=self.config.proxy_config.server,
+                username=self.config.proxy_config.username,
+                password=self.config.proxy_config.password,
            )
            browser_args["proxy"] = proxy_settings

--- a/deploy/docker/server.py
+++ b/deploy/docker/server.py
@@ -482,9 +482,14 @@ async def crawl(
 ):
    """
    Crawl a list of URLs and return the results as JSON.
+    For streaming responses, use /crawl/stream endpoint.
    """
    if not crawl_request.urls:
        raise HTTPException(400, "At least one URL required")
+    # Check whether it is a redirection for a streaming request
+    crawler_config = CrawlerRunConfig.load(crawl_request.crawler_config)
+    if crawler_config.stream:
+        return await stream_process(crawl_request=crawl_request)
    results = await handle_crawl_request(
        urls=crawl_request.urls,
        browser_config=crawl_request.browser_config,
@@ -506,12 +511,16 @@ async def crawl_stream(
 ):
    if not crawl_request.urls:
        raise HTTPException(400, "At least one URL required")
+
+    return await stream_process(crawl_request=crawl_request)
+
+async def stream_process(crawl_request: CrawlRequest):
    crawler, gen = await handle_stream_crawl_request(
        urls=crawl_request.urls,
        browser_config=crawl_request.browser_config,
        crawler_config=crawl_request.crawler_config,
        config=config,
-    )
+)
    return StreamingResponse(
        stream_results(crawler, gen),
        media_type="application/x-ndjson",
--- a/deploy/docker/static/playground/index.html
+++ b/deploy/docker/static/playground/index.html
@@ -371,7 +371,7 @@

                <div class="flex items-center">
                    <input id="st-stream" type="checkbox" class="mr-2">
-                    <label for="st-stream" class="text-sm">Use /crawl/stream</label>
+                    <label for="st-stream" class="text-sm">Enable streaming mode</label>
                    <button id="st-run"
                        class="ml-auto bg-accent text-dark px-4 py-2 rounded hover:bg-opacity-90 font-medium">
                        Run Stress Test
@@ -596,6 +596,14 @@
            forceHighlightElement(curlCodeEl);
        }

+        // Detect a stream request: crawler_config.params.stream, flat crawler_config.stream (shape built by runCrawl's parse-error fallback), or top-level stream
+        function shouldUseStream(payload) {
+            const toBool = (v) => v === true || (typeof v === 'string' && v.toLowerCase() === 'true');
+            const crawlerCfg = (payload && payload.crawler_config) || {};
+            const nested = crawlerCfg.params && crawlerCfg.params.stream;
+            return toBool(nested) || toBool(crawlerCfg.stream) || toBool(payload && payload.stream);
+        }
+
        // Main run function
        async function runCrawl() {
            const endpoint = document.getElementById('endpoint').value;
@@ -611,16 +619,24 @@
                        : { browser_config: cfgJson };
                }
            } catch (err) {
-                updateStatus('error');
-                document.querySelector('#response-content code').textContent =
-                    JSON.stringify({ error: err.message }, null, 2);
-                forceHighlightElement(document.querySelector('#response-content code'));
-                return; // stop run
+                const codeText = cm.getValue();
+                const streamFlag = /stream\s*=\s*True/i.test(codeText);
+                const isCrawlEndpoint = document.getElementById('endpoint').value === 'crawl';
+                if (isCrawlEndpoint && streamFlag) {
+                    // Fallback: proceed with minimal config only for stream
+                    advConfig = { crawler_config: { stream: true } };
+                } else {
+                    updateStatus('error');
+                    document.querySelector('#response-content code').textContent =
+                        JSON.stringify({ error: err.message }, null, 2);
+                    forceHighlightElement(document.querySelector('#response-content code'));
+                    return; // stop run
+                }
            }

            const endpointMap = {
                crawl: '/crawl',
-                // crawl_stream: '/crawl/stream',
+                crawl_stream: '/crawl/stream', // Keep for backward compatibility
                md: '/md',
                llm: '/llm'
            };
@@ -647,7 +663,7 @@
                // This will be handled directly in the fetch below
                payload = null;
            } else {
-                // Default payload for /crawl and /crawl/stream
+                // Default payload for /crawl (supports both streaming and batch modes)
                payload = {
                    urls,
                    ...advConfig
@@ -659,6 +675,7 @@
            try {
                const startTime = performance.now();
                let response, responseData;
+                const useStreamOverride = (endpoint === 'crawl') && shouldUseStream(payload);

                if (endpoint === 'llm') {
                    // Special handling for LLM endpoint which uses URL pattern: /llm/{encoded_url}?q={query}
@@ -681,8 +698,8 @@
                    document.querySelector('#response-content code').textContent = JSON.stringify(responseData, null, 2);
                    document.querySelector('#response-content code').className = 'json hljs';
                    forceHighlightElement(document.querySelector('#response-content code'));
-                } else if (endpoint === 'crawl_stream') {
-                    // Stream processing
+                } else if (endpoint === 'crawl_stream' || useStreamOverride) {
+                    // Stream processing - now handled directly by /crawl endpoint
                    response = await fetch(api, {
                        method: 'POST',
                        headers: { 'Content-Type': 'application/json' },
@@ -757,6 +774,7 @@
                    const question = document.getElementById('llm-question').value.trim() || "What is this page about?";
                    generateSnippets(`${api}/${encodedUrl}?q=${encodeURIComponent(question)}`, null, 'GET');
                } else {
+                    // Use the same API endpoint for both streaming and non-streaming
                    generateSnippets(api, payload);
                }
            } catch (error) {
@@ -786,7 +804,7 @@
            document.getElementById('stress-avg-time').textContent = '0';
            document.getElementById('stress-peak-mem').textContent = '0';

-            const api = useStream ? '/crawl/stream' : '/crawl';
+            const api = '/crawl'; // Always use /crawl - backend handles streaming internally
            const urls = Array.from({ length: total }, (_, i) => `https://httpbin.org/anything/stress-${i}-${Date.now()}`);
            const chunks = [];

--- a/docs/md_v2/advanced/proxy-security.md
+++ b/docs/md_v2/advanced/proxy-security.md
@@ -7,13 +7,13 @@ Simple proxy configuration with `BrowserConfig`:
 ```python
 from crawl4ai.async_configs import BrowserConfig

-# Using proxy URL
-browser_config = BrowserConfig(proxy="http://proxy.example.com:8080")
+# Using HTTP proxy
+browser_config = BrowserConfig(proxy_config={"server": "http://proxy.example.com:8080"})
 async with AsyncWebCrawler(config=browser_config) as crawler:
    result = await crawler.arun(url="https://example.com")

 # Using SOCKS proxy
-browser_config = BrowserConfig(proxy="socks5://proxy.example.com:1080")
+browser_config = BrowserConfig(proxy_config={"server": "socks5://proxy.example.com:1080"})
 async with AsyncWebCrawler(config=browser_config) as crawler:
    result = await crawler.arun(url="https://example.com")
 ```
@@ -25,7 +25,11 @@ Use an authenticated proxy with `BrowserConfig`:
 ```python
 from crawl4ai.async_configs import BrowserConfig

-browser_config = BrowserConfig(proxy="http://[username]:[password]@[host]:[port]")
+browser_config = BrowserConfig(proxy_config={
+    "server": "http://[host]:[port]",
+    "username": "[username]",
+    "password": "[password]",
+})
 async with AsyncWebCrawler(config=browser_config) as crawler:
    result = await crawler.arun(url="https://example.com")
 ```
--- a/docs/md_v2/api/parameters.md
+++ b/docs/md_v2/api/parameters.md
@@ -23,7 +23,7 @@ browser_cfg = BrowserConfig(
 | **`headless`**        | `bool` (default: `True`)               | Headless means no visible UI. `False` is handy for debugging.                                                                         |
 | **`viewport_width`**  | `int` (default: `1080`)                | Initial page width (in px). Useful for testing responsive layouts.                                                                    |
 | **`viewport_height`** | `int` (default: `600`)                 | Initial page height (in px).                                                                                                          |
-| **`proxy`**           | `str` (default: `None`)                | Single-proxy URL if you want all traffic to go through it, e.g. `"http://user:pass@proxy:8080"`.                                      |
+| **`proxy`**           | `str` (deprecated)                     | Deprecated; use `proxy_config` instead. If set, a warning is emitted and the string is converted to a `ProxyConfig` internally. |
 | **`proxy_config`**    | `dict` (default: `None`)               | For advanced or multi-proxy needs, specify details like `{"server": "...", "username": "...", ...}`.                                  |
 | **`use_persistent_context`** | `bool` (default: `False`)       | If `True`, uses a **persistent** browser context (keep cookies, sessions across runs). Also sets `use_managed_browser=True`.          |
 | **`user_data_dir`**   | `str or None` (default: `None`)        | Directory to store user data (profiles, cookies). Must be set if you want permanent sessions.                                         |
--- a/tests/async/test_0.4.2_browser_manager.py
+++ b/tests/async/test_0.4.2_browser_manager.py
@@ -112,7 +112,7 @@ async def test_proxy_settings():
        headless=True,
        verbose=False,
        user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36",
-        proxy="http://127.0.0.1:8080",  # Assuming local proxy server for test
+        proxy_config={"server": "http://127.0.0.1:8080"},  # Assuming local proxy server for test
        use_managed_browser=False,
        use_persistent_context=False,
    ) as crawler:
--- a/tests/docker/test_server_requests.py
+++ b/tests/docker/test_server_requests.py
@@ -143,7 +143,40 @@ class TestCrawlEndpoints:
        assert "<h1>Herman Melville - Moby-Dick</h1>" in result["html"]
        # We don't specify a markdown generator in this test, so don't make assumptions about markdown field
        # It might be null, missing, or populated depending on the server's default behavior
+    async def test_crawl_with_stream_direct(self, async_client: httpx.AsyncClient):
+        """Verify that POSTing to /crawl with stream=True in crawler_config yields an NDJSON stream directly."""
+        payload = {
+            "urls": [SIMPLE_HTML_URL],
+            "browser_config": {
+                "type": "BrowserConfig",
+                "params": {
+                    "headless": True,
+                }
+            },
+            "crawler_config": {
+                "type": "CrawlerRunConfig", 
+                "params": {
+                    "stream": True,  # Ask /crawl itself to stream rather than calling /crawl/stream
+                    "screenshot": False,
+                    "cache_mode": CacheMode.BYPASS.value
+                }
+            }
+        }

+        # POST to /crawl (not /crawl/stream); the response is expected to be streamed NDJSON
+        async with async_client.stream("POST", "/crawl", json=payload) as response:
+            assert response.status_code == 200
+            assert response.headers["content-type"] == "application/x-ndjson"
+            assert response.headers.get("x-stream-status") == "active"
+
+            results = await process_streaming_response(response)
+
+            assert len(results) == 1
+            result = results[0]
+            await assert_crawl_result_structure(result)
+            assert result["success"] is True
+            assert result["url"] == SIMPLE_HTML_URL
+            assert "<h1>Herman Melville - Moby-Dick</h1>" in result["html"]
    async def test_simple_crawl_single_url_streaming(self, async_client: httpx.AsyncClient):
        """Test /crawl/stream with a single URL and simple config values."""
        payload = {
--- a/tests/general/test_bff_scoring.py
+++ b/tests/general/test_bff_scoring.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+"""
+Simple test to verify BestFirstCrawlingStrategy fixes.
+This test crawls a real website and shows that:
+1. Higher-scoring pages are crawled first (priority queue fix)
+2. Links are scored before truncation (link discovery fix)
+"""
+
+import asyncio
+from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
+from crawl4ai.deep_crawling import BestFirstCrawlingStrategy
+from crawl4ai.deep_crawling.scorers import KeywordRelevanceScorer
+
+async def test_best_first_strategy():
+    """Crawl docs.python.org with keyword scoring and print the crawl order and score statistics."""
+    
+    print("=" * 70)
+    print("Testing BestFirstCrawlingStrategy with Real URL")
+    print("=" * 70)
+    print("\nThis test will:")
+    print("1. Crawl Python.org documentation")
+    print("2. Score pages based on keywords: 'tutorial', 'guide', 'reference'")
+    print("3. Show that higher-scoring pages are crawled first")
+    print("-" * 70)
+    
+    # Create a keyword scorer that prioritizes tutorial/guide pages
+    scorer = KeywordRelevanceScorer(
+        keywords=["tutorial", "guide", "reference", "documentation"],
+        weight=1.0,
+        case_sensitive=False
+    )
+    
+    # Create the strategy with scoring
+    strategy = BestFirstCrawlingStrategy(
+        max_depth=2,          # Crawl 2 levels deep
+        max_pages=10,         # Limit to 10 pages total
+        url_scorer=scorer,    # Use keyword scoring
+        include_external=False  # Only internal links
+    )
+    
+    # Configure browser and crawler
+    browser_config = BrowserConfig(
+        headless=True,    # Run in background
+        verbose=False     # Reduce output noise
+    )
+    
+    crawler_config = CrawlerRunConfig(
+        deep_crawl_strategy=strategy,
+        verbose=False
+    )
+    
+    print("\nStarting crawl of https://docs.python.org/3/")
+    print("Looking for pages with keywords: tutorial, guide, reference, documentation")
+    print("-" * 70)
+    
+    crawled_urls = []
+    
+    async with AsyncWebCrawler(config=browser_config) as crawler:
+        # Crawl and collect results
+        results = await crawler.arun(
+            url="https://docs.python.org/3/",
+            config=crawler_config
+        )
+        
+        # Process results
+        if isinstance(results, list):
+            for result in results:
+                score = result.metadata.get('score', 0) if result.metadata else 0
+                depth = result.metadata.get('depth', 0) if result.metadata else 0
+                crawled_urls.append({
+                    'url': result.url,
+                    'score': score,
+                    'depth': depth,
+                    'success': result.success
+                })
+    
+    print("\n" + "=" * 70)
+    print("CRAWL RESULTS (in order of crawling)")
+    print("=" * 70)
+    
+    for i, item in enumerate(crawled_urls, 1):
+        status = "✓" if item['success'] else "✗"
+        # Highlight high-scoring pages
+        if item['score'] > 0.5:
+            print(f"{i:2}. [{status}] Score: {item['score']:.2f} | Depth: {item['depth']} | {item['url']}")
+            print("     ^ HIGH SCORE - Contains keywords!")
+        else:
+            print(f"{i:2}. [{status}] Score: {item['score']:.2f} | Depth: {item['depth']} | {item['url']}")
+    
+    print("\n" + "=" * 70)
+    print("ANALYSIS")
+    print("=" * 70)
+    
+    # Check if higher scores appear early in the crawl
+    scores = [item['score'] for item in crawled_urls[1:]]  # Skip initial URL
+    high_score_indices = [i for i, s in enumerate(scores) if s > 0.3]
+    
+    if high_score_indices and high_score_indices[0] < len(scores) / 2:
+        print("✅ SUCCESS: Higher-scoring pages (with keywords) were crawled early!")
+        print("   This confirms the priority queue fix is working.")
+    else:
+        print("⚠️  Check the crawl order above - higher scores should appear early")
+    
+    # Show score distribution (guarded so an empty crawl prints zeros instead of raising)
+    print("\nScore Statistics:")
+    print(f"  - Total pages crawled: {len(crawled_urls)}")
+    print(f"  - Average score: {sum(item['score'] for item in crawled_urls) / max(len(crawled_urls), 1):.2f}")
+    print(f"  - Max score: {max((item['score'] for item in crawled_urls), default=0):.2f}")
+    print(f"  - Pages with keywords: {sum(1 for item in crawled_urls if item['score'] > 0.3)}")
+    
+    print("\n" + "=" * 70)
+    print("TEST COMPLETE")
+    print("=" * 70)
+
+if __name__ == "__main__":
+    print("\n🔍 BestFirstCrawlingStrategy Simple Test\n")
+    asyncio.run(test_best_first_strategy())
--- a/tests/memory/test_docker_config_gen.py
+++ b/tests/memory/test_docker_config_gen.py
@@ -24,7 +24,7 @@ CASES = [
    # --- BrowserConfig variants ---
    "BrowserConfig()",
    "BrowserConfig(headless=False, extra_args=['--disable-gpu'])",
-    "BrowserConfig(browser_mode='builtin', proxy='http://1.2.3.4:8080')",
+    "BrowserConfig(browser_mode='builtin', proxy_config={'server': 'http://1.2.3.4:8080'})",
 ]

 for code in CASES:
--- a/tests/proxy/test_proxy_deprecation.py
+++ b/tests/proxy/test_proxy_deprecation.py
@@ -0,0 +1,42 @@
+import warnings
+
+import pytest
+
+from crawl4ai.async_configs import BrowserConfig, ProxyConfig
+
+
+def test_browser_config_proxy_string_emits_deprecation_and_autoconverts():
+    """BrowserConfig(proxy=...) must emit a deprecation warning and convert the string into proxy_config."""
+    proxy_str = "23.95.150.145:6114:username:password"
+    with warnings.catch_warnings(record=True) as caught:
+        warnings.simplefilter("always")  # set inside the context: nothing is filtered out and global filters are restored on exit
+        cfg = BrowserConfig(proxy=proxy_str, headless=True)
+
+    dep_warnings = [w for w in caught if issubclass(w.category, (DeprecationWarning, UserWarning)) and "deprecated" in str(w.message)]
+    assert dep_warnings, "Expected a deprecation warning when using BrowserConfig(proxy=...)"
+
+    assert cfg.proxy is None, "cfg.proxy should be None after auto-conversion"
+    assert isinstance(cfg.proxy_config, ProxyConfig), "cfg.proxy_config should be ProxyConfig instance"
+    assert cfg.proxy_config.username == "username"
+    assert cfg.proxy_config.password == "password"
+    assert cfg.proxy_config.server.startswith("http://")
+    assert cfg.proxy_config.server.endswith(":6114")
+
+
+def test_browser_config_with_proxy_config_emits_no_deprecation():
+    """Passing only proxy_config must not trigger the 'proxy' deprecation warning (whatever its category)."""
+    with warnings.catch_warnings(record=True) as caught:
+        warnings.simplefilter("always")  # set inside the context: nothing is filtered out and global filters are restored on exit
+        cfg = BrowserConfig(
+            headless=True,
+            proxy_config={
+                "server": "http://127.0.0.1:8080",
+                "username": "u",
+                "password": "p",
+            },
+        )
+
+    dep_warnings = [w for w in caught if issubclass(w.category, (DeprecationWarning, UserWarning)) and "deprecated" in str(w.message)]
+    assert not dep_warnings, "Did not expect a deprecation warning when using proxy_config"
+    assert cfg.proxy is None
+    assert isinstance(cfg.proxy_config, ProxyConfig)
Author	SHA1	Message	Date
AHMET YILMAZ	e3467c08f6	#1490 feat(ManagedBrowser): add viewport size configuration for browser launch	2025-09-17 17:40:38 +08:00
Nasrin	3899ac3d3b	Merge pull request #1464 from unclecode/fix/proxy_deprecation Fix/proxy deprecation	2025-09-16 15:48:45 +08:00
Nasrin	23431d8109	Merge pull request #1389 from unclecode/fix/deep-crawl-scoring fix(deep-crawl): BestFirst priority inversion	2025-09-16 15:45:54 +08:00
AHMET YILMAZ	1717827732	refactor(BrowserConfig): change deprecation warning for 'proxy' parameter to UserWarning	2025-09-12 11:10:38 +08:00
Nasrin	f8eaf01ed1	Merge pull request #1467 from unclecode/fix/request-crawl-stream Fix: request /crawl with stream: true issue	2025-09-11 17:40:43 +08:00
Nasrin	14b42b1f9a	Merge pull request #1471 from unclecode/fix/adaptive-crawler-llm-config Fix: allow custom LLM providers for adaptive crawler embedding config…	2025-09-09 12:56:33 +08:00
AHMET YILMAZ	1874a7b8d2	fix: update option labels in request builder for clarity	2025-09-05 17:06:25 +08:00
AHMET YILMAZ	6a3b3e9d38	Commit without API	2025-09-03 17:02:40 +08:00
AHMET YILMAZ	4ed33fce9e	Remove deprecated test for 'proxy' parameter in BrowserConfig and update .gitignore to include test_scripts directory.	2025-08-28 17:26:10 +08:00
AHMET YILMAZ	f7a3366f72	#1375 : refactor(proxy) Deprecate 'proxy' parameter in BrowserConfig and enhance proxy string parsing - Updated ProxyConfig.from_string to support multiple proxy formats, including URLs with credentials. - Deprecated the 'proxy' parameter in BrowserConfig, replacing it with 'proxy_config' for better flexibility. - Added warnings for deprecated usage and clarified behavior when both parameters are provided. - Updated documentation and tests to reflect changes in proxy configuration handling.	2025-08-28 17:21:49 +08:00
ntohidi	88a9fbbb7e	fix(deep-crawl): BestFirst priority inversion; remove pre-scoring truncation. ref #1253 Use negative scores in PQ to visit high-score URLs first and drop link cap prior to scoring; add test for ordering.	2025-08-11 18:16:57 +08:00