#1268 fix: update redirected_url to current page URL and enhance normalize_url function

2025-09-08 19:09:33 +08:00
21 changed files with 135 additions and 705 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -265,7 +265,7 @@ CLAUDE.md
 tests/**/test_site
 tests/**/reports
 tests/**/benchmark_reports
-test_scripts/
+
 docs/**/data
 .codecat/

--- a/crawl4ai/adaptive_crawler.py
+++ b/crawl4ai/adaptive_crawler.py
@@ -19,7 +19,7 @@ import re
 from pathlib import Path

 from crawl4ai.async_webcrawler import AsyncWebCrawler
-from crawl4ai.async_configs import CrawlerRunConfig, LinkPreviewConfig, LLMConfig
+from crawl4ai.async_configs import CrawlerRunConfig, LinkPreviewConfig
 from crawl4ai.models import Link, CrawlResult
 import numpy as np

@@ -178,7 +178,7 @@ class AdaptiveConfig:
    
    # Embedding strategy parameters
    embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2"
-    embedding_llm_config: Optional[Union[LLMConfig, Dict]] = None  # Separate config for embeddings
+    embedding_llm_config: Optional[Dict] = None  # Separate config for embeddings
    n_query_variations: int = 10
    coverage_threshold: float = 0.85
    alpha_shape_alpha: float = 0.5
@@ -250,30 +250,6 @@ class AdaptiveConfig:
        assert 0 <= self.embedding_quality_max_confidence <= 1, "embedding_quality_max_confidence must be between 0 and 1"
        assert self.embedding_quality_scale_factor > 0, "embedding_quality_scale_factor must be positive"
        assert 0 <= self.embedding_min_confidence_threshold <= 1, "embedding_min_confidence_threshold must be between 0 and 1"
-    
-    @property
-    def _embedding_llm_config_dict(self) -> Optional[Dict]:
-        """Convert LLMConfig to dict format for backward compatibility."""
-        if self.embedding_llm_config is None:
-            return None
-        
-        if isinstance(self.embedding_llm_config, dict):
-            # Already a dict - return as-is for backward compatibility
-            return self.embedding_llm_config
-        
-        # Convert LLMConfig object to dict format
-        return {
-            'provider': self.embedding_llm_config.provider,
-            'api_token': self.embedding_llm_config.api_token,
-            'base_url': getattr(self.embedding_llm_config, 'base_url', None),
-            'temperature': getattr(self.embedding_llm_config, 'temperature', None),
-            'max_tokens': getattr(self.embedding_llm_config, 'max_tokens', None),
-            'top_p': getattr(self.embedding_llm_config, 'top_p', None),
-            'frequency_penalty': getattr(self.embedding_llm_config, 'frequency_penalty', None),
-            'presence_penalty': getattr(self.embedding_llm_config, 'presence_penalty', None),
-            'stop': getattr(self.embedding_llm_config, 'stop', None),
-            'n': getattr(self.embedding_llm_config, 'n', None),
-        }


 class CrawlStrategy(ABC):
@@ -617,7 +593,7 @@ class StatisticalStrategy(CrawlStrategy):
 class EmbeddingStrategy(CrawlStrategy):
    """Embedding-based adaptive crawling using semantic space coverage"""
    
-    def __init__(self, embedding_model: str = None, llm_config: Union[LLMConfig, Dict] = None):
+    def __init__(self, embedding_model: str = None, llm_config: Dict = None):
        self.embedding_model = embedding_model or "sentence-transformers/all-MiniLM-L6-v2"
        self.llm_config = llm_config
        self._embedding_cache = {}
@@ -629,24 +605,14 @@ class EmbeddingStrategy(CrawlStrategy):
        self._kb_embeddings_hash = None  # Track KB changes
        self._validation_embeddings_cache = None  # Cache validation query embeddings
        self._kb_similarity_threshold = 0.95  # Threshold for deduplication
-    
-    def _get_embedding_llm_config_dict(self) -> Dict:
-        """Get embedding LLM config as dict with fallback to default."""
-        if hasattr(self, 'config') and self.config:
-            config_dict = self.config._embedding_llm_config_dict
-            if config_dict:
-                return config_dict
-        
-        # Fallback to default if no config provided
-        return {
-            'provider': 'openai/text-embedding-3-small',
-            'api_token': os.getenv('OPENAI_API_KEY')
-        }
        
    async def _get_embeddings(self, texts: List[str]) -> Any:
        """Get embeddings using configured method"""
        from .utils import get_text_embeddings
-        embedding_llm_config = self._get_embedding_llm_config_dict()
+        embedding_llm_config = {
+            'provider': 'openai/text-embedding-3-small',
+            'api_token': os.getenv('OPENAI_API_KEY')
+        }
        return await get_text_embeddings(
            texts, 
            embedding_llm_config,
@@ -713,20 +679,8 @@ class EmbeddingStrategy(CrawlStrategy):
        Return as a JSON array of strings."""
        
        # Use the LLM for query generation
-        # Convert LLMConfig to dict if needed
-        llm_config_dict = None
-        if self.llm_config:
-            if isinstance(self.llm_config, dict):
-                llm_config_dict = self.llm_config
-            else:
-                # Convert LLMConfig object to dict
-                llm_config_dict = {
-                    'provider': self.llm_config.provider,
-                    'api_token': self.llm_config.api_token
-                }
-        
-        provider = llm_config_dict.get('provider', 'openai/gpt-4o-mini') if llm_config_dict else 'openai/gpt-4o-mini'
-        api_token = llm_config_dict.get('api_token') if llm_config_dict else None
+        provider = self.llm_config.get('provider', 'openai/gpt-4o-mini') if self.llm_config else 'openai/gpt-4o-mini'
+        api_token = self.llm_config.get('api_token') if self.llm_config else None
        
        # response = perform_completion_with_backoff(
        #     provider=provider,
@@ -889,7 +843,10 @@ class EmbeddingStrategy(CrawlStrategy):
        
        # Batch embed only uncached links
        if texts_to_embed:
-            embedding_llm_config = self._get_embedding_llm_config_dict()
+            embedding_llm_config = {
+                'provider': 'openai/text-embedding-3-small',
+                'api_token': os.getenv('OPENAI_API_KEY')
+            }
            new_embeddings = await get_text_embeddings(texts_to_embed, embedding_llm_config, self.embedding_model)

            # Cache the new embeddings
@@ -1227,7 +1184,10 @@ class EmbeddingStrategy(CrawlStrategy):
            return
            
        # Get embeddings for new texts
-        embedding_llm_config = self._get_embedding_llm_config_dict()      
+        embedding_llm_config = {
+            'provider': 'openai/text-embedding-3-small',
+            'api_token': os.getenv('OPENAI_API_KEY')
+        }        
        new_embeddings = await get_text_embeddings(new_texts, embedding_llm_config, self.embedding_model)

        # Deduplicate embeddings before adding to KB
@@ -1296,12 +1256,10 @@ class AdaptiveCrawler:
        if strategy_name == "statistical":
            return StatisticalStrategy()
        elif strategy_name == "embedding":
-            strategy = EmbeddingStrategy(
+            return EmbeddingStrategy(
                embedding_model=self.config.embedding_model,
                llm_config=self.config.embedding_llm_config
            )
-            strategy.config = self.config  # Pass config to strategy
-            return strategy
        else:
            raise ValueError(f"Unknown strategy: {strategy_name}")
    
--- a/crawl4ai/async_configs.py
+++ b/crawl4ai/async_configs.py
@@ -1,6 +1,5 @@
 import os
 from typing import Union
-import warnings
 from .config import (
    DEFAULT_PROVIDER,
    DEFAULT_PROVIDER_API_KEY,
@@ -258,39 +257,24 @@ class ProxyConfig:
    
    @staticmethod
    def from_string(proxy_str: str) -> "ProxyConfig":
-        """Create a ProxyConfig from a string.
-
-        Supported formats:
-        - 'http://username:password@ip:port'
-        - 'http://ip:port'
-        - 'socks5://ip:port'
-        - 'ip:port:username:password'
-        - 'ip:port'
-        """
-        s = (proxy_str or "").strip()
-        # URL with credentials
-        if "@" in s and "://" in s:
-            auth_part, server_part = s.split("@", 1)
-            protocol, credentials = auth_part.split("://", 1)
-            if ":" in credentials:
-                username, password = credentials.split(":", 1)
-                return ProxyConfig(
-                    server=f"{protocol}://{server_part}",
-                    username=username,
-                    password=password,
-                )
-        # URL without credentials (keep scheme)
-        if "://" in s and "@" not in s:
-            return ProxyConfig(server=s)
-        # Colon separated forms
-        parts = s.split(":")
-        if len(parts) == 4:
+        """Create a ProxyConfig from a string in the format 'ip:port:username:password' or 'ip:port'."""
+        parts = proxy_str.split(":")
+        if len(parts) == 4:  # ip:port:username:password
            ip, port, username, password = parts
-            return ProxyConfig(server=f"http://{ip}:{port}", username=username, password=password)
-        if len(parts) == 2:
+            return ProxyConfig(
+                server=f"http://{ip}:{port}",
+                username=username,
+                password=password,
+                ip=ip
+            )
+        elif len(parts) == 2:  # ip:port only
            ip, port = parts
-            return ProxyConfig(server=f"http://{ip}:{port}")
-        raise ValueError(f"Invalid proxy string format: {proxy_str}")
+            return ProxyConfig(
+                server=f"http://{ip}:{port}",
+                ip=ip
+            )
+        else:
+            raise ValueError(f"Invalid proxy string format: {proxy_str}")
    
    @staticmethod
    def from_dict(proxy_dict: Dict) -> "ProxyConfig":
@@ -454,7 +438,6 @@ class BrowserConfig:
        host: str = "localhost",
        enable_stealth: bool = False,
    ):
-        
        self.browser_type = browser_type
        self.headless = headless 
        self.browser_mode = browser_mode
@@ -467,22 +450,13 @@ class BrowserConfig:
        if self.browser_type in ["firefox", "webkit"]:
            self.channel = ""
            self.chrome_channel = ""
-        if proxy:
-            warnings.warn("The 'proxy' parameter is deprecated and will be removed in a future release. Use 'proxy_config' instead.", UserWarning)
        self.proxy = proxy
        self.proxy_config = proxy_config
        if isinstance(self.proxy_config, dict):
            self.proxy_config = ProxyConfig.from_dict(self.proxy_config)
        if isinstance(self.proxy_config, str):
            self.proxy_config = ProxyConfig.from_string(self.proxy_config)
-        
-        if self.proxy and self.proxy_config:
-            warnings.warn("Both 'proxy' and 'proxy_config' are provided. 'proxy_config' will take precedence.", UserWarning)
-            self.proxy = None
-        elif self.proxy:
-            # Convert proxy string to ProxyConfig if proxy_config is not provided
-            self.proxy_config = ProxyConfig.from_string(self.proxy)
-            self.proxy = None
+

        self.viewport_width = viewport_width
        self.viewport_height = viewport_height
--- a/crawl4ai/async_crawler_strategy.py
+++ b/crawl4ai/async_crawler_strategy.py
@@ -1037,7 +1037,7 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
                downloaded_files=(
                    self._downloaded_files if self._downloaded_files else None
                ),
-                redirected_url=redirected_url,
+                redirected_url=page.url,  # Update to current URL in case of JavaScript navigation
                # Include captured data if enabled
                network_requests=captured_requests if config.capture_network_requests else None,
                console_messages=captured_console if config.capture_console_messages else None,
--- a/crawl4ai/async_webcrawler.py
+++ b/crawl4ai/async_webcrawler.py
@@ -480,7 +480,7 @@ class AsyncWebCrawler:
            # Scraping Strategy Execution  #
            ################################
            result: ScrapingResult = scraping_strategy.scrap(
-                url, html, **params)
+                kwargs.get("redirected_url", url), html, **params)

            if result is None:
                raise ValueError(
--- a/crawl4ai/browser_manager.py
+++ b/crawl4ai/browser_manager.py
@@ -15,7 +15,6 @@ from .js_snippet import load_js_script
 from .config import DOWNLOAD_PAGE_TIMEOUT
 from .async_configs import BrowserConfig, CrawlerRunConfig
 from .utils import get_chromium_path
-import warnings


 BROWSER_DISABLE_OPTIONS = [
@@ -369,9 +368,6 @@ class ManagedBrowser:
            ]
            if self.headless:
                flags.append("--headless=new")
-            # Add viewport flag if specified in config
-            if self.browser_config.viewport_height and self.browser_config.viewport_width:
-                flags.append(f"--window-size={self.browser_config.viewport_width},{self.browser_config.viewport_height}")
            # merge common launch flags
            flags.extend(self.build_browser_flags(self.browser_config))
        elif self.browser_type == "firefox":
@@ -745,18 +741,17 @@ class BrowserManager:
            )
            os.makedirs(browser_args["downloads_path"], exist_ok=True)

-        if self.config.proxy:
-            warnings.warn(
-                "BrowserConfig.proxy is deprecated and ignored. Use proxy_config instead.",
-                DeprecationWarning,
-            )
-        if self.config.proxy_config:
+        if self.config.proxy or self.config.proxy_config:
            from playwright.async_api import ProxySettings

-            proxy_settings = ProxySettings(
-                server=self.config.proxy_config.server,
-                username=self.config.proxy_config.username,
-                password=self.config.proxy_config.password,
+            proxy_settings = (
+                ProxySettings(server=self.config.proxy)
+                if self.config.proxy
+                else ProxySettings(
+                    server=self.config.proxy_config.server,
+                    username=self.config.proxy_config.username,
+                    password=self.config.proxy_config.password,
+                )
            )
            browser_args["proxy"] = proxy_settings

--- a/crawl4ai/deep_crawling/bff_strategy.py
+++ b/crawl4ai/deep_crawling/bff_strategy.py
@@ -122,6 +122,11 @@ class BestFirstCrawlingStrategy(DeepCrawlStrategy):
                
            valid_links.append(base_url)
            
+        # If we have more valid links than capacity, limit them
+        if len(valid_links) > remaining_capacity:
+            valid_links = valid_links[:remaining_capacity]
+            self.logger.info(f"Limiting to {remaining_capacity} URLs due to max_pages limit")
+            
        # Record the new depths and add to next_links
        for url in valid_links:
            depths[url] = new_depth
@@ -141,8 +146,7 @@ class BestFirstCrawlingStrategy(DeepCrawlStrategy):
        """
        queue: asyncio.PriorityQueue = asyncio.PriorityQueue()
        # Push the initial URL with score 0 and depth 0.
-        initial_score = self.url_scorer.score(start_url) if self.url_scorer else 0
-        await queue.put((-initial_score, 0, start_url, None))
+        await queue.put((0, 0, start_url, None))
        visited: Set[str] = set()
        depths: Dict[str, int] = {start_url: 0}

@@ -189,7 +193,7 @@ class BestFirstCrawlingStrategy(DeepCrawlStrategy):
                result.metadata = result.metadata or {}
                result.metadata["depth"] = depth
                result.metadata["parent_url"] = parent_url
-                result.metadata["score"] = -score
+                result.metadata["score"] = score
                
                # Count only successful crawls toward max_pages limit
                if result.success:
@@ -210,7 +214,7 @@ class BestFirstCrawlingStrategy(DeepCrawlStrategy):
                    for new_url, new_parent in new_links:
                        new_depth = depths.get(new_url, depth + 1)
                        new_score = self.url_scorer.score(new_url) if self.url_scorer else 0
-                        await queue.put((-new_score, new_depth, new_url, new_parent))
+                        await queue.put((new_score, new_depth, new_url, new_parent))

        # End of crawl.

--- a/crawl4ai/utils.py
+++ b/crawl4ai/utils.py
@@ -2149,8 +2149,10 @@ def normalize_url(
    *,
    drop_query_tracking=True,
    sort_query=True,
-    keep_fragment=False,
+    keep_fragment=True,
+    remove_fragments=None,  # alias for keep_fragment=False
    extra_drop_params=None,
+    params_to_remove=None,  # alias for extra_drop_params
    preserve_https=False,
    original_scheme=None
 ):
@@ -2175,10 +2177,20 @@ def normalize_url(
    Returns
    -------
    str | None
-        A clean, canonical URL or None if href is empty/None.
+        A clean, canonical URL or the base URL if href is empty/None.
    """
    if not href:
-        return None
+        # For empty href, return the base URL (matching urljoin behavior)
+        return base_url
+
+    # Validate base URL format
+    parsed_base = urlparse(base_url)
+    if not parsed_base.scheme or not parsed_base.netloc:
+        raise ValueError(f"Invalid base URL format: {base_url}")
+    
+    if parsed_base.scheme.lower() not in ["http", "https"]:
+        # Only http/https base URLs are supported; any other scheme is rejected
+        raise ValueError(f"Invalid base URL format: {base_url}")

    # Resolve relative paths first
    full_url = urljoin(base_url, href.strip())
@@ -2199,6 +2211,12 @@ def normalize_url(

    # ── netloc ──
    netloc = parsed.netloc.lower()
+    
+    # Remove default ports (80 for http, 443 for https)
+    if ':' in netloc:
+        host, port = netloc.rsplit(':', 1)
+        if (parsed.scheme == 'http' and port == '80') or (parsed.scheme == 'https' and port == '443'):
+            netloc = host

    # ── path ──
    # Strip duplicate slashes and trailing "/" (except root)
@@ -2206,7 +2224,17 @@ def normalize_url(
    # The path from urlparse is already properly encoded
    path = parsed.path
    if path.endswith('/') and path != '/':
-        path = path.rstrip('/')
+        # Only strip trailing slash if the original href didn't have a trailing slash
+        # and the base_url didn't end with a slash
+        base_parsed = urlparse(base_url)
+        if not href.strip().endswith('/') and not base_parsed.path.endswith('/'):
+            path = path.rstrip('/')
+    # Add trailing slash for URLs without explicit paths (indicates directory)
+    # But skip this for special protocols that don't use standard URL structure
+    elif not path:
+        special_protocols = {"javascript:", "mailto:", "tel:", "file:", "data:"}
+        if not any(href.strip().lower().startswith(p) for p in special_protocols):
+            path = '/'

    # ── query ──
    query = parsed.query
@@ -2221,6 +2249,8 @@ def normalize_url(
            }
            if extra_drop_params:
                default_tracking |= {p.lower() for p in extra_drop_params}
+            if params_to_remove:
+                default_tracking |= {p.lower() for p in params_to_remove}
            params = [(k, v) for k, v in params if k not in default_tracking]

        if sort_query:
@@ -2229,7 +2259,10 @@ def normalize_url(
        query = urlencode(params, doseq=True) if params else ''

    # ── fragment ──
-    fragment = parsed.fragment if keep_fragment else ''
+    if remove_fragments is True:
+        fragment = ''
+    else:
+        fragment = parsed.fragment if keep_fragment else ''

    # Re-assemble
    normalized = urlunparse((
@@ -2453,9 +2486,19 @@ def is_external_url(url: str, base_domain: str) -> bool:
        if not parsed.netloc:  # Relative URL
            return False

-        # Strip 'www.' from both domains for comparison
-        url_domain = parsed.netloc.lower().replace("www.", "")
-        base = base_domain.lower().replace("www.", "")
+        # Don't strip 'www.' from domains for comparison - treat www.example.com and example.com as different
+        url_domain = parsed.netloc.lower()
+        base = base_domain.lower()
+        
+        # Strip user credentials from URL domain
+        if '@' in url_domain:
+            url_domain = url_domain.split('@', 1)[1]
+        
+        # Strip ports from both for comparison (any port should be considered same domain)
+        if ':' in url_domain:
+            url_domain = url_domain.rsplit(':', 1)[0]
+        if ':' in base:
+            base = base.rsplit(':', 1)[0]

        # Check if URL domain ends with base domain
        return not url_domain.endswith(base)
--- a/deploy/docker/c4ai-code-context.md
+++ b/deploy/docker/c4ai-code-context.md
@@ -7520,18 +7520,17 @@ class BrowserManager:
            )
            os.makedirs(browser_args["downloads_path"], exist_ok=True)

-        if self.config.proxy:
-            warnings.warn(
-                "BrowserConfig.proxy is deprecated and ignored. Use proxy_config instead.",
-                DeprecationWarning,
-            )
-        if self.config.proxy_config:
+        if self.config.proxy or self.config.proxy_config:
            from playwright.async_api import ProxySettings

-            proxy_settings = ProxySettings(
-                server=self.config.proxy_config.server,
-                username=self.config.proxy_config.username,
-                password=self.config.proxy_config.password,
+            proxy_settings = (
+                ProxySettings(server=self.config.proxy)
+                if self.config.proxy
+                else ProxySettings(
+                    server=self.config.proxy_config.server,
+                    username=self.config.proxy_config.username,
+                    password=self.config.proxy_config.password,
+                )
            )
            browser_args["proxy"] = proxy_settings

--- a/deploy/docker/server.py
+++ b/deploy/docker/server.py
@@ -482,14 +482,9 @@ async def crawl(
 ):
    """
    Crawl a list of URLs and return the results as JSON.
-    For streaming responses, use /crawl/stream endpoint.
    """
    if not crawl_request.urls:
        raise HTTPException(400, "At least one URL required")
-    # Check whether it is a redirection for a streaming request
-    crawler_config = CrawlerRunConfig.load(crawl_request.crawler_config)
-    if crawler_config.stream:
-        return await stream_process(crawl_request=crawl_request)
    results = await handle_crawl_request(
        urls=crawl_request.urls,
        browser_config=crawl_request.browser_config,
@@ -511,16 +506,12 @@ async def crawl_stream(
 ):
    if not crawl_request.urls:
        raise HTTPException(400, "At least one URL required")
-
-    return await stream_process(crawl_request=crawl_request)
-
-async def stream_process(crawl_request: CrawlRequest):
    crawler, gen = await handle_stream_crawl_request(
        urls=crawl_request.urls,
        browser_config=crawl_request.browser_config,
        crawler_config=crawl_request.crawler_config,
        config=config,
-)
+    )
    return StreamingResponse(
        stream_results(crawler, gen),
        media_type="application/x-ndjson",
--- a/deploy/docker/static/playground/index.html
+++ b/deploy/docker/static/playground/index.html
@@ -371,7 +371,7 @@

                <div class="flex items-center">
                    <input id="st-stream" type="checkbox" class="mr-2">
-                    <label for="st-stream" class="text-sm">Enable streaming mode</label>
+                    <label for="st-stream" class="text-sm">Use /crawl/stream</label>
                    <button id="st-run"
                        class="ml-auto bg-accent text-dark px-4 py-2 rounded hover:bg-opacity-90 font-medium">
                        Run Stress Test
@@ -596,14 +596,6 @@
            forceHighlightElement(curlCodeEl);
        }

-        // Detect if stream is requested inside payload
-        function shouldUseStream(payload) {
-            const toBool = (v) => v === true || (typeof v === 'string' && v.toLowerCase() === 'true');
-            const fromCrawler = payload && payload.crawler_config && payload.crawler_config.params && payload.crawler_config.params.stream;
-            const direct = payload && payload.stream;
-            return toBool(fromCrawler) || toBool(direct);
-        }
-
        // Main run function
        async function runCrawl() {
            const endpoint = document.getElementById('endpoint').value;
@@ -619,24 +611,16 @@
                        : { browser_config: cfgJson };
                }
            } catch (err) {
-                const codeText = cm.getValue();
-                const streamFlag = /stream\s*=\s*True/i.test(codeText);
-                const isCrawlEndpoint = document.getElementById('endpoint').value === 'crawl';
-                if (isCrawlEndpoint && streamFlag) {
-                    // Fallback: proceed with minimal config only for stream
-                    advConfig = { crawler_config: { stream: true } };
-                } else {
-                    updateStatus('error');
-                    document.querySelector('#response-content code').textContent =
-                        JSON.stringify({ error: err.message }, null, 2);
-                    forceHighlightElement(document.querySelector('#response-content code'));
-                    return; // stop run
-                }
+                updateStatus('error');
+                document.querySelector('#response-content code').textContent =
+                    JSON.stringify({ error: err.message }, null, 2);
+                forceHighlightElement(document.querySelector('#response-content code'));
+                return; // stop run
            }

            const endpointMap = {
                crawl: '/crawl',
-                crawl_stream: '/crawl/stream', // Keep for backward compatibility
+                // crawl_stream: '/crawl/stream',
                md: '/md',
                llm: '/llm'
            };
@@ -663,7 +647,7 @@
                // This will be handled directly in the fetch below
                payload = null;
            } else {
-                // Default payload for /crawl (supports both streaming and batch modes)
+                // Default payload for /crawl and /crawl/stream
                payload = {
                    urls,
                    ...advConfig
@@ -675,7 +659,6 @@
            try {
                const startTime = performance.now();
                let response, responseData;
-                const useStreamOverride = (endpoint === 'crawl') && shouldUseStream(payload);

                if (endpoint === 'llm') {
                    // Special handling for LLM endpoint which uses URL pattern: /llm/{encoded_url}?q={query}
@@ -698,8 +681,8 @@
                    document.querySelector('#response-content code').textContent = JSON.stringify(responseData, null, 2);
                    document.querySelector('#response-content code').className = 'json hljs';
                    forceHighlightElement(document.querySelector('#response-content code'));
-                } else if (endpoint === 'crawl_stream' || useStreamOverride) {
-                    // Stream processing - now handled directly by /crawl endpoint
+                } else if (endpoint === 'crawl_stream') {
+                    // Stream processing
                    response = await fetch(api, {
                        method: 'POST',
                        headers: { 'Content-Type': 'application/json' },
@@ -774,7 +757,6 @@
                    const question = document.getElementById('llm-question').value.trim() || "What is this page about?";
                    generateSnippets(`${api}/${encodedUrl}?q=${encodeURIComponent(question)}`, null, 'GET');
                } else {
-                    // Use the same API endpoint for both streaming and non-streaming
                    generateSnippets(api, payload);
                }
            } catch (error) {
@@ -804,7 +786,7 @@
            document.getElementById('stress-avg-time').textContent = '0';
            document.getElementById('stress-peak-mem').textContent = '0';

-            const api = '/crawl'; // Always use /crawl - backend handles streaming internally
+            const api = useStream ? '/crawl/stream' : '/crawl';
            const urls = Array.from({ length: total }, (_, i) => `https://httpbin.org/anything/stress-${i}-${Date.now()}`);
            const chunks = [];

--- a/docs/examples/adaptive_crawling/llm_config_example.py
+++ b/docs/examples/adaptive_crawling/llm_config_example.py
@@ -1,154 +0,0 @@
-import asyncio
-import os
-from crawl4ai import AsyncWebCrawler, AdaptiveCrawler, AdaptiveConfig, LLMConfig
-
-
-async def test_configuration(name: str, config: AdaptiveConfig, url: str, query: str):
-    """Test a specific configuration"""
-    print(f"\n{'='*60}")
-    print(f"Configuration: {name}")
-    print(f"{'='*60}")
-    
-    async with AsyncWebCrawler(verbose=False) as crawler:
-        adaptive = AdaptiveCrawler(crawler, config)
-        result = await adaptive.digest(start_url=url, query=query)
-        
-        print("\n" + "="*50)
-        print("CRAWL STATISTICS")
-        print("="*50)
-        adaptive.print_stats(detailed=False)
-        
-        # Get the most relevant content found
-        print("\n" + "="*50)
-        print("MOST RELEVANT PAGES")
-        print("="*50)
-        
-        relevant_pages = adaptive.get_relevant_content(top_k=5)
-        for i, page in enumerate(relevant_pages, 1):
-            print(f"\n{i}. {page['url']}")
-            print(f"   Relevance Score: {page['score']:.2%}")
-            
-            # Show a snippet of the content
-            content = page['content'] or ""
-            if content:
-                snippet = content[:200].replace('\n', ' ')
-                if len(content) > 200:
-                    snippet += "..."
-                print(f"   Preview: {snippet}")
-        
-        print(f"\n{'='*50}")
-        print(f"Pages crawled: {len(result.crawled_urls)}")
-        print(f"Final confidence: {adaptive.confidence:.1%}")
-        print(f"Stopped reason: {result.metrics.get('stopped_reason', 'max_pages')}")
-        
-        if result.metrics.get('is_irrelevant', False):
-            print("⚠️  Query detected as irrelevant!")
-        
-        return result
-
-
-async def llm_embedding():
-    """Demonstrate various embedding configurations"""
-    
-    print("EMBEDDING STRATEGY CONFIGURATION EXAMPLES")
-    print("=" * 60)
-    
-    # Base URL and query for testing
-    test_url = "https://docs.python.org/3/library/asyncio.html"
-    
-    openai_llm_config = LLMConfig(
-        provider='openai/text-embedding-3-small',
-        api_token=os.getenv('OPENAI_API_KEY'),
-        temperature=0.7,
-        max_tokens=2000
-    )
-    config_openai = AdaptiveConfig(
-        strategy="embedding",
-        max_pages=10,
-        
-        # Use OpenAI embeddings
-        embedding_llm_config=openai_llm_config,
-        # embedding_llm_config={
-        #     'provider': 'openai/text-embedding-3-small',
-        #     'api_token': os.getenv('OPENAI_API_KEY')
-        # },
-        
-        # OpenAI embeddings are high quality, can be stricter
-        embedding_k_exp=4.0,
-        n_query_variations=12
-    )
-    
-    await test_configuration(
-        "OpenAI Embeddings",
-        config_openai,
-        test_url,
-        # "event-driven architecture patterns"
-        "async await context managers coroutines"
-    )
-    return
-    
-    
-
-async def basic_adaptive_crawling():
-    """Basic adaptive crawling example"""
-    
-    # Initialize the crawler
-    async with AsyncWebCrawler(verbose=True) as crawler:
-        # Create an adaptive crawler with default settings (statistical strategy)
-        adaptive = AdaptiveCrawler(crawler)
-        
-        # Note: You can also use embedding strategy for semantic understanding:
-        # from crawl4ai import AdaptiveConfig
-        # config = AdaptiveConfig(strategy="embedding")
-        # adaptive = AdaptiveCrawler(crawler, config)
-        
-        # Start adaptive crawling
-        print("Starting adaptive crawl for Python async programming information...")
-        result = await adaptive.digest(
-            start_url="https://docs.python.org/3/library/asyncio.html",
-            query="async await context managers coroutines"
-        )
-        
-        # Display crawl statistics
-        print("\n" + "="*50)
-        print("CRAWL STATISTICS")
-        print("="*50)
-        adaptive.print_stats(detailed=False)
-        
-        # Get the most relevant content found
-        print("\n" + "="*50)
-        print("MOST RELEVANT PAGES")
-        print("="*50)
-        
-        relevant_pages = adaptive.get_relevant_content(top_k=5)
-        for i, page in enumerate(relevant_pages, 1):
-            print(f"\n{i}. {page['url']}")
-            print(f"   Relevance Score: {page['score']:.2%}")
-            
-            # Show a snippet of the content
-            content = page['content'] or ""
-            if content:
-                snippet = content[:200].replace('\n', ' ')
-                if len(content) > 200:
-                    snippet += "..."
-                print(f"   Preview: {snippet}")
-        
-        # Show final confidence
-        print(f"\n{'='*50}")
-        print(f"Final Confidence: {adaptive.confidence:.2%}")
-        print(f"Total Pages Crawled: {len(result.crawled_urls)}")
-        print(f"Knowledge Base Size: {len(adaptive.state.knowledge_base)} documents")
-        
-        
-        if adaptive.confidence >= 0.8:
-            print("✓ High confidence - can answer detailed questions about async Python")
-        elif adaptive.confidence >= 0.6:
-            print("~ Moderate confidence - can answer basic questions") 
-        else:
-            print("✗ Low confidence - need more information")
-
-
-
-if __name__ == "__main__":
-    asyncio.run(llm_embedding())
-    # asyncio.run(basic_adaptive_crawling())
--- a/docs/md_v2/advanced/proxy-security.md
+++ b/docs/md_v2/advanced/proxy-security.md
@@ -7,13 +7,13 @@ Simple proxy configuration with `BrowserConfig`:
 ```python
 from crawl4ai.async_configs import BrowserConfig

-# Using HTTP proxy
-browser_config = BrowserConfig(proxy_config={"server": "http://proxy.example.com:8080"})
+# Using proxy URL
+browser_config = BrowserConfig(proxy="http://proxy.example.com:8080")
 async with AsyncWebCrawler(config=browser_config) as crawler:
    result = await crawler.arun(url="https://example.com")

 # Using SOCKS proxy
-browser_config = BrowserConfig(proxy_config={"server": "socks5://proxy.example.com:1080"})
+browser_config = BrowserConfig(proxy="socks5://proxy.example.com:1080")
 async with AsyncWebCrawler(config=browser_config) as crawler:
    result = await crawler.arun(url="https://example.com")
 ```
@@ -25,11 +25,7 @@ Use an authenticated proxy with `BrowserConfig`:
 ```python
 from crawl4ai.async_configs import BrowserConfig

-browser_config = BrowserConfig(proxy_config={
-    "server": "http://[host]:[port]",
-    "username": "[username]",
-    "password": "[password]",
-})
+browser_config = BrowserConfig(proxy="http://[username]:[password]@[host]:[port]")
 async with AsyncWebCrawler(config=browser_config) as crawler:
    result = await crawler.arun(url="https://example.com")
 ```
--- a/docs/md_v2/api/parameters.md
+++ b/docs/md_v2/api/parameters.md
@@ -23,7 +23,7 @@ browser_cfg = BrowserConfig(
 | **`headless`**        | `bool` (default: `True`)               | Headless means no visible UI. `False` is handy for debugging.                                                                         |
 | **`viewport_width`**  | `int` (default: `1080`)                | Initial page width (in px). Useful for testing responsive layouts.                                                                    |
 | **`viewport_height`** | `int` (default: `600`)                 | Initial page height (in px).                                                                                                          |
-| **`proxy`**           | `str` (deprecated)                      | Deprecated. Use `proxy_config` instead. If set, it will be auto-converted internally. |
+| **`proxy`**           | `str` (default: `None`)                | Single-proxy URL if you want all traffic to go through it, e.g. `"http://user:pass@proxy:8080"`.                                      |
 | **`proxy_config`**    | `dict` (default: `None`)               | For advanced or multi-proxy needs, specify details like `{"server": "...", "username": "...", ...}`.                                  |
 | **`use_persistent_context`** | `bool` (default: `False`)       | If `True`, uses a **persistent** browser context (keep cookies, sessions across runs). Also sets `use_managed_browser=True`.          |
 | **`user_data_dir`**   | `str or None` (default: `None`)        | Directory to store user data (profiles, cookies). Must be set if you want permanent sessions.                                         |
--- a/docs/md_v2/core/adaptive-crawling.md
+++ b/docs/md_v2/core/adaptive-crawling.md
@@ -108,19 +108,7 @@ config = AdaptiveConfig(
    embedding_min_confidence_threshold=0.1  # Stop if completely irrelevant
 )

-# With custom LLM provider for query expansion (recommended)
-from crawl4ai import LLMConfig
-
-config = AdaptiveConfig(
-    strategy="embedding",
-    embedding_llm_config=LLMConfig(
-        provider='openai/text-embedding-3-small',
-        api_token='your-api-key',
-        temperature=0.7
-    )
-)
-
-# Alternative: Dictionary format (backward compatible)
+# With custom embedding provider (e.g., OpenAI)
 config = AdaptiveConfig(
    strategy="embedding",
    embedding_llm_config={
--- a/tests/adaptive/test_llm_embedding.py
+++ b/tests/adaptive/test_llm_embedding.py
@@ -1,154 +0,0 @@
-import asyncio
-import os
-from crawl4ai import AsyncWebCrawler, AdaptiveCrawler, AdaptiveConfig, LLMConfig
-
-
-async def test_configuration(name: str, config: AdaptiveConfig, url: str, query: str):
-    """Test a specific configuration"""
-    print(f"\n{'='*60}")
-    print(f"Configuration: {name}")
-    print(f"{'='*60}")
-    
-    async with AsyncWebCrawler(verbose=False) as crawler:
-        adaptive = AdaptiveCrawler(crawler, config)
-        result = await adaptive.digest(start_url=url, query=query)
-        
-        print("\n" + "="*50)
-        print("CRAWL STATISTICS")
-        print("="*50)
-        adaptive.print_stats(detailed=False)
-        
-        # Get the most relevant content found
-        print("\n" + "="*50)
-        print("MOST RELEVANT PAGES")
-        print("="*50)
-        
-        relevant_pages = adaptive.get_relevant_content(top_k=5)
-        for i, page in enumerate(relevant_pages, 1):
-            print(f"\n{i}. {page['url']}")
-            print(f"   Relevance Score: {page['score']:.2%}")
-            
-            # Show a snippet of the content
-            content = page['content'] or ""
-            if content:
-                snippet = content[:200].replace('\n', ' ')
-                if len(content) > 200:
-                    snippet += "..."
-                print(f"   Preview: {snippet}")
-        
-        print(f"\n{'='*50}")
-        print(f"Pages crawled: {len(result.crawled_urls)}")
-        print(f"Final confidence: {adaptive.confidence:.1%}")
-        print(f"Stopped reason: {result.metrics.get('stopped_reason', 'max_pages')}")
-        
-        if result.metrics.get('is_irrelevant', False):
-            print("⚠️  Query detected as irrelevant!")
-        
-        return result
-
-
-async def llm_embedding():
-    """Demonstrate various embedding configurations"""
-    
-    print("EMBEDDING STRATEGY CONFIGURATION EXAMPLES")
-    print("=" * 60)
-    
-    # Base URL and query for testing
-    test_url = "https://docs.python.org/3/library/asyncio.html"
-    
-    openai_llm_config = LLMConfig(
-        provider='openai/text-embedding-3-small',
-        api_token=os.getenv('OPENAI_API_KEY'),
-        temperature=0.7,
-        max_tokens=2000
-    )
-    config_openai = AdaptiveConfig(
-        strategy="embedding",
-        max_pages=10,
-        
-        # Use OpenAI embeddings
-        embedding_llm_config=openai_llm_config,
-        # embedding_llm_config={
-        #     'provider': 'openai/text-embedding-3-small',
-        #     'api_token': os.getenv('OPENAI_API_KEY')
-        # },
-        
-        # OpenAI embeddings are high quality, can be stricter
-        embedding_k_exp=4.0,
-        n_query_variations=12
-    )
-    
-    await test_configuration(
-        "OpenAI Embeddings",
-        config_openai,
-        test_url,
-        # "event-driven architecture patterns"
-        "async await context managers coroutines"
-    )
-    return
-    
-    
-
-async def basic_adaptive_crawling():
-    """Basic adaptive crawling example"""
-    
-    # Initialize the crawler
-    async with AsyncWebCrawler(verbose=True) as crawler:
-        # Create an adaptive crawler with default settings (statistical strategy)
-        adaptive = AdaptiveCrawler(crawler)
-        
-        # Note: You can also use embedding strategy for semantic understanding:
-        # from crawl4ai import AdaptiveConfig
-        # config = AdaptiveConfig(strategy="embedding")
-        # adaptive = AdaptiveCrawler(crawler, config)
-        
-        # Start adaptive crawling
-        print("Starting adaptive crawl for Python async programming information...")
-        result = await adaptive.digest(
-            start_url="https://docs.python.org/3/library/asyncio.html",
-            query="async await context managers coroutines"
-        )
-        
-        # Display crawl statistics
-        print("\n" + "="*50)
-        print("CRAWL STATISTICS")
-        print("="*50)
-        adaptive.print_stats(detailed=False)
-        
-        # Get the most relevant content found
-        print("\n" + "="*50)
-        print("MOST RELEVANT PAGES")
-        print("="*50)
-        
-        relevant_pages = adaptive.get_relevant_content(top_k=5)
-        for i, page in enumerate(relevant_pages, 1):
-            print(f"\n{i}. {page['url']}")
-            print(f"   Relevance Score: {page['score']:.2%}")
-            
-            # Show a snippet of the content
-            content = page['content'] or ""
-            if content:
-                snippet = content[:200].replace('\n', ' ')
-                if len(content) > 200:
-                    snippet += "..."
-                print(f"   Preview: {snippet}")
-        
-        # Show final confidence
-        print(f"\n{'='*50}")
-        print(f"Final Confidence: {adaptive.confidence:.2%}")
-        print(f"Total Pages Crawled: {len(result.crawled_urls)}")
-        print(f"Knowledge Base Size: {len(adaptive.state.knowledge_base)} documents")
-        
-        
-        if adaptive.confidence >= 0.8:
-            print("✓ High confidence - can answer detailed questions about async Python")
-        elif adaptive.confidence >= 0.6:
-            print("~ Moderate confidence - can answer basic questions") 
-        else:
-            print("✗ Low confidence - need more information")
-
-
-
-if __name__ == "__main__":
-    asyncio.run(llm_embedding())
-    # asyncio.run(basic_adaptive_crawling())
--- a/tests/async/test_0.4.2_browser_manager.py
+++ b/tests/async/test_0.4.2_browser_manager.py
@@ -112,7 +112,7 @@ async def test_proxy_settings():
        headless=True,
        verbose=False,
        user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36",
-        proxy_config={"server": "http://127.0.0.1:8080"},  # Assuming local proxy server for test
+        proxy="http://127.0.0.1:8080",  # Assuming local proxy server for test
        use_managed_browser=False,
        use_persistent_context=False,
    ) as crawler:
--- a/tests/docker/test_server_requests.py
+++ b/tests/docker/test_server_requests.py
@@ -143,40 +143,7 @@ class TestCrawlEndpoints:
        assert "<h1>Herman Melville - Moby-Dick</h1>" in result["html"]
        # We don't specify a markdown generator in this test, so don't make assumptions about markdown field
        # It might be null, missing, or populated depending on the server's default behavior
-    async def test_crawl_with_stream_direct(self, async_client: httpx.AsyncClient):
-        """Test that /crawl endpoint handles stream=True directly without redirect."""
-        payload = {
-            "urls": [SIMPLE_HTML_URL],
-            "browser_config": {
-                "type": "BrowserConfig",
-                "params": {
-                    "headless": True,
-                }
-            },
-            "crawler_config": {
-                "type": "CrawlerRunConfig", 
-                "params": {
-                    "stream": True,  # Set stream to True for direct streaming
-                    "screenshot": False,
-                    "cache_mode": CacheMode.BYPASS.value
-                }
-            }
-        }

-        # Send a request to the /crawl endpoint - should handle streaming directly
-        async with async_client.stream("POST", "/crawl", json=payload) as response:
-            assert response.status_code == 200
-            assert response.headers["content-type"] == "application/x-ndjson"
-            assert response.headers.get("x-stream-status") == "active"
-
-            results = await process_streaming_response(response)
-
-            assert len(results) == 1
-            result = results[0]
-            await assert_crawl_result_structure(result)
-            assert result["success"] is True
-            assert result["url"] == SIMPLE_HTML_URL
-            assert "<h1>Herman Melville - Moby-Dick</h1>" in result["html"]
    async def test_simple_crawl_single_url_streaming(self, async_client: httpx.AsyncClient):
        """Test /crawl/stream with a single URL and simple config values."""
        payload = {
--- a/tests/general/test_bff_scoring.py
+++ b/tests/general/test_bff_scoring.py
@@ -1,117 +0,0 @@
-#!/usr/bin/env python3
-"""
-Simple test to verify BestFirstCrawlingStrategy fixes.
-This test crawls a real website and shows that:
-1. Higher-scoring pages are crawled first (priority queue fix)
-2. Links are scored before truncation (link discovery fix)
-"""
-
-import asyncio
-from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
-from crawl4ai.deep_crawling import BestFirstCrawlingStrategy
-from crawl4ai.deep_crawling.scorers import KeywordRelevanceScorer
-
-async def test_best_first_strategy():
-    """Test BestFirstCrawlingStrategy with keyword scoring"""
-    
-    print("=" * 70)
-    print("Testing BestFirstCrawlingStrategy with Real URL")
-    print("=" * 70)
-    print("\nThis test will:")
-    print("1. Crawl Python.org documentation")
-    print("2. Score pages based on keywords: 'tutorial', 'guide', 'reference'")
-    print("3. Show that higher-scoring pages are crawled first")
-    print("-" * 70)
-    
-    # Create a keyword scorer that prioritizes tutorial/guide pages
-    scorer = KeywordRelevanceScorer(
-        keywords=["tutorial", "guide", "reference", "documentation"],
-        weight=1.0,
-        case_sensitive=False
-    )
-    
-    # Create the strategy with scoring
-    strategy = BestFirstCrawlingStrategy(
-        max_depth=2,          # Crawl 2 levels deep
-        max_pages=10,         # Limit to 10 pages total
-        url_scorer=scorer,    # Use keyword scoring
-        include_external=False  # Only internal links
-    )
-    
-    # Configure browser and crawler
-    browser_config = BrowserConfig(
-        headless=True,    # Run in background
-        verbose=False     # Reduce output noise
-    )
-    
-    crawler_config = CrawlerRunConfig(
-        deep_crawl_strategy=strategy,
-        verbose=False
-    )
-    
-    print("\nStarting crawl of https://docs.python.org/3/")
-    print("Looking for pages with keywords: tutorial, guide, reference, documentation")
-    print("-" * 70)
-    
-    crawled_urls = []
-    
-    async with AsyncWebCrawler(config=browser_config) as crawler:
-        # Crawl and collect results
-        results = await crawler.arun(
-            url="https://docs.python.org/3/",
-            config=crawler_config
-        )
-        
-        # Process results
-        if isinstance(results, list):
-            for result in results:
-                score = result.metadata.get('score', 0) if result.metadata else 0
-                depth = result.metadata.get('depth', 0) if result.metadata else 0
-                crawled_urls.append({
-                    'url': result.url,
-                    'score': score,
-                    'depth': depth,
-                    'success': result.success
-                })
-    
-    print("\n" + "=" * 70)
-    print("CRAWL RESULTS (in order of crawling)")
-    print("=" * 70)
-    
-    for i, item in enumerate(crawled_urls, 1):
-        status = "✓" if item['success'] else "✗"
-        # Highlight high-scoring pages
-        if item['score'] > 0.5:
-            print(f"{i:2}. [{status}] Score: {item['score']:.2f} | Depth: {item['depth']} | {item['url']}")
-            print(f"     ^ HIGH SCORE - Contains keywords!")
-        else:
-            print(f"{i:2}. [{status}] Score: {item['score']:.2f} | Depth: {item['depth']} | {item['url']}")
-    
-    print("\n" + "=" * 70)
-    print("ANALYSIS")
-    print("=" * 70)
-    
-    # Check if higher scores appear early in the crawl
-    scores = [item['score'] for item in crawled_urls[1:]]  # Skip initial URL
-    high_score_indices = [i for i, s in enumerate(scores) if s > 0.3]
-    
-    if high_score_indices and high_score_indices[0] < len(scores) / 2:
-        print("✅ SUCCESS: Higher-scoring pages (with keywords) were crawled early!")
-        print("   This confirms the priority queue fix is working.")
-    else:
-        print("⚠️  Check the crawl order above - higher scores should appear early")
-    
-    # Show score distribution
-    print(f"\nScore Statistics:")
-    print(f"  - Total pages crawled: {len(crawled_urls)}")
-    print(f"  - Average score: {sum(item['score'] for item in crawled_urls) / len(crawled_urls):.2f}")
-    print(f"  - Max score: {max(item['score'] for item in crawled_urls):.2f}")
-    print(f"  - Pages with keywords: {sum(1 for item in crawled_urls if item['score'] > 0.3)}")
-    
-    print("\n" + "=" * 70)
-    print("TEST COMPLETE")
-    print("=" * 70)
-
-if __name__ == "__main__":
-    print("\n🔍 BestFirstCrawlingStrategy Simple Test\n")
-    asyncio.run(test_best_first_strategy())
--- a/tests/memory/test_docker_config_gen.py
+++ b/tests/memory/test_docker_config_gen.py
@@ -24,7 +24,7 @@ CASES = [
    # --- BrowserConfig variants ---
    "BrowserConfig()",
    "BrowserConfig(headless=False, extra_args=['--disable-gpu'])",
-    "BrowserConfig(browser_mode='builtin', proxy_config={'server': 'http://1.2.3.4:8080'})",
+    "BrowserConfig(browser_mode='builtin', proxy='http://1.2.3.4:8080')",
 ]

 for code in CASES:
--- a/tests/proxy/test_proxy_deprecation.py
+++ b/tests/proxy/test_proxy_deprecation.py
@@ -1,42 +0,0 @@
-import warnings
-
-import pytest
-
-from crawl4ai.async_configs import BrowserConfig, ProxyConfig
-
-
-def test_browser_config_proxy_string_emits_deprecation_and_autoconverts():
-    warnings.simplefilter("always", DeprecationWarning)
-
-    proxy_str = "23.95.150.145:6114:username:password"
-    with warnings.catch_warnings(record=True) as caught:
-        cfg = BrowserConfig(proxy=proxy_str, headless=True)
-
-    dep_warnings = [w for w in caught if issubclass(w.category, DeprecationWarning)]
-    assert dep_warnings, "Expected DeprecationWarning when using BrowserConfig(proxy=...)"
-
-    assert cfg.proxy is None, "cfg.proxy should be None after auto-conversion"
-    assert isinstance(cfg.proxy_config, ProxyConfig), "cfg.proxy_config should be ProxyConfig instance"
-    assert cfg.proxy_config.username == "username"
-    assert cfg.proxy_config.password == "password"
-    assert cfg.proxy_config.server.startswith("http://")
-    assert cfg.proxy_config.server.endswith(":6114")
-
-
-def test_browser_config_with_proxy_config_emits_no_deprecation():
-    warnings.simplefilter("always", DeprecationWarning)
-
-    with warnings.catch_warnings(record=True) as caught:
-        cfg = BrowserConfig(
-            headless=True,
-            proxy_config={
-                "server": "http://127.0.0.1:8080",
-                "username": "u",
-                "password": "p",
-            },
-        )
-
-    dep_warnings = [w for w in caught if issubclass(w.category, DeprecationWarning)]
-    assert not dep_warnings, "Did not expect DeprecationWarning when using proxy_config"
-    assert cfg.proxy is None
-    assert isinstance(cfg.proxy_config, ProxyConfig)