Fix proxy auth for persistent browser contexts

Chromium's --proxy-server CLI flag silently ignores inline credentials
(user:pass@server). For persistent contexts, crawl4ai was embedding
credentials in this flag via ManagedBrowser.build_browser_flags(), causing
proxy auth to fail and the browser to fall back to a direct connection.

Fix: use Playwright's launch_persistent_context(proxy=...) API instead of
subprocess + CDP when use_persistent_context=True. This handles proxy
authentication properly via the HTTP CONNECT handshake. The non-persistent
and CDP paths remain unchanged.

Changes:
- Strip credentials from the --proxy-server flag in build_browser_flags()
- Add a launch_persistent_context() path in BrowserManager.start()
- Add a matching cleanup path in BrowserManager.close()
- Guard create_browser_context() when self.browser is None
- Add regression tests covering all 4 proxy/persistence combinations
2026-02-12 11:19:29 +00:00
parent 1a24ac785e
commit 112f44a97d
8 changed files with 690 additions and 5 deletions
--- a/crawl4ai/browser_manager.py
+++ b/crawl4ai/browser_manager.py
@@ -109,14 +109,13 @@ class ManagedBrowser:
                "--disable-software-rasterizer",
                "--disable-dev-shm-usage",
            ])
-        # proxy support
+        # proxy support — only pass server URL, never credentials.
+        # Chromium's --proxy-server flag silently ignores inline user:pass@.
+        # Auth credentials are handled at the Playwright context level instead.
        if config.proxy:
            flags.append(f"--proxy-server={config.proxy}")
        elif config.proxy_config:
-            creds = ""
-            if config.proxy_config.username and config.proxy_config.password:
-                creds = f"{config.proxy_config.username}:{config.proxy_config.password}@"
-            flags.append(f"--proxy-server={creds}{config.proxy_config.server}")
+            flags.append(f"--proxy-server={config.proxy_config.server}")
        # dedupe
        return list(dict.fromkeys(flags))

@@ -711,6 +710,7 @@ class BrowserManager:
        self.managed_browser = None
        self.playwright = None
        self._using_cached_cdp = False
+        self._launched_persistent = False  # True when using launch_persistent_context

        # Session management
        self.sessions = {}
@@ -793,6 +793,76 @@ class BrowserManager:
            # Initialize playwright
            self.playwright = await async_playwright().start()

+        # ── Persistent context via Playwright's native API ──────────────
+        # When use_persistent_context is set and we're not connecting to an
+        # external CDP endpoint, use launch_persistent_context() instead of
+        # subprocess + CDP.  This properly supports proxy authentication
+        # (server + username + password) which the --proxy-server CLI flag
+        # cannot handle.
+        if (
+            self.config.use_persistent_context
+            and not self.config.cdp_url
+            and not self._using_cached_cdp
+        ):
+            # Collect stealth / optimization CLI flags, excluding ones that
+            # launch_persistent_context handles via keyword arguments.
+            _skip_prefixes = (
+                "--proxy-server",
+                "--remote-debugging-port",
+                "--user-data-dir",
+                "--headless",
+                "--window-size",
+            )
+            cli_args = [
+                flag
+                for flag in ManagedBrowser.build_browser_flags(self.config)
+                if not flag.startswith(_skip_prefixes)
+            ]
+            if self.config.extra_args:
+                cli_args.extend(self.config.extra_args)
+
+            launch_kwargs = {
+                "headless": self.config.headless,
+                "args": list(dict.fromkeys(cli_args)),  # dedupe
+                "viewport": {
+                    "width": self.config.viewport_width,
+                    "height": self.config.viewport_height,
+                },
+                "user_agent": self.config.user_agent or None,
+                "ignore_https_errors": self.config.ignore_https_errors,
+                "accept_downloads": self.config.accept_downloads,
+            }
+
+            if self.config.proxy_config:
+                launch_kwargs["proxy"] = {
+                    "server": self.config.proxy_config.server,
+                    "username": self.config.proxy_config.username,
+                    "password": self.config.proxy_config.password,
+                }
+
+            if self.config.storage_state:
+                launch_kwargs["storage_state"] = self.config.storage_state
+
+            user_data_dir = self.config.user_data_dir or tempfile.mkdtemp(
+                prefix="crawl4ai-persistent-"
+            )
+
+            self.default_context = (
+                await self.playwright.chromium.launch_persistent_context(
+                    user_data_dir, **launch_kwargs
+                )
+            )
+            self.browser = None  # persistent context has no separate Browser
+            self._launched_persistent = True
+
+            await self.setup_context(self.default_context)
+
+            # Set the browser endpoint key for global page tracking
+            self._browser_endpoint_key = self._compute_browser_endpoint_key()
+            if self._browser_endpoint_key not in BrowserManager._global_pages_in_use:
+                BrowserManager._global_pages_in_use[self._browser_endpoint_key] = set()
+            return
+
        if self.config.cdp_url or self.config.use_managed_browser:
            self.config.use_managed_browser = True

@@ -1158,6 +1228,12 @@ class BrowserManager:
        Returns:
            Context: Browser context object with the specified configurations
        """
+        if self.browser is None:
+            raise RuntimeError(
+                "Cannot create new browser contexts when using "
+                "use_persistent_context=True. Persistent context uses a "
+                "single shared context."
+            )
        # Base settings
        user_agent = self.config.headers.get("User-Agent", self.config.user_agent) 
        viewport_settings = {
@@ -1858,6 +1934,35 @@ class BrowserManager:
                    self.playwright = None
            return

+        # ── Persistent context launched via launch_persistent_context ──
+        if self._launched_persistent:
+            session_ids = list(self.sessions.keys())
+            for session_id in session_ids:
+                await self.kill_session(session_id)
+            for ctx in self.contexts_by_config.values():
+                try:
+                    await ctx.close()
+                except Exception:
+                    pass
+            self.contexts_by_config.clear()
+            self._context_refcounts.clear()
+            self._context_last_used.clear()
+            self._page_to_sig.clear()
+
+            # Closing the persistent context also terminates the browser
+            if self.default_context:
+                try:
+                    await self.default_context.close()
+                except Exception:
+                    pass
+                self.default_context = None
+
+            if self.playwright:
+                await self.playwright.stop()
+                self.playwright = None
+            self._launched_persistent = False
+            return
+
        if self.config.sleep_on_close:
            await asyncio.sleep(0.5)

--- a/tests/proxy/test_chanel_basic.py
+++ b/tests/proxy/test_chanel_basic.py
@@ -0,0 +1,61 @@
+import asyncio
+import os
+import shutil
+import uuid
+from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
+
+
+async def crawl_chanel(url: str):
+    """Crawl *url* with a fresh persistent profile (a profile gets flagged after one use)."""
+    scratch_profile = os.path.expanduser(f"~/.crawl4ai/chanel_{uuid.uuid4().hex[:8]}")
+    os.makedirs(scratch_profile, exist_ok=True)
+
+    request_headers = {
+        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+        "Accept-Language": "en-US,en;q=0.9",
+        "Sec-Fetch-Dest": "document",
+        "Sec-Fetch-Mode": "navigate",
+        "Sec-Fetch-Site": "none",
+        "Sec-Fetch-User": "?1",
+    }
+    browser_cfg = BrowserConfig(
+        headless=True,
+        enable_stealth=True,
+        use_persistent_context=True,
+        user_data_dir=scratch_profile,
+        viewport_width=1920,
+        viewport_height=1080,
+        user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
+        headers=request_headers,
+    )
+
+    crawl_cfg = CrawlerRunConfig(
+        magic=True,
+        simulate_user=True,
+        override_navigator=True,
+        page_timeout=120000,
+        wait_until="load",
+        delay_before_return_html=10.0,
+    )
+
+    try:
+        async with AsyncWebCrawler(config=browser_cfg) as crawler:
+            return await crawler.arun(url, config=crawl_cfg)
+    finally:
+        shutil.rmtree(scratch_profile, ignore_errors=True)
+
+
+async def main():
+    """Crawl the Chanel handbags listing once and print a short summary of the result."""
+    result = await crawl_chanel("https://www.chanel.com/us/fashion/handbags/c/1x1x1/")
+    print(f"Status: {result.status_code}")
+    print(f"Success: {result.success}")
+    print(f"HTML: {len(result.html):,} bytes")
+    if result.markdown:
+        print(f"Markdown: {len(result.markdown.raw_markdown):,} chars")
+    if result.error_message:
+        print(f"Error: {result.error_message}")
+
+
+if __name__ == "__main__":  # importing this module (e.g. pytest collecting test_*.py) must not start a live crawl
+    asyncio.run(main())
--- a/tests/proxy/test_chanel_debug.py
+++ b/tests/proxy/test_chanel_debug.py
@@ -0,0 +1,62 @@
+import asyncio
+import os
+import shutil
+import uuid
+from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
+from crawl4ai.async_configs import ProxyConfig
+
+
+async def main():
+    """Crawl Chanel through the authenticated proxy and dump status, full HTML and response headers."""
+    profile_dir = os.path.expanduser(f"~/.crawl4ai/chanel_{uuid.uuid4().hex[:8]}")
+    os.makedirs(profile_dir, exist_ok=True)
+
+    browser_config = BrowserConfig(
+        headless=True,
+        enable_stealth=True,
+        use_persistent_context=True,
+        user_data_dir=profile_dir,
+        viewport_width=1920,
+        viewport_height=1080,
+        user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
+        headers={
+            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+            "Accept-Language": "en-US,en;q=0.9",
+            "Sec-Fetch-Dest": "document",
+            "Sec-Fetch-Mode": "navigate",
+            "Sec-Fetch-Site": "none",
+            "Sec-Fetch-User": "?1",
+        },
+        proxy_config=ProxyConfig(
+            server=os.environ["CRAWL4AI_TEST_PROXY_SERVER"],  # KeyError if unset: secrets stay out of the repo
+            username=os.environ["CRAWL4AI_TEST_PROXY_USERNAME"],
+            password=os.environ["CRAWL4AI_TEST_PROXY_PASSWORD"],
+        ),
+    )
+
+    run_config = CrawlerRunConfig(
+        magic=True,
+        simulate_user=True,
+        override_navigator=True,
+        page_timeout=120000,
+        wait_until="networkidle",
+        delay_before_return_html=15.0,
+    )
+
+    try:
+        async with AsyncWebCrawler(config=browser_config) as crawler:
+            url = "https://www.chanel.com/us/fashion/handbags/c/1x1x1/"
+            result = await crawler.arun(url, config=run_config)
+            print(f"Status: {result.status_code}")
+            print(f"HTML bytes: {len(result.html)}")
+            print(f"\n=== FULL HTML ===\n{result.html}")
+            print("\n=== RESPONSE HEADERS ===")
+            if result.response_headers:
+                for k, v in sorted(result.response_headers.items()):
+                    print(f"  {k}: {v}")
+    finally:
+        shutil.rmtree(profile_dir, ignore_errors=True)
+
+
+if __name__ == "__main__":  # importing this module (e.g. pytest collecting test_*.py) must not start a live crawl
+    asyncio.run(main())
--- a/tests/proxy/test_chanel_multi_attempt.py
+++ b/tests/proxy/test_chanel_multi_attempt.py
@@ -0,0 +1,147 @@
+import asyncio
+import os
+import shutil
+import uuid
+from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, UndetectedAdapter
+from crawl4ai.async_crawler_strategy import AsyncPlaywrightCrawlerStrategy
+
+
+async def attempt(label, browser_config, run_config, crawler_strategy=None):
+    """Run one labelled crawl of the Chanel listing, print diagnostics, return the result (None on error)."""
+    print(f"\n{'='*60}")
+    print(f"Attempt: {label}")
+    print(f"{'='*60}")
+
+    target = "https://www.chanel.com/us/fashion/handbags/c/1x1x1/"
+    crawler_kwargs = {"config": browser_config}
+    if crawler_strategy:
+        crawler_kwargs["crawler_strategy"] = crawler_strategy
+
+    try:
+        async with AsyncWebCrawler(**crawler_kwargs) as crawler:
+            outcome = await crawler.arun(target, config=run_config)
+            print(f"Status: {outcome.status_code}")
+            print(f"Success: {outcome.success}")
+            print(f"HTML: {len(outcome.html):,} bytes")
+            if outcome.markdown:
+                print(f"Markdown: {len(outcome.markdown.raw_markdown):,} chars")
+            if outcome.error_message:
+                print(f"Error: {outcome.error_message}")
+            # Scan the lower-cased page for substrings that suggest an anti-bot response
+            lowered = outcome.html.lower()
+            for marker in ("access denied", "403", "blocked", "captcha", "challenge"):
+                if marker in lowered:
+                    print(f"  Anti-bot indicator found: '{marker}'")
+            return outcome
+    except Exception as exc:
+        print(f"Exception: {exc}")
+        return None
+
+
+async def main():
+    """Try three anti-bot configurations against the Chanel listing, pausing 3 s between attempts."""
+    mac_ua = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
+
+    headers = {
+        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+        "Accept-Language": "en-US,en;q=0.9",
+        "Sec-Fetch-Dest": "document",
+        "Sec-Fetch-Mode": "navigate",
+        "Sec-Fetch-Site": "none",
+        "Sec-Fetch-User": "?1",
+    }
+
+    # ---- Attempt 1: Mac UA + stealth (user's original approach) ----
+    profile1 = os.path.expanduser(f"~/.crawl4ai/chanel_{uuid.uuid4().hex[:8]}")
+    os.makedirs(profile1, exist_ok=True)
+    try:
+        bc1 = BrowserConfig(
+            headless=True,
+            enable_stealth=True,
+            use_persistent_context=True,
+            user_data_dir=profile1,
+            viewport_width=1920,
+            viewport_height=1080,
+            user_agent=mac_ua,
+            headers=headers,
+        )
+        rc1 = CrawlerRunConfig(
+            magic=True,
+            simulate_user=True,
+            override_navigator=True,
+            page_timeout=120000,
+            wait_until="load",
+            delay_before_return_html=10.0,
+        )
+        await attempt("Mac UA + Stealth + Magic (user's script)", bc1, rc1)
+    finally:
+        shutil.rmtree(profile1, ignore_errors=True)
+    await asyncio.sleep(3)
+
+    # ---- Attempt 2: Undetected adapter (patchright) ----
+    profile2 = os.path.expanduser(f"~/.crawl4ai/chanel_{uuid.uuid4().hex[:8]}")
+    os.makedirs(profile2, exist_ok=True)
+    try:
+        bc2 = BrowserConfig(
+            headless=True,
+            use_persistent_context=True,
+            user_data_dir=profile2,
+            viewport_width=1920,
+            viewport_height=1080,
+            user_agent=mac_ua,
+            headers=headers,
+        )
+        rc2 = CrawlerRunConfig(
+            simulate_user=True,
+            override_navigator=True,
+            page_timeout=120000,
+            wait_until="load",
+            delay_before_return_html=15.0,
+        )
+        adapter = UndetectedAdapter()
+        strategy = AsyncPlaywrightCrawlerStrategy(
+            browser_config=bc2,
+            browser_adapter=adapter,
+        )
+        await attempt("Undetected Adapter (patchright)", bc2, rc2, crawler_strategy=strategy)
+    finally:
+        shutil.rmtree(profile2, ignore_errors=True)
+    await asyncio.sleep(3)
+
+    # ---- Attempt 3: Longer delay + networkidle ----
+    profile3 = os.path.expanduser(f"~/.crawl4ai/chanel_{uuid.uuid4().hex[:8]}")
+    os.makedirs(profile3, exist_ok=True)
+    try:
+        bc3 = BrowserConfig(
+            headless=True,
+            enable_stealth=True,
+            use_persistent_context=True,
+            user_data_dir=profile3,
+            viewport_width=1920,
+            viewport_height=1080,
+            user_agent=mac_ua,
+            headers=headers,
+        )
+        rc3 = CrawlerRunConfig(
+            magic=True,
+            simulate_user=True,
+            override_navigator=True,
+            page_timeout=120000,
+            wait_until="networkidle",
+            delay_before_return_html=20.0,
+            js_code="""
+            // Simulate human-like scrolling
+            await new Promise(r => setTimeout(r, 2000));
+            window.scrollTo({top: 300, behavior: 'smooth'});
+            await new Promise(r => setTimeout(r, 1500));
+            window.scrollTo({top: 600, behavior: 'smooth'});
+            await new Promise(r => setTimeout(r, 1000));
+            """,
+        )
+        await attempt("Stealth + networkidle + scroll + 20s delay", bc3, rc3)
+    finally:
+        shutil.rmtree(profile3, ignore_errors=True)
+
+
+if __name__ == "__main__":  # importing this module (e.g. pytest collecting test_*.py) must not start a live crawl
+    asyncio.run(main())
--- a/tests/proxy/test_chanel_xvfb.py
+++ b/tests/proxy/test_chanel_xvfb.py
@@ -0,0 +1,62 @@
+import asyncio
+import os
+import shutil
+import uuid
+from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
+
+
+async def crawl_chanel(url: str):
+    """Crawl *url* in a headed browser using a throwaway persistent profile that is removed afterwards."""
+    scratch_profile = os.path.expanduser(f"~/.crawl4ai/chanel_{uuid.uuid4().hex[:8]}")
+    os.makedirs(scratch_profile, exist_ok=True)
+
+    browser_cfg = BrowserConfig(
+        headless=False,  # Non-headless via Xvfb - harder to detect
+        enable_stealth=True,
+        use_persistent_context=True,
+        user_data_dir=scratch_profile,
+        viewport_width=1920,
+        viewport_height=1080,
+        user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
+        headers={
+            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+            "Accept-Language": "en-US,en;q=0.9",
+            "Sec-Fetch-Dest": "document",
+            "Sec-Fetch-Mode": "navigate",
+            "Sec-Fetch-Site": "none",
+            "Sec-Fetch-User": "?1",
+        }
+    )
+
+    crawl_cfg = CrawlerRunConfig(
+        magic=True,
+        simulate_user=True,
+        override_navigator=True,
+        page_timeout=120000,
+        wait_until="load",
+        delay_before_return_html=10.0,
+    )
+
+    try:
+        async with AsyncWebCrawler(config=browser_cfg) as crawler:
+            return await crawler.arun(url, config=crawl_cfg)
+    finally:
+        shutil.rmtree(scratch_profile, ignore_errors=True)
+
+
+async def main():
+    """Crawl the Chanel handbags listing once and print a summary plus a markdown preview."""
+    result = await crawl_chanel("https://www.chanel.com/us/fashion/handbags/c/1x1x1/")
+    print(f"Status: {result.status_code}\nSuccess: {result.success}")
+    print(f"HTML: {len(result.html):,} bytes")
+    if result.markdown:
+        md_len = len(result.markdown.raw_markdown)
+        print(f"Markdown: {md_len:,} chars")
+        if md_len > 500:
+            print(f"\nFirst 500 chars of markdown:\n{result.markdown.raw_markdown[:500]}")
+    if result.error_message:
+        print(f"Error: {result.error_message}")
+
+
+if __name__ == "__main__":  # importing this module (e.g. pytest collecting test_*.py) must not start a live crawl
+    asyncio.run(main())
--- a/tests/proxy/test_persistent_proxy.py
+++ b/tests/proxy/test_persistent_proxy.py
@@ -0,0 +1,68 @@
+import asyncio
+import os
+import shutil
+import uuid
+from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
+from crawl4ai.async_configs import ProxyConfig
+
+
+async def crawl_chanel(url: str):
+    """Crawl *url* via the proxy named by CRAWL4AI_TEST_PROXY_* env vars, using a throwaway profile."""
+    profile_dir = os.path.expanduser(f"~/.crawl4ai/chanel_{uuid.uuid4().hex[:8]}")
+    os.makedirs(profile_dir, exist_ok=True)
+
+    browser_config = BrowserConfig(
+        headless=True,
+        enable_stealth=True,
+        use_persistent_context=True,
+        user_data_dir=profile_dir,
+        viewport_width=1920,
+        viewport_height=1080,
+        user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
+        headers={
+            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+            "Accept-Language": "en-US,en;q=0.9",
+            "Sec-Fetch-Dest": "document",
+            "Sec-Fetch-Mode": "navigate",
+            "Sec-Fetch-Site": "none",
+            "Sec-Fetch-User": "?1",
+        },
+        proxy_config=ProxyConfig(
+            server=os.environ["CRAWL4AI_TEST_PROXY_SERVER"],  # KeyError if unset: secrets stay out of the repo
+            username=os.environ["CRAWL4AI_TEST_PROXY_USERNAME"],
+            password=os.environ["CRAWL4AI_TEST_PROXY_PASSWORD"],
+        ),
+    )
+
+    run_config = CrawlerRunConfig(
+        magic=True,
+        simulate_user=True,
+        override_navigator=True,
+        page_timeout=120000,
+        wait_until="load",
+        delay_before_return_html=10.0,
+    )
+
+    try:
+        async with AsyncWebCrawler(config=browser_config) as crawler:
+            return await crawler.arun(url, config=run_config)
+    finally:
+        shutil.rmtree(profile_dir, ignore_errors=True)
+
+
+async def main():
+    """Crawl the Chanel handbags listing through the proxy and print a summary plus a markdown preview."""
+    result = await crawl_chanel("https://www.chanel.com/us/fashion/handbags/c/1x1x1/")
+    print(f"Status: {result.status_code}\nSuccess: {result.success}")
+    print(f"HTML: {len(result.html):,} bytes")
+    if result.markdown:
+        md_len = len(result.markdown.raw_markdown)
+        print(f"Markdown: {md_len:,} chars")
+        if md_len > 500:
+            print(f"\nFirst 500 chars of markdown:\n{result.markdown.raw_markdown[:500]}")
+    if result.error_message:
+        print(f"Error: {result.error_message}")
+
+
+if __name__ == "__main__":  # importing this module (e.g. pytest collecting test_*.py) must not start a live crawl
+    asyncio.run(main())
--- a/tests/proxy/test_platform_match.py
+++ b/tests/proxy/test_platform_match.py
@@ -0,0 +1,84 @@
+"""Test if matching UA to actual platform fixes Akamai detection."""
+import asyncio
+import os
+import shutil
+import uuid
+from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
+from crawl4ai.async_configs import ProxyConfig
+
+
+async def test_with_ua(label, user_agent):
+    """Crawl the Chanel listing through the proxy with *user_agent* and report whether it was blocked."""
+    print(f"\n{'='*60}")
+    print(f"Test: {label}")
+    print(f"{'='*60}")
+
+    profile_dir = os.path.expanduser(f"~/.crawl4ai/test_{uuid.uuid4().hex[:8]}")
+    os.makedirs(profile_dir, exist_ok=True)
+
+    browser_config = BrowserConfig(
+        headless=True,
+        enable_stealth=True,
+        use_persistent_context=True,
+        user_data_dir=profile_dir,
+        viewport_width=1920,
+        viewport_height=1080,
+        user_agent=user_agent,
+        headers={
+            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+            "Accept-Language": "en-US,en;q=0.9",
+            "Sec-Fetch-Dest": "document",
+            "Sec-Fetch-Mode": "navigate",
+            "Sec-Fetch-Site": "none",
+            "Sec-Fetch-User": "?1",
+        },
+        # Proxy settings come from the environment (KeyError if unset) so no secret lives in the repo.
+        proxy_config=ProxyConfig(
+            server=os.environ["CRAWL4AI_TEST_PROXY_SERVER"],
+            username=os.environ["CRAWL4AI_TEST_PROXY_USERNAME"],
+            password=os.environ["CRAWL4AI_TEST_PROXY_PASSWORD"],
+        ),
+    )
+
+    run_config = CrawlerRunConfig(
+        magic=True,
+        simulate_user=True,
+        override_navigator=True,
+        page_timeout=120000,
+        wait_until="load",
+        delay_before_return_html=10.0,
+    )
+
+    try:
+        async with AsyncWebCrawler(config=browser_config) as crawler:
+            url = "https://www.chanel.com/us/fashion/handbags/c/1x1x1/"
+            result = await crawler.arun(url, config=run_config)
+            print(f"  Status: {result.status_code}")
+            print(f"  HTML bytes: {len(result.html)}")
+            blocked = "access denied" in result.html.lower()
+            print(f"  Blocked: {blocked}")
+            if not blocked and len(result.html) > 1000:
+                print(f"  SUCCESS! Got real content")
+    except Exception as e:
+        print(f"  EXCEPTION: {e}")
+    finally:
+        shutil.rmtree(profile_dir, ignore_errors=True)
+
+
+async def main():
+    """Run the same proxied crawl with a mismatched (Mac) and then a matching (Linux) user agent."""
+    # Mac UA on Linux = platform mismatch
+    await test_with_ua(
+        "Mac UA (mismatched platform)",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
+    )
+    await asyncio.sleep(3)
+    # Linux UA = matches actual navigator.platform
+    await test_with_ua(
+        "Linux UA (matching platform)",
+        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
+    )
+
+
+if __name__ == "__main__":  # importing this module (e.g. pytest collecting test_*.py) must not start a live crawl
+    asyncio.run(main())
--- a/tests/proxy/test_proxy_regression.py
+++ b/tests/proxy/test_proxy_regression.py
@@ -0,0 +1,96 @@
+"""Regression tests for proxy fix:
+1. Persistent context + proxy (new path via launch_persistent_context)
+2. Persistent context WITHOUT proxy (should still use launch_persistent_context)
+3. Non-persistent + proxy on CrawlerRunConfig (existing path, must not break)
+4. Non-persistent, no proxy (basic crawl, must not break)
+"""
+import asyncio
+import os
+import shutil
+import uuid
+from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
+from crawl4ai.async_configs import ProxyConfig
+
+TEST_URL = "https://httpbin.org/ip"  # Simple endpoint, returns IP
+
+
+async def test(label, browser_config, run_config=None):
+    """Crawl TEST_URL with the given configs, print a short report, return the result (None on error)."""
+    print(f"\n{'='*60}")
+    print(f"Test: {label}")
+    print(f"{'='*60}")
+    effective_config = run_config or CrawlerRunConfig()
+    try:
+        async with AsyncWebCrawler(config=browser_config) as crawler:
+            outcome = await crawler.arun(TEST_URL, config=effective_config)
+            print(f"  Status: {outcome.status_code}")
+            print(f"  HTML bytes: {len(outcome.html)}")
+            if outcome.markdown:
+                # httpbin.org/ip returns JSON with "origin" key
+                print(f"  Content: {outcome.markdown.raw_markdown.strip()[:200]}")
+            if outcome.error_message:
+                print(f"  ERROR: {outcome.error_message}")
+            return outcome
+    except Exception as exc:
+        print(f"  EXCEPTION: {exc}")
+        return None
+
+
+async def main():
+    """Run the four proxy/persistence regression scenarios in sequence."""
+    proxy = ProxyConfig(
+        server=os.environ["CRAWL4AI_TEST_PROXY_SERVER"],  # KeyError if unset: secrets stay out of the repo
+        username=os.environ["CRAWL4AI_TEST_PROXY_USERNAME"],
+        password=os.environ["CRAWL4AI_TEST_PROXY_PASSWORD"],
+    )
+
+    # 1. Persistent context + proxy (the fixed path)
+    pd = os.path.expanduser(f"~/.crawl4ai/test_{uuid.uuid4().hex[:8]}")
+    os.makedirs(pd, exist_ok=True)
+    try:
+        await test(
+            "Persistent + proxy (launch_persistent_context)",
+            BrowserConfig(
+                headless=True,
+                use_persistent_context=True,
+                user_data_dir=pd,
+                proxy_config=proxy,
+            ),
+        )
+    finally:
+        shutil.rmtree(pd, ignore_errors=True)
+
+    # 2. Persistent context WITHOUT proxy
+    pd2 = os.path.expanduser(f"~/.crawl4ai/test_{uuid.uuid4().hex[:8]}")
+    os.makedirs(pd2, exist_ok=True)
+    try:
+        await test(
+            "Persistent, no proxy (launch_persistent_context)",
+            BrowserConfig(
+                headless=True,
+                use_persistent_context=True,
+                user_data_dir=pd2,
+            ),
+        )
+    finally:
+        shutil.rmtree(pd2, ignore_errors=True)
+
+    # 3. Non-persistent + proxy on CrawlerRunConfig
+    await test(
+        "Non-persistent + proxy on RunConfig",
+        BrowserConfig(headless=True),
+        CrawlerRunConfig(proxy_config=proxy),
+    )
+
+    # 4. Basic crawl - no proxy, no persistent
+    await test(
+        "Basic crawl (no proxy, no persistent)",
+        BrowserConfig(headless=True),
+    )
+
+    print("\n" + "="*60)
+    print("All regression tests complete.")
+
+
+if __name__ == "__main__":  # importing this module (e.g. pytest collecting test_*.py) must not start a live crawl
+    asyncio.run(main())