From fdd989785fa8aa3ccdaa6175a8ce8db54b9a100f Mon Sep 17 00:00:00 2001 From: unclecode Date: Fri, 13 Feb 2026 04:10:47 +0000 Subject: [PATCH] Sync sec-ch-ua with User-Agent and keep WebGL alive in stealth mode Fix a bug where magic mode and per-request UA overrides would change the User-Agent header without updating the sec-ch-ua (browser hint) header to match. Anti-bot systems like Akamai detect this mismatch as a bot signal. Changes: - Regenerate browser_hint via UAGen.generate_client_hints() whenever the UA is changed at crawl time (magic mode or explicit override) - Re-apply updated headers to the page via set_extra_http_headers() - Skip per-crawl UA override for persistent contexts where the UA is locked at launch time by Playwright's protocol layer - Move --disable-gpu flags behind enable_stealth check so WebGL works via SwiftShader when stealth mode is active (missing WebGL is a detectable headless signal) - Clean up old test scripts, add clean anti-bot test --- crawl4ai/async_crawler_strategy.py | 40 ++++-- crawl4ai/browser_manager.py | 11 +- tests/proxy/test_chanel_basic.py | 61 ---------- tests/proxy/test_chanel_cdp_proxy.py | 112 +++++++++++++++++ tests/proxy/test_chanel_debug.py | 62 ---------- tests/proxy/test_chanel_multi_attempt.py | 147 ----------------------- tests/proxy/test_chanel_xvfb.py | 62 ---------- tests/proxy/test_platform_match.py | 84 ------------- tests/proxy/test_proxy_verify.py | 109 +++++++++++++++++ 9 files changed, 261 insertions(+), 427 deletions(-) delete mode 100644 tests/proxy/test_chanel_basic.py create mode 100644 tests/proxy/test_chanel_cdp_proxy.py delete mode 100644 tests/proxy/test_chanel_debug.py delete mode 100644 tests/proxy/test_chanel_multi_attempt.py delete mode 100644 tests/proxy/test_chanel_xvfb.py delete mode 100644 tests/proxy/test_platform_match.py create mode 100644 tests/proxy/test_proxy_verify.py diff --git a/crawl4ai/async_crawler_strategy.py b/crawl4ai/async_crawler_strategy.py index 9ac3f7fc..858f4cfc 
100644 --- a/crawl4ai/async_crawler_strategy.py +++ b/crawl4ai/async_crawler_strategy.py @@ -19,7 +19,7 @@ from .config import SCREENSHOT_HEIGHT_TRESHOLD from .async_configs import BrowserConfig, CrawlerRunConfig, HTTPCrawlerConfig from .async_logger import AsyncLogger from .ssl_certificate import SSLCertificate -from .user_agent_generator import ValidUAGenerator +from .user_agent_generator import ValidUAGenerator, UAGen from .browser_manager import BrowserManager from .browser_adapter import BrowserAdapter, PlaywrightAdapter, UndetectedAdapter @@ -534,18 +534,42 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy): captured_requests = [] captured_console = [] - # Handle user agent with magic mode - user_agent_to_override = config.user_agent - if user_agent_to_override: - self.browser_config.user_agent = user_agent_to_override - elif config.magic or config.user_agent_mode == "random": - self.browser_config.user_agent = ValidUAGenerator().generate( - **(config.user_agent_generator_config or {}) + # Handle user agent with magic mode. + # For persistent contexts the UA is locked at browser launch time + # (launch_persistent_context bakes it into the protocol layer), so + # changing it here would only desync browser_config from reality. + # Users should set user_agent or user_agent_mode on BrowserConfig. 
+ ua_changed = False + if not self.browser_config.use_persistent_context: + user_agent_to_override = config.user_agent + if user_agent_to_override: + self.browser_config.user_agent = user_agent_to_override + ua_changed = True + elif config.magic or config.user_agent_mode == "random": + self.browser_config.user_agent = ValidUAGenerator().generate( + **(config.user_agent_generator_config or {}) + ) + ua_changed = True + + # Keep sec-ch-ua in sync whenever the UA changed + if ua_changed: + self.browser_config.browser_hint = UAGen.generate_client_hints( + self.browser_config.user_agent ) + self.browser_config.headers["sec-ch-ua"] = self.browser_config.browser_hint # Get page for session page, context = await self.browser_manager.get_page(crawlerRunConfig=config) + # Push updated UA + sec-ch-ua to the page so the server sees them + if ua_changed: + combined_headers = { + "User-Agent": self.browser_config.user_agent, + "sec-ch-ua": self.browser_config.browser_hint, + } + combined_headers.update(self.browser_config.headers) + await page.set_extra_http_headers(combined_headers) + # await page.goto(URL) # Add default cookie diff --git a/crawl4ai/browser_manager.py b/crawl4ai/browser_manager.py index 51e8f99b..2296d076 100644 --- a/crawl4ai/browser_manager.py +++ b/crawl4ai/browser_manager.py @@ -70,9 +70,6 @@ class ManagedBrowser: def build_browser_flags(config: BrowserConfig) -> List[str]: """Common CLI flags for launching Chromium""" flags = [ - "--disable-gpu", - "--disable-gpu-compositing", - "--disable-software-rasterizer", "--no-sandbox", "--disable-dev-shm-usage", "--no-first-run", @@ -93,6 +90,14 @@ class ManagedBrowser: "--disable-component-update", "--disable-domain-reliability", ] + # GPU flags disable WebGL which anti-bot sensors detect as headless. + # Keep WebGL working (via SwiftShader) when stealth mode is active. 
+ if not config.enable_stealth: + flags.extend([ + "--disable-gpu", + "--disable-gpu-compositing", + "--disable-software-rasterizer", + ]) if config.memory_saving_mode: flags.extend([ "--aggressive-cache-discard", diff --git a/tests/proxy/test_chanel_basic.py b/tests/proxy/test_chanel_basic.py deleted file mode 100644 index 86201c5c..00000000 --- a/tests/proxy/test_chanel_basic.py +++ /dev/null @@ -1,61 +0,0 @@ -import asyncio -import os -import shutil -import uuid -from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig - - -async def crawl_chanel(url: str): - # Fresh profile each time (gets flagged after one use) - profile_dir = os.path.expanduser(f"~/.crawl4ai/chanel_{uuid.uuid4().hex[:8]}") - os.makedirs(profile_dir, exist_ok=True) - - browser_config = BrowserConfig( - headless=True, - enable_stealth=True, - use_persistent_context=True, - user_data_dir=profile_dir, - viewport_width=1920, - viewport_height=1080, - user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36", - headers={ - "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", - "Accept-Language": "en-US,en;q=0.9", - "Sec-Fetch-Dest": "document", - "Sec-Fetch-Mode": "navigate", - "Sec-Fetch-Site": "none", - "Sec-Fetch-User": "?1", - } - ) - - run_config = CrawlerRunConfig( - magic=True, - simulate_user=True, - override_navigator=True, - page_timeout=120000, - wait_until="load", - delay_before_return_html=10.0, - ) - - try: - async with AsyncWebCrawler(config=browser_config) as crawler: - result = await crawler.arun(url, config=run_config) - return result - finally: - shutil.rmtree(profile_dir, ignore_errors=True) - - -async def main(): - url = "https://www.chanel.com/us/fashion/handbags/c/1x1x1/" - result = await crawl_chanel(url) - print(f"Status: {result.status_code}") - print(f"Success: {result.success}") - print(f"HTML: {len(result.html):,} bytes") - if result.markdown: - md_len = 
len(result.markdown.raw_markdown) - print(f"Markdown: {md_len:,} chars") - if result.error_message: - print(f"Error: {result.error_message}") - - -asyncio.run(main()) diff --git a/tests/proxy/test_chanel_cdp_proxy.py b/tests/proxy/test_chanel_cdp_proxy.py new file mode 100644 index 00000000..3fc90bb1 --- /dev/null +++ b/tests/proxy/test_chanel_cdp_proxy.py @@ -0,0 +1,112 @@ +""" +Test: Chanel.com anti-bot bypass via crawl4ai + +Requires env vars: + MASSIVE_USERNAME — Massive residential proxy username + MASSIVE_PASSWORD — Massive residential proxy password + +Optional: + --cdp URL Connect to external browser via CDP (e.g. http://localhost:9223) + --attempts N Number of attempts per test (default 3) + +Usage: + export MASSIVE_USERNAME="your_user" + export MASSIVE_PASSWORD="your_pass" + .venv/bin/python tests/proxy/test_chanel_cdp_proxy.py + .venv/bin/python tests/proxy/test_chanel_cdp_proxy.py --cdp http://localhost:9223 +""" + +import asyncio +import os +import sys +import re +import tempfile +import shutil +from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig +from crawl4ai.async_configs import ProxyConfig + +URL = "https://www.chanel.com/us/fashion/handbags/c/1x1x1/" + +MASSIVE_USERNAME = os.environ.get("MASSIVE_USERNAME", "") +MASSIVE_PASSWORD = os.environ.get("MASSIVE_PASSWORD", "") +MASSIVE_SERVER = "https://network.joinmassive.com:65535" + + +def get_proxy_config(): + if not MASSIVE_USERNAME or not MASSIVE_PASSWORD: + print("ERROR: Set MASSIVE_USERNAME and MASSIVE_PASSWORD env vars") + sys.exit(1) + return ProxyConfig( + server=MASSIVE_SERVER, + username=MASSIVE_USERNAME, + password=MASSIVE_PASSWORD, + ) + + +async def test_isolated_context(cdp_url: str = None, attempts: int = 3): + """Test with isolated context (works with both Playwright and CDP).""" + mode = f"CDP ({cdp_url})" if cdp_url else "Playwright Chromium" + print(f"\n{'='*60}") + print(f"Mode: Isolated context — {mode}") + print(f"{'='*60}\n") + + kwargs = dict( + 
enable_stealth=True, + create_isolated_context=True, + viewport_width=1920, + viewport_height=1080, + ) + if cdp_url: + kwargs["cdp_url"] = cdp_url + else: + kwargs["headless"] = True + + config = BrowserConfig(**kwargs) + run_config = CrawlerRunConfig( + magic=True, + simulate_user=True, + override_navigator=True, + proxy_config=get_proxy_config(), + page_timeout=120000, + wait_until="load", + delay_before_return_html=15.0, + ) + + passed = 0 + async with AsyncWebCrawler(config=config) as crawler: + for i in range(attempts): + result = await crawler.arun(URL, config=run_config) + ok = result.status_code == 200 and len(result.html) > 10000 + title = "" + if ok: + passed += 1 + m = re.search(r"<title>(.*?)</title>", result.html) + title = f" title={m.group(1)}" if m else "" + print(f" Attempt {i+1}: status={result.status_code} html={len(result.html):>10,} bytes {'PASS' if ok else 'FAIL'}{title}") + + print(f"\nResult: {passed}/{attempts} passed") + return passed > 0 + + +async def main(): + cdp_url = None + attempts = 3 + + args = sys.argv[1:] + for j, arg in enumerate(args): + if arg == "--cdp" and j + 1 < len(args): + cdp_url = args[j + 1] + if arg == "--attempts" and j + 1 < len(args): + attempts = int(args[j + 1]) + + ok = await test_isolated_context(cdp_url=cdp_url, attempts=attempts) + + print(f"\n{'='*60}") + print(f"Result: {'PASS' if ok else 'FAIL'}") + print(f"{'='*60}") + return ok + + +if __name__ == "__main__": + ok = asyncio.run(main()) + sys.exit(0 if ok else 1) diff --git a/tests/proxy/test_chanel_debug.py b/tests/proxy/test_chanel_debug.py deleted file mode 100644 index a656d512..00000000 --- a/tests/proxy/test_chanel_debug.py +++ /dev/null @@ -1,62 +0,0 @@ -import asyncio -import os -import shutil -import uuid -from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig -from crawl4ai.async_configs import ProxyConfig - - -async def main(): - profile_dir = os.path.expanduser(f"~/.crawl4ai/chanel_{uuid.uuid4().hex[:8]}") - os.makedirs(profile_dir, 
exist_ok=True) - - browser_config = BrowserConfig( - headless=True, - enable_stealth=True, - use_persistent_context=True, - user_data_dir=profile_dir, - viewport_width=1920, - viewport_height=1080, - user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36", - headers={ - "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", - "Accept-Language": "en-US,en;q=0.9", - "Sec-Fetch-Dest": "document", - "Sec-Fetch-Mode": "navigate", - "Sec-Fetch-Site": "none", - "Sec-Fetch-User": "?1", - }, - proxy_config=ProxyConfig( - server="https://network.joinmassive.com:65535", - username="mpuQHs4sWZ-country-US", - password="D0yWxVQo8wQ05RWqz1Bn", - ), - ) - - run_config = CrawlerRunConfig( - magic=True, - simulate_user=True, - override_navigator=True, - page_timeout=120000, - wait_until="networkidle", - delay_before_return_html=15.0, - ) - - try: - async with AsyncWebCrawler(config=browser_config) as crawler: - result = await crawler.arun( - "https://www.chanel.com/us/fashion/handbags/c/1x1x1/", - config=run_config, - ) - print(f"Status: {result.status_code}") - print(f"HTML bytes: {len(result.html)}") - print(f"\n=== FULL HTML ===\n{result.html}") - print(f"\n=== RESPONSE HEADERS ===") - if result.response_headers: - for k, v in sorted(result.response_headers.items()): - print(f" {k}: {v}") - finally: - shutil.rmtree(profile_dir, ignore_errors=True) - - -asyncio.run(main()) diff --git a/tests/proxy/test_chanel_multi_attempt.py b/tests/proxy/test_chanel_multi_attempt.py deleted file mode 100644 index a02b97aa..00000000 --- a/tests/proxy/test_chanel_multi_attempt.py +++ /dev/null @@ -1,147 +0,0 @@ -import asyncio -import os -import shutil -import uuid -from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, UndetectedAdapter -from crawl4ai.async_crawler_strategy import AsyncPlaywrightCrawlerStrategy - - -async def attempt(label, browser_config, 
run_config, crawler_strategy=None): - print(f"\n{'='*60}") - print(f"Attempt: {label}") - print(f"{'='*60}") - - url = "https://www.chanel.com/us/fashion/handbags/c/1x1x1/" - - kwargs = {"config": browser_config} - if crawler_strategy: - kwargs["crawler_strategy"] = crawler_strategy - - try: - async with AsyncWebCrawler(**kwargs) as crawler: - result = await crawler.arun(url, config=run_config) - print(f"Status: {result.status_code}") - print(f"Success: {result.success}") - print(f"HTML: {len(result.html):,} bytes") - if result.markdown: - print(f"Markdown: {len(result.markdown.raw_markdown):,} chars") - if result.error_message: - print(f"Error: {result.error_message}") - # Check for anti-bot indicators - html_lower = result.html.lower() - for indicator in ["access denied", "403", "blocked", "captcha", "challenge"]: - if indicator in html_lower: - print(f" Anti-bot indicator found: '{indicator}'") - return result - except Exception as e: - print(f"Exception: {e}") - return None - - -async def main(): - mac_ua = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36" - - headers = { - "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", - "Accept-Language": "en-US,en;q=0.9", - "Sec-Fetch-Dest": "document", - "Sec-Fetch-Mode": "navigate", - "Sec-Fetch-Site": "none", - "Sec-Fetch-User": "?1", - } - - # ---- Attempt 1: Mac UA + stealth (user's original approach) ---- - profile1 = os.path.expanduser(f"~/.crawl4ai/chanel_{uuid.uuid4().hex[:8]}") - os.makedirs(profile1, exist_ok=True) - try: - bc1 = BrowserConfig( - headless=True, - enable_stealth=True, - use_persistent_context=True, - user_data_dir=profile1, - viewport_width=1920, - viewport_height=1080, - user_agent=mac_ua, - headers=headers, - ) - rc1 = CrawlerRunConfig( - magic=True, - simulate_user=True, - override_navigator=True, - page_timeout=120000, - wait_until="load", - delay_before_return_html=10.0, - 
) - await attempt("Mac UA + Stealth + Magic (user's script)", bc1, rc1) - finally: - shutil.rmtree(profile1, ignore_errors=True) - - await asyncio.sleep(3) - - # ---- Attempt 2: Undetected adapter (patchright) ---- - profile2 = os.path.expanduser(f"~/.crawl4ai/chanel_{uuid.uuid4().hex[:8]}") - os.makedirs(profile2, exist_ok=True) - try: - bc2 = BrowserConfig( - headless=True, - use_persistent_context=True, - user_data_dir=profile2, - viewport_width=1920, - viewport_height=1080, - user_agent=mac_ua, - headers=headers, - ) - rc2 = CrawlerRunConfig( - simulate_user=True, - override_navigator=True, - page_timeout=120000, - wait_until="load", - delay_before_return_html=15.0, - ) - adapter = UndetectedAdapter() - strategy = AsyncPlaywrightCrawlerStrategy( - browser_config=bc2, - browser_adapter=adapter, - ) - await attempt("Undetected Adapter (patchright)", bc2, rc2, crawler_strategy=strategy) - finally: - shutil.rmtree(profile2, ignore_errors=True) - - await asyncio.sleep(3) - - # ---- Attempt 3: Longer delay + networkidle ---- - profile3 = os.path.expanduser(f"~/.crawl4ai/chanel_{uuid.uuid4().hex[:8]}") - os.makedirs(profile3, exist_ok=True) - try: - bc3 = BrowserConfig( - headless=True, - enable_stealth=True, - use_persistent_context=True, - user_data_dir=profile3, - viewport_width=1920, - viewport_height=1080, - user_agent=mac_ua, - headers=headers, - ) - rc3 = CrawlerRunConfig( - magic=True, - simulate_user=True, - override_navigator=True, - page_timeout=120000, - wait_until="networkidle", - delay_before_return_html=20.0, - js_code=""" - // Simulate human-like scrolling - await new Promise(r => setTimeout(r, 2000)); - window.scrollTo({top: 300, behavior: 'smooth'}); - await new Promise(r => setTimeout(r, 1500)); - window.scrollTo({top: 600, behavior: 'smooth'}); - await new Promise(r => setTimeout(r, 1000)); - """, - ) - await attempt("Stealth + networkidle + scroll + 20s delay", bc3, rc3) - finally: - shutil.rmtree(profile3, ignore_errors=True) - - 
-asyncio.run(main()) diff --git a/tests/proxy/test_chanel_xvfb.py b/tests/proxy/test_chanel_xvfb.py deleted file mode 100644 index c9221c17..00000000 --- a/tests/proxy/test_chanel_xvfb.py +++ /dev/null @@ -1,62 +0,0 @@ -import asyncio -import os -import shutil -import uuid -from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig - - -async def crawl_chanel(url: str): - profile_dir = os.path.expanduser(f"~/.crawl4ai/chanel_{uuid.uuid4().hex[:8]}") - os.makedirs(profile_dir, exist_ok=True) - - browser_config = BrowserConfig( - headless=False, # Non-headless via Xvfb - harder to detect - enable_stealth=True, - use_persistent_context=True, - user_data_dir=profile_dir, - viewport_width=1920, - viewport_height=1080, - user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36", - headers={ - "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", - "Accept-Language": "en-US,en;q=0.9", - "Sec-Fetch-Dest": "document", - "Sec-Fetch-Mode": "navigate", - "Sec-Fetch-Site": "none", - "Sec-Fetch-User": "?1", - } - ) - - run_config = CrawlerRunConfig( - magic=True, - simulate_user=True, - override_navigator=True, - page_timeout=120000, - wait_until="load", - delay_before_return_html=10.0, - ) - - try: - async with AsyncWebCrawler(config=browser_config) as crawler: - result = await crawler.arun(url, config=run_config) - return result - finally: - shutil.rmtree(profile_dir, ignore_errors=True) - - -async def main(): - url = "https://www.chanel.com/us/fashion/handbags/c/1x1x1/" - result = await crawl_chanel(url) - print(f"Status: {result.status_code}") - print(f"Success: {result.success}") - print(f"HTML: {len(result.html):,} bytes") - if result.markdown: - md_len = len(result.markdown.raw_markdown) - print(f"Markdown: {md_len:,} chars") - if md_len > 500: - print(f"\nFirst 500 chars of markdown:\n{result.markdown.raw_markdown[:500]}") - if 
result.error_message: - print(f"Error: {result.error_message}") - - -asyncio.run(main()) diff --git a/tests/proxy/test_platform_match.py b/tests/proxy/test_platform_match.py deleted file mode 100644 index 3e503242..00000000 --- a/tests/proxy/test_platform_match.py +++ /dev/null @@ -1,84 +0,0 @@ -"""Test if matching UA to actual platform fixes Akamai detection.""" -import asyncio -import os -import shutil -import uuid -from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig -from crawl4ai.async_configs import ProxyConfig - - -async def test_with_ua(label, user_agent): - print(f"\n{'='*60}") - print(f"Test: {label}") - print(f"{'='*60}") - - profile_dir = os.path.expanduser(f"~/.crawl4ai/test_{uuid.uuid4().hex[:8]}") - os.makedirs(profile_dir, exist_ok=True) - - browser_config = BrowserConfig( - headless=True, - enable_stealth=True, - use_persistent_context=True, - user_data_dir=profile_dir, - viewport_width=1920, - viewport_height=1080, - user_agent=user_agent, - headers={ - "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", - "Accept-Language": "en-US,en;q=0.9", - "Sec-Fetch-Dest": "document", - "Sec-Fetch-Mode": "navigate", - "Sec-Fetch-Site": "none", - "Sec-Fetch-User": "?1", - }, - proxy_config=ProxyConfig( - server="https://network.joinmassive.com:65535", - username="mpuQHs4sWZ-country-US", - password="D0yWxVQo8wQ05RWqz1Bn", - ), - ) - - run_config = CrawlerRunConfig( - magic=True, - simulate_user=True, - override_navigator=True, - page_timeout=120000, - wait_until="load", - delay_before_return_html=10.0, - ) - - try: - async with AsyncWebCrawler(config=browser_config) as crawler: - result = await crawler.arun( - "https://www.chanel.com/us/fashion/handbags/c/1x1x1/", - config=run_config, - ) - print(f" Status: {result.status_code}") - print(f" HTML bytes: {len(result.html)}") - blocked = "access denied" in result.html.lower() - print(f" Blocked: {blocked}") - if not blocked and len(result.html) > 
1000: - print(f" SUCCESS! Got real content") - except Exception as e: - print(f" EXCEPTION: {e}") - finally: - shutil.rmtree(profile_dir, ignore_errors=True) - - -async def main(): - # Mac UA on Linux = platform mismatch - await test_with_ua( - "Mac UA (mismatched platform)", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36", - ) - - await asyncio.sleep(3) - - # Linux UA = matches actual navigator.platform - await test_with_ua( - "Linux UA (matching platform)", - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36", - ) - - -asyncio.run(main()) diff --git a/tests/proxy/test_proxy_verify.py b/tests/proxy/test_proxy_verify.py new file mode 100644 index 00000000..bf9b4f4d --- /dev/null +++ b/tests/proxy/test_proxy_verify.py @@ -0,0 +1,109 @@ +""" +Verify proxies are working and check what IPs they resolve to. +Then test Chanel through NST proxy (different provider). +""" +import requests + +# Check our real IP +def check_ip(label, proxy=None): + print(f"\n--- {label} ---") + try: + kwargs = {"url": "https://httpbin.org/ip", "timeout": 15} + if proxy: + kwargs["proxies"] = {"https": proxy, "http": proxy} + resp = requests.get(**kwargs) + print(f" IP: {resp.json()}") + except Exception as e: + print(f" ERROR: {e}") + +# Get NST proxy credentials +def get_nst_proxy(channel_id): + token = "NSTPROXY-DA9C7A614946EA8FCEFDA9FD3B3F4A9D" + api_url = f"https://api.nstproxy.com/api/v1/generate/apiproxies?count=1&country=US&protocol=http&sessionDuration=0&channelId={channel_id}&token={token}" + print(f"\nFetching NST proxy ({channel_id[:8]}...):") + print(f" URL: {api_url}") + try: + resp = requests.get(api_url, timeout=15) + print(f" HTTP {resp.status_code}") + print(f" Body: {resp.text[:500]}") + data = resp.json() + if data.get("code") == 200 and data.get("data"): + proxy_str = data["data"][0] + parts = proxy_str.split(":") + if len(parts) == 4: + ip, port, 
user, pwd = parts + proxy_url = f"http://{user}:{pwd}@{ip}:{port}" + print(f" Proxy URL: http://{user[:10]}...@{ip}:{port}") + return proxy_url + except Exception as e: + print(f" ERROR: {e}") + return None + +# Test Chanel +def test_chanel(label, proxy=None, use_cffi=False): + url = "https://www.chanel.com/us/fashion/handbags/c/1x1x1/" + headers = { + "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Accept-Language": "en-US,en;q=0.9", + } + print(f"\n{'='*60}") + print(f"TEST: {label}") + try: + if use_cffi: + from curl_cffi import requests as cffi_requests + kwargs = {"url": url, "headers": headers, "impersonate": "chrome", "timeout": 30, "allow_redirects": True} + if proxy: + kwargs["proxies"] = {"https": proxy, "http": proxy} + resp = cffi_requests.get(**kwargs) + else: + kwargs = {"url": url, "headers": headers, "timeout": 30, "allow_redirects": True} + if proxy: + kwargs["proxies"] = {"https": proxy, "http": proxy} + resp = requests.get(**kwargs) + + blocked = "Access Denied" in resp.text + print(f" Status: {resp.status_code}") + print(f" Size: {len(resp.text):,} bytes") + print(f" Result: {'BLOCKED' if blocked else 'SUCCESS' if resp.status_code == 200 and len(resp.text) > 10000 else 'UNCLEAR'}") + if not blocked and resp.status_code == 200: + print(f" First 300 chars: {resp.text[:300]}") + except Exception as e: + print(f" ERROR: {e}") + + +if __name__ == "__main__": + MASSIVE_RES = "https://mpuQHs4sWZ-country-US:D0yWxVQo8wQ05RWqz1Bn@network.joinmassive.com:65535" + MASSIVE_DC = "http://mpuQHs4sWZ-country-US:D0yWxVQo8wQ05RWqz1Bn@isp.joinmassive.com:8000" + + # Step 1: Verify IPs + print("="*60) + print("STEP 1: Verify proxy IPs") + check_ip("Direct (Hetzner)") + check_ip("Massive Residential", MASSIVE_RES) + check_ip("Massive Datacenter/ISP", MASSIVE_DC) + + # Step 2: Get NST proxies + 
print("\n" + "="*60) + print("STEP 2: Get NST proxy credentials") + nst_res = get_nst_proxy("7864DDA266D5899C") # residential + nst_dc = get_nst_proxy("AE0C3B5547F8A021") # datacenter + + if nst_res: + check_ip("NST Residential", nst_res) + if nst_dc: + check_ip("NST Datacenter", nst_dc) + + # Step 3: Test Chanel with all available proxies + print("\n" + "="*60) + print("STEP 3: Test Chanel.com") + + if nst_res: + test_chanel("curl_cffi + NST residential", proxy=nst_res, use_cffi=True) + test_chanel("plain requests + NST residential", proxy=nst_res, use_cffi=False) + + if nst_dc: + test_chanel("curl_cffi + NST datacenter", proxy=nst_dc, use_cffi=True) + + # Also try Massive ISP/datacenter (different from residential) + test_chanel("curl_cffi + Massive ISP", proxy=MASSIVE_DC, use_cffi=True)