Sync sec-ch-ua with User-Agent and keep WebGL alive in stealth mode

Fix a bug where magic mode and per-request UA overrides would change the User-Agent header without updating the sec-ch-ua (browser hint) header to match. Anti-bot systems like Akamai detect this mismatch as a bot signal.

Changes:
- Regenerate browser_hint via UAGen.generate_client_hints() whenever the UA is changed at crawl time (magic mode or explicit override)
- Re-apply the updated headers to the page via set_extra_http_headers()
- Skip the per-crawl UA override for persistent contexts, where the UA is locked at launch time by Playwright's protocol layer
- Move the --disable-gpu flags behind an enable_stealth check so they are only passed when stealth mode is off; with stealth mode on, WebGL keeps working via SwiftShader (missing WebGL is a detectable headless signal)
- Clean up old test scripts, add a clean anti-bot test
2026-02-13 04:10:47 +00:00
parent 112f44a97d
commit fdd989785f
9 changed files with 261 additions and 427 deletions
--- a/crawl4ai/async_crawler_strategy.py
+++ b/crawl4ai/async_crawler_strategy.py
@@ -19,7 +19,7 @@ from .config import SCREENSHOT_HEIGHT_TRESHOLD
 from .async_configs import BrowserConfig, CrawlerRunConfig, HTTPCrawlerConfig
 from .async_logger import AsyncLogger
 from .ssl_certificate import SSLCertificate
-from .user_agent_generator import ValidUAGenerator
+from .user_agent_generator import ValidUAGenerator, UAGen
 from .browser_manager import BrowserManager
 from .browser_adapter import BrowserAdapter, PlaywrightAdapter, UndetectedAdapter
@@ -534,18 +534,42 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
        captured_requests = []
        captured_console = []
-        # Handle user agent with magic mode
+        # Handle user agent with magic mode.
-        user_agent_to_override = config.user_agent
+        # For persistent contexts the UA is locked at browser launch time
-        if user_agent_to_override:
+        # (launch_persistent_context bakes it into the protocol layer), so
-            self.browser_config.user_agent = user_agent_to_override
+        # changing it here would only desync browser_config from reality.
-        elif config.magic or config.user_agent_mode == "random":
+        # Users should set user_agent or user_agent_mode on BrowserConfig.
-            self.browser_config.user_agent = ValidUAGenerator().generate(
+        ua_changed = False
-                **(config.user_agent_generator_config or {})
+        if not self.browser_config.use_persistent_context:
            user_agent_to_override = config.user_agent
            if user_agent_to_override:
                self.browser_config.user_agent = user_agent_to_override
                ua_changed = True
            elif config.magic or config.user_agent_mode == "random":
                self.browser_config.user_agent = ValidUAGenerator().generate(
                    **(config.user_agent_generator_config or {})
                )
                ua_changed = True
        # Keep sec-ch-ua in sync whenever the UA changed
        if ua_changed:
            self.browser_config.browser_hint = UAGen.generate_client_hints(
                self.browser_config.user_agent
            )
            self.browser_config.headers["sec-ch-ua"] = self.browser_config.browser_hint
        # Get page for session
        page, context = await self.browser_manager.get_page(crawlerRunConfig=config)
        # Push updated UA + sec-ch-ua to the page so the server sees them
        if ua_changed:
            combined_headers = {
                "User-Agent": self.browser_config.user_agent,
                "sec-ch-ua": self.browser_config.browser_hint,
            }
            combined_headers.update(self.browser_config.headers)
            await page.set_extra_http_headers(combined_headers)
        # await page.goto(URL)
        # Add default cookie
--- a/crawl4ai/browser_manager.py
+++ b/crawl4ai/browser_manager.py
@@ -70,9 +70,6 @@ class ManagedBrowser:
    def build_browser_flags(config: BrowserConfig) -> List[str]:
        """Common CLI flags for launching Chromium"""
        flags = [
            "--disable-gpu",
            "--disable-gpu-compositing",
            "--disable-software-rasterizer",
            "--no-sandbox",
            "--disable-dev-shm-usage",
            "--no-first-run",
@@ -93,6 +90,14 @@ class ManagedBrowser:
            "--disable-component-update",
            "--disable-domain-reliability",
        ]
        # GPU flags disable WebGL which anti-bot sensors detect as headless.
        # Keep WebGL working (via SwiftShader) when stealth mode is active.
        if not config.enable_stealth:
            flags.extend([
                "--disable-gpu",
                "--disable-gpu-compositing",
                "--disable-software-rasterizer",
            ])
        if config.memory_saving_mode:
            flags.extend([
                "--aggressive-cache-discard",
--- a/tests/proxy/test_chanel_basic.py
+++ b/tests/proxy/test_chanel_basic.py
@@ -1,61 +0,0 @@
 import asyncio
 import os
 import shutil
 import uuid
 from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
 async def crawl_chanel(url: str):
    # Fresh profile each time (gets flagged after one use)
    profile_dir = os.path.expanduser(f"~/.crawl4ai/chanel_{uuid.uuid4().hex[:8]}")
    os.makedirs(profile_dir, exist_ok=True)
    browser_config = BrowserConfig(
        headless=True,
        enable_stealth=True,
        use_persistent_context=True,
        user_data_dir=profile_dir,
        viewport_width=1920,
        viewport_height=1080,
        user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
        headers={
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.9",
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "none",
            "Sec-Fetch-User": "?1",
        }
    )
    run_config = CrawlerRunConfig(
        magic=True,
        simulate_user=True,
        override_navigator=True,
        page_timeout=120000,
        wait_until="load",
        delay_before_return_html=10.0,
    )
    try:
        async with AsyncWebCrawler(config=browser_config) as crawler:
            result = await crawler.arun(url, config=run_config)
            return result
    finally:
        shutil.rmtree(profile_dir, ignore_errors=True)
 async def main():
    url = "https://www.chanel.com/us/fashion/handbags/c/1x1x1/"
    result = await crawl_chanel(url)
    print(f"Status: {result.status_code}")
    print(f"Success: {result.success}")
    print(f"HTML: {len(result.html):,} bytes")
    if result.markdown:
        md_len = len(result.markdown.raw_markdown)
        print(f"Markdown: {md_len:,} chars")
    if result.error_message:
        print(f"Error: {result.error_message}")
 asyncio.run(main())
--- a/tests/proxy/test_chanel_cdp_proxy.py
+++ b/tests/proxy/test_chanel_cdp_proxy.py
@@ -0,0 +1,112 @@
 """
 Test: Chanel.com anti-bot bypass via crawl4ai
 Requires env vars:
  MASSIVE_USERNAME  — Massive residential proxy username
  MASSIVE_PASSWORD  — Massive residential proxy password
 Optional:
  --cdp URL       Connect to external browser via CDP (e.g. http://localhost:9223)
  --attempts N    Number of attempts per test (default 3)
 Usage:
  export MASSIVE_USERNAME="your_user"
  export MASSIVE_PASSWORD="your_pass"
  .venv/bin/python tests/proxy/test_chanel_cdp_proxy.py
  .venv/bin/python tests/proxy/test_chanel_cdp_proxy.py --cdp http://localhost:9223
 """
 import asyncio
 import os
 import sys
 import re
 import tempfile
 import shutil
 from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
 from crawl4ai.async_configs import ProxyConfig
 # Page requested on every crawl attempt.
 URL = "https://www.chanel.com/us/fashion/handbags/c/1x1x1/"
 # Proxy credentials are read from the environment; both default to "" when unset.
 MASSIVE_USERNAME = os.environ.get("MASSIVE_USERNAME", "")
 MASSIVE_PASSWORD = os.environ.get("MASSIVE_PASSWORD", "")
 # Proxy endpoint handed to ProxyConfig as its server.
 MASSIVE_SERVER = "https://network.joinmassive.com:65535"
 def get_proxy_config():
    """Return the ProxyConfig for the Massive proxy.

    The server and credentials come from the module-level MASSIVE_* values.
    If the username or the password is empty, an error is printed and the
    process exits with status 1.
    """
    if MASSIVE_USERNAME and MASSIVE_PASSWORD:
        return ProxyConfig(
            server=MASSIVE_SERVER,
            username=MASSIVE_USERNAME,
            password=MASSIVE_PASSWORD,
        )
    print("ERROR: Set MASSIVE_USERNAME and MASSIVE_PASSWORD env vars")
    sys.exit(1)
 async def test_isolated_context(cdp_url: str = None, attempts: int = 3):
    """Crawl URL repeatedly in an isolated, stealth-enabled browser context.

    Works with both Playwright and CDP: when ``cdp_url`` is given it is passed
    to BrowserConfig, otherwise ``headless=True`` is passed instead.

    Args:
        cdp_url: CDP endpoint of an external browser, or None.
        attempts: Number of crawl attempts made with the same crawler.

    Returns:
        True if at least one attempt returned status 200 with more than
        10,000 characters of HTML, otherwise False.
    """
    mode = f"CDP ({cdp_url})" if cdp_url else "Playwright Chromium"
    print(f"\n{'='*60}")
    print(f"Mode: Isolated context — {mode}")
    print(f"{'='*60}\n")

    kwargs = dict(
        enable_stealth=True,
        create_isolated_context=True,
        viewport_width=1920,
        viewport_height=1080,
    )
    if cdp_url:
        kwargs["cdp_url"] = cdp_url
    else:
        kwargs["headless"] = True
    config = BrowserConfig(**kwargs)
    run_config = CrawlerRunConfig(
        magic=True,
        simulate_user=True,
        override_navigator=True,
        proxy_config=get_proxy_config(),
        page_timeout=120000,
        wait_until="load",
        delay_before_return_html=15.0,
    )

    # Accept attributes on the tag, upper-case markup and titles that span
    # several lines; the plain "<title>(.*?)</title>" pattern missed all three.
    title_pattern = re.compile(
        r"<title\b[^>]*>(.*?)</title>", re.IGNORECASE | re.DOTALL
    )

    passed = 0
    async with AsyncWebCrawler(config=config) as crawler:
        for i in range(attempts):
            result = await crawler.arun(URL, config=run_config)
            # Guard against a result that carries no HTML (None) so that
            # len() cannot raise and the attempt is simply counted as FAIL.
            html = result.html or ""
            ok = result.status_code == 200 and len(html) > 10000
            title = ""
            if ok:
                passed += 1
                m = title_pattern.search(html)
                title = f"  title={m.group(1).strip()}" if m else ""
            print(f"  Attempt {i+1}: status={result.status_code}  html={len(html):>10,} bytes  {'PASS' if ok else 'FAIL'}{title}")
    print(f"\nResult: {passed}/{attempts} passed")
    return passed > 0
 async def main():
    """Parse the command line, run the isolated-context test and report.

    Recognised options are ``--cdp URL`` and ``--attempts N`` (default 3).
    Unrelated arguments are ignored. A flag without a value or a non-integer
    ``--attempts`` is reported as a usage error instead of being silently
    misread or surfacing as a traceback.

    Returns:
        The boolean result of test_isolated_context().
    """
    # Local import: argparse is only needed by this entry point.
    import argparse

    parser = argparse.ArgumentParser(
        description="Chanel.com anti-bot bypass via crawl4ai",
        allow_abbrev=False,
    )
    parser.add_argument(
        "--cdp",
        metavar="URL",
        default=None,
        help="Connect to external browser via CDP (e.g. http://localhost:9223)",
    )
    parser.add_argument(
        "--attempts",
        metavar="N",
        type=int,
        default=3,
        help="Number of attempts per test (default 3)",
    )
    # parse_known_args keeps the previous leniency toward unknown arguments.
    options, _ignored = parser.parse_known_args(sys.argv[1:])

    ok = await test_isolated_context(cdp_url=options.cdp, attempts=options.attempts)
    print(f"\n{'='*60}")
    print(f"Result: {'PASS' if ok else 'FAIL'}")
    print(f"{'='*60}")
    return ok
 if __name__ == "__main__":
    # Run the async entry point and mirror its outcome in the exit status:
    # 0 when main() returns a truthy result, 1 otherwise.
    ok = asyncio.run(main())
    sys.exit(0 if ok else 1)
--- a/tests/proxy/test_chanel_debug.py
+++ b/tests/proxy/test_chanel_debug.py
@@ -1,62 +0,0 @@
 import asyncio
 import os
 import shutil
 import uuid
 from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
 from crawl4ai.async_configs import ProxyConfig
 async def main():
    profile_dir = os.path.expanduser(f"~/.crawl4ai/chanel_{uuid.uuid4().hex[:8]}")
    os.makedirs(profile_dir, exist_ok=True)
    browser_config = BrowserConfig(
        headless=True,
        enable_stealth=True,
        use_persistent_context=True,
        user_data_dir=profile_dir,
        viewport_width=1920,
        viewport_height=1080,
        user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
        headers={
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.9",
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "none",
            "Sec-Fetch-User": "?1",
        },
        proxy_config=ProxyConfig(
            server="https://network.joinmassive.com:65535",
            username="mpuQHs4sWZ-country-US",
            password="D0yWxVQo8wQ05RWqz1Bn",
        ),
    )
    run_config = CrawlerRunConfig(
        magic=True,
        simulate_user=True,
        override_navigator=True,
        page_timeout=120000,
        wait_until="networkidle",
        delay_before_return_html=15.0,
    )
    try:
        async with AsyncWebCrawler(config=browser_config) as crawler:
            result = await crawler.arun(
                "https://www.chanel.com/us/fashion/handbags/c/1x1x1/",
                config=run_config,
            )
            print(f"Status: {result.status_code}")
            print(f"HTML bytes: {len(result.html)}")
            print(f"\n=== FULL HTML ===\n{result.html}")
            print(f"\n=== RESPONSE HEADERS ===")
            if result.response_headers:
                for k, v in sorted(result.response_headers.items()):
                    print(f"  {k}: {v}")
    finally:
        shutil.rmtree(profile_dir, ignore_errors=True)
 asyncio.run(main())
--- a/tests/proxy/test_chanel_multi_attempt.py
+++ b/tests/proxy/test_chanel_multi_attempt.py
@@ -1,147 +0,0 @@
 import asyncio
 import os
 import shutil
 import uuid
 from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, UndetectedAdapter
 from crawl4ai.async_crawler_strategy import AsyncPlaywrightCrawlerStrategy
 async def attempt(label, browser_config, run_config, crawler_strategy=None):
    print(f"\n{'='*60}")
    print(f"Attempt: {label}")
    print(f"{'='*60}")
    url = "https://www.chanel.com/us/fashion/handbags/c/1x1x1/"
    kwargs = {"config": browser_config}
    if crawler_strategy:
        kwargs["crawler_strategy"] = crawler_strategy
    try:
        async with AsyncWebCrawler(**kwargs) as crawler:
            result = await crawler.arun(url, config=run_config)
            print(f"Status: {result.status_code}")
            print(f"Success: {result.success}")
            print(f"HTML: {len(result.html):,} bytes")
            if result.markdown:
                print(f"Markdown: {len(result.markdown.raw_markdown):,} chars")
            if result.error_message:
                print(f"Error: {result.error_message}")
            # Check for anti-bot indicators
            html_lower = result.html.lower()
            for indicator in ["access denied", "403", "blocked", "captcha", "challenge"]:
                if indicator in html_lower:
                    print(f"  Anti-bot indicator found: '{indicator}'")
            return result
    except Exception as e:
        print(f"Exception: {e}")
        return None
 async def main():
    mac_ua = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "none",
        "Sec-Fetch-User": "?1",
    }
    # ---- Attempt 1: Mac UA + stealth (user's original approach) ----
    profile1 = os.path.expanduser(f"~/.crawl4ai/chanel_{uuid.uuid4().hex[:8]}")
    os.makedirs(profile1, exist_ok=True)
    try:
        bc1 = BrowserConfig(
            headless=True,
            enable_stealth=True,
            use_persistent_context=True,
            user_data_dir=profile1,
            viewport_width=1920,
            viewport_height=1080,
            user_agent=mac_ua,
            headers=headers,
        )
        rc1 = CrawlerRunConfig(
            magic=True,
            simulate_user=True,
            override_navigator=True,
            page_timeout=120000,
            wait_until="load",
            delay_before_return_html=10.0,
        )
        await attempt("Mac UA + Stealth + Magic (user's script)", bc1, rc1)
    finally:
        shutil.rmtree(profile1, ignore_errors=True)
    await asyncio.sleep(3)
    # ---- Attempt 2: Undetected adapter (patchright) ----
    profile2 = os.path.expanduser(f"~/.crawl4ai/chanel_{uuid.uuid4().hex[:8]}")
    os.makedirs(profile2, exist_ok=True)
    try:
        bc2 = BrowserConfig(
            headless=True,
            use_persistent_context=True,
            user_data_dir=profile2,
            viewport_width=1920,
            viewport_height=1080,
            user_agent=mac_ua,
            headers=headers,
        )
        rc2 = CrawlerRunConfig(
            simulate_user=True,
            override_navigator=True,
            page_timeout=120000,
            wait_until="load",
            delay_before_return_html=15.0,
        )
        adapter = UndetectedAdapter()
        strategy = AsyncPlaywrightCrawlerStrategy(
            browser_config=bc2,
            browser_adapter=adapter,
        )
        await attempt("Undetected Adapter (patchright)", bc2, rc2, crawler_strategy=strategy)
    finally:
        shutil.rmtree(profile2, ignore_errors=True)
    await asyncio.sleep(3)
    # ---- Attempt 3: Longer delay + networkidle ----
    profile3 = os.path.expanduser(f"~/.crawl4ai/chanel_{uuid.uuid4().hex[:8]}")
    os.makedirs(profile3, exist_ok=True)
    try:
        bc3 = BrowserConfig(
            headless=True,
            enable_stealth=True,
            use_persistent_context=True,
            user_data_dir=profile3,
            viewport_width=1920,
            viewport_height=1080,
            user_agent=mac_ua,
            headers=headers,
        )
        rc3 = CrawlerRunConfig(
            magic=True,
            simulate_user=True,
            override_navigator=True,
            page_timeout=120000,
            wait_until="networkidle",
            delay_before_return_html=20.0,
            js_code="""
            // Simulate human-like scrolling
            await new Promise(r => setTimeout(r, 2000));
            window.scrollTo({top: 300, behavior: 'smooth'});
            await new Promise(r => setTimeout(r, 1500));
            window.scrollTo({top: 600, behavior: 'smooth'});
            await new Promise(r => setTimeout(r, 1000));
            """,
        )
        await attempt("Stealth + networkidle + scroll + 20s delay", bc3, rc3)
    finally:
        shutil.rmtree(profile3, ignore_errors=True)
 asyncio.run(main())
--- a/tests/proxy/test_chanel_xvfb.py
+++ b/tests/proxy/test_chanel_xvfb.py
@@ -1,62 +0,0 @@
 import asyncio
 import os
 import shutil
 import uuid
 from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
 async def crawl_chanel(url: str):
    profile_dir = os.path.expanduser(f"~/.crawl4ai/chanel_{uuid.uuid4().hex[:8]}")
    os.makedirs(profile_dir, exist_ok=True)
    browser_config = BrowserConfig(
        headless=False,  # Non-headless via Xvfb - harder to detect
        enable_stealth=True,
        use_persistent_context=True,
        user_data_dir=profile_dir,
        viewport_width=1920,
        viewport_height=1080,
        user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
        headers={
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.9",
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "none",
            "Sec-Fetch-User": "?1",
        }
    )
    run_config = CrawlerRunConfig(
        magic=True,
        simulate_user=True,
        override_navigator=True,
        page_timeout=120000,
        wait_until="load",
        delay_before_return_html=10.0,
    )
    try:
        async with AsyncWebCrawler(config=browser_config) as crawler:
            result = await crawler.arun(url, config=run_config)
            return result
    finally:
        shutil.rmtree(profile_dir, ignore_errors=True)
 async def main():
    url = "https://www.chanel.com/us/fashion/handbags/c/1x1x1/"
    result = await crawl_chanel(url)
    print(f"Status: {result.status_code}")
    print(f"Success: {result.success}")
    print(f"HTML: {len(result.html):,} bytes")
    if result.markdown:
        md_len = len(result.markdown.raw_markdown)
        print(f"Markdown: {md_len:,} chars")
        if md_len > 500:
            print(f"\nFirst 500 chars of markdown:\n{result.markdown.raw_markdown[:500]}")
    if result.error_message:
        print(f"Error: {result.error_message}")
 asyncio.run(main())
--- a/tests/proxy/test_platform_match.py
+++ b/tests/proxy/test_platform_match.py
@@ -1,84 +0,0 @@
 """Test if matching UA to actual platform fixes Akamai detection."""
 import asyncio
 import os
 import shutil
 import uuid
 from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
 from crawl4ai.async_configs import ProxyConfig
 async def test_with_ua(label, user_agent):
    print(f"\n{'='*60}")
    print(f"Test: {label}")
    print(f"{'='*60}")
    profile_dir = os.path.expanduser(f"~/.crawl4ai/test_{uuid.uuid4().hex[:8]}")
    os.makedirs(profile_dir, exist_ok=True)
    browser_config = BrowserConfig(
        headless=True,
        enable_stealth=True,
        use_persistent_context=True,
        user_data_dir=profile_dir,
        viewport_width=1920,
        viewport_height=1080,
        user_agent=user_agent,
        headers={
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.9",
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "none",
            "Sec-Fetch-User": "?1",
        },
        proxy_config=ProxyConfig(
            server="https://network.joinmassive.com:65535",
            username="mpuQHs4sWZ-country-US",
            password="D0yWxVQo8wQ05RWqz1Bn",
        ),
    )
    run_config = CrawlerRunConfig(
        magic=True,
        simulate_user=True,
        override_navigator=True,
        page_timeout=120000,
        wait_until="load",
        delay_before_return_html=10.0,
    )
    try:
        async with AsyncWebCrawler(config=browser_config) as crawler:
            result = await crawler.arun(
                "https://www.chanel.com/us/fashion/handbags/c/1x1x1/",
                config=run_config,
            )
            print(f"  Status: {result.status_code}")
            print(f"  HTML bytes: {len(result.html)}")
            blocked = "access denied" in result.html.lower()
            print(f"  Blocked: {blocked}")
            if not blocked and len(result.html) > 1000:
                print(f"  SUCCESS! Got real content")
    except Exception as e:
        print(f"  EXCEPTION: {e}")
    finally:
        shutil.rmtree(profile_dir, ignore_errors=True)
 async def main():
    # Mac UA on Linux = platform mismatch
    await test_with_ua(
        "Mac UA (mismatched platform)",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
    )
    await asyncio.sleep(3)
    # Linux UA = matches actual navigator.platform
    await test_with_ua(
        "Linux UA (matching platform)",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
    )
 asyncio.run(main())
--- a/tests/proxy/test_proxy_verify.py
+++ b/tests/proxy/test_proxy_verify.py
@@ -0,0 +1,109 @@
 """
 Verify proxies are working and check what IPs they resolve to.
 Then test Chanel through NST proxy (different provider).
 """
 import requests
 # Check our real IP
 def check_ip(label, proxy=None):
    """Print the address that httpbin.org/ip reports for this connection.

    When ``proxy`` is truthy it is used for both http and https traffic.
    Any exception is caught and printed rather than raised.
    """
    print(f"\n--- {label} ---")
    try:
        # Only pass "proxies" when a proxy was actually supplied.
        extra = {"proxies": {"https": proxy, "http": proxy}} if proxy else {}
        reply = requests.get(url="https://httpbin.org/ip", timeout=15, **extra)
        print(f"  IP: {reply.json()}")
    except Exception as err:
        print(f"  ERROR: {err}")
 # Get NST proxy credentials
 def get_nst_proxy(channel_id):
    """Request one US HTTP proxy from the NST API and return it as a URL.

    The API token is read from the ``NSTPROXY_TOKEN`` environment variable so
    that no secret is committed with the script. Neither the token nor the
    proxy password is printed.

    Args:
        channel_id: NST channel identifier the proxy is generated for.

    Returns:
        ``http://user:password@ip:port`` on success. None when the token is
        not set, the request fails, or the response does not contain an
        ``ip:port:user:password`` entry.
    """
    # Local import: the rest of this module only needs requests.
    import os

    token = os.environ.get("NSTPROXY_TOKEN", "")
    if not token:
        print("  ERROR: Set NSTPROXY_TOKEN env var")
        return None

    api_url = "https://api.nstproxy.com/api/v1/generate/apiproxies"
    params = {
        "count": 1,
        "country": "US",
        "protocol": "http",
        "sessionDuration": 0,
        "channelId": channel_id,
        "token": token,
    }
    print(f"\nFetching NST proxy ({channel_id[:8]}...):")
    # Log the endpoint only; the query string carries the token.
    print(f"  URL: {api_url}")
    try:
        resp = requests.get(api_url, params=params, timeout=15)
        print(f"  HTTP {resp.status_code}")
        data = resp.json()
        if data.get("code") == 200 and data.get("data"):
            proxy_str = data["data"][0]
            parts = proxy_str.split(":")
            if len(parts) == 4:
                ip, port, user, pwd = parts
                proxy_url = f"http://{user}:{pwd}@{ip}:{port}"
                print(f"  Proxy URL: http://{user[:10]}...@{ip}:{port}")
                return proxy_url
        # A usable body holds proxy credentials, so it is only shown when
        # the response could not be turned into a proxy URL.
        print(f"  Body: {resp.text[:500]}")
    except Exception as e:
        print(f"  ERROR: {e}")
    return None
 # Test Chanel
 def test_chanel(label, proxy=None, use_cffi=False):
    """Fetch the Chanel handbags page once and print a short verdict.

    Args:
        label: Text printed in the test banner.
        proxy: Optional proxy URL applied to both http and https.
        use_cffi: When true, send the request with curl_cffi impersonating
            Chrome; otherwise use plain requests.

    Any exception (including a missing curl_cffi) is printed, not raised.
    """
    target = "https://www.chanel.com/us/fashion/handbags/c/1x1x1/"
    browser_headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
    }
    print(f"\n{'='*60}")
    print(f"TEST: {label}")
    try:
        # Both clients share the same base arguments; curl_cffi additionally
        # gets the browser to impersonate.
        request_args = {
            "url": target,
            "headers": browser_headers,
            "timeout": 30,
            "allow_redirects": True,
        }
        if use_cffi:
            from curl_cffi import requests as http_client
            request_args["impersonate"] = "chrome"
        else:
            http_client = requests
        if proxy:
            request_args["proxies"] = {"https": proxy, "http": proxy}
        resp = http_client.get(**request_args)

        blocked = "Access Denied" in resp.text
        if blocked:
            verdict = "BLOCKED"
        elif resp.status_code == 200 and len(resp.text) > 10000:
            verdict = "SUCCESS"
        else:
            verdict = "UNCLEAR"
        print(f"  Status: {resp.status_code}")
        print(f"  Size: {len(resp.text):,} bytes")
        print(f"  Result: {verdict}")
        if not blocked and resp.status_code == 200:
            print(f"  First 300 chars: {resp.text[:300]}")
    except Exception as err:
        print(f"  ERROR: {err}")
 if __name__ == "__main__":
    # Local import: only the script entry point needs the environment.
    import os

    # Massive credentials are read from the environment (the same variable
    # names tests/proxy/test_chanel_cdp_proxy.py uses) instead of being
    # embedded in the source. Any targeting suffix such as "-country-US"
    # belongs in MASSIVE_USERNAME.
    massive_user = os.environ.get("MASSIVE_USERNAME", "")
    massive_pass = os.environ.get("MASSIVE_PASSWORD", "")
    if massive_user and massive_pass:
        MASSIVE_RES = f"https://{massive_user}:{massive_pass}@network.joinmassive.com:65535"
        MASSIVE_DC = f"http://{massive_user}:{massive_pass}@isp.joinmassive.com:8000"
    else:
        print("WARNING: MASSIVE_USERNAME / MASSIVE_PASSWORD not set; skipping Massive proxies")
        MASSIVE_RES = None
        MASSIVE_DC = None

    # Step 1: Verify IPs
    print("="*60)
    print("STEP 1: Verify proxy IPs")
    check_ip("Direct (Hetzner)")
    # Without credentials these would silently run as direct checks, so skip.
    if MASSIVE_RES:
        check_ip("Massive Residential", MASSIVE_RES)
    if MASSIVE_DC:
        check_ip("Massive Datacenter/ISP", MASSIVE_DC)

    # Step 2: Get NST proxies
    print("\n" + "="*60)
    print("STEP 2: Get NST proxy credentials")
    nst_res = get_nst_proxy("7864DDA266D5899C")  # residential
    nst_dc = get_nst_proxy("AE0C3B5547F8A021")   # datacenter
    if nst_res:
        check_ip("NST Residential", nst_res)
    if nst_dc:
        check_ip("NST Datacenter", nst_dc)

    # Step 3: Test Chanel with all available proxies
    print("\n" + "="*60)
    print("STEP 3: Test Chanel.com")
    if nst_res:
        test_chanel("curl_cffi + NST residential", proxy=nst_res, use_cffi=True)
        test_chanel("plain requests + NST residential", proxy=nst_res, use_cffi=False)
    if nst_dc:
        test_chanel("curl_cffi + NST datacenter", proxy=nst_dc, use_cffi=True)
    # Also try Massive ISP/datacenter (different from residential)
    if MASSIVE_DC:
        test_chanel("curl_cffi + Massive ISP", proxy=MASSIVE_DC, use_cffi=True)