diff --git a/crawl4ai/async_crawler_strategy.py b/crawl4ai/async_crawler_strategy.py
index 9ac3f7fc..858f4cfc 100644
--- a/crawl4ai/async_crawler_strategy.py
+++ b/crawl4ai/async_crawler_strategy.py
@@ -19,7 +19,7 @@ from .config import SCREENSHOT_HEIGHT_TRESHOLD
from .async_configs import BrowserConfig, CrawlerRunConfig, HTTPCrawlerConfig
from .async_logger import AsyncLogger
from .ssl_certificate import SSLCertificate
-from .user_agent_generator import ValidUAGenerator
+from .user_agent_generator import ValidUAGenerator, UAGen
from .browser_manager import BrowserManager
from .browser_adapter import BrowserAdapter, PlaywrightAdapter, UndetectedAdapter
@@ -534,18 +534,42 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
captured_requests = []
captured_console = []
- # Handle user agent with magic mode
- user_agent_to_override = config.user_agent
- if user_agent_to_override:
- self.browser_config.user_agent = user_agent_to_override
- elif config.magic or config.user_agent_mode == "random":
- self.browser_config.user_agent = ValidUAGenerator().generate(
- **(config.user_agent_generator_config or {})
+ # Handle user agent with magic mode.
+ # For persistent contexts the UA is locked at browser launch time
+ # (launch_persistent_context bakes it into the protocol layer), so
+ # changing it here would only desync browser_config from reality.
+ # Users should set user_agent or user_agent_mode on BrowserConfig.
+ ua_changed = False
+ if not self.browser_config.use_persistent_context:
+ user_agent_to_override = config.user_agent
+ if user_agent_to_override:
+ self.browser_config.user_agent = user_agent_to_override
+ ua_changed = True
+ elif config.magic or config.user_agent_mode == "random":
+ self.browser_config.user_agent = ValidUAGenerator().generate(
+ **(config.user_agent_generator_config or {})
+ )
+ ua_changed = True
+
+ # Keep sec-ch-ua in sync whenever the UA changed
+ if ua_changed:
+ self.browser_config.browser_hint = UAGen.generate_client_hints(
+ self.browser_config.user_agent
)
+ self.browser_config.headers["sec-ch-ua"] = self.browser_config.browser_hint
# Get page for session
page, context = await self.browser_manager.get_page(crawlerRunConfig=config)
+ # Push updated UA + sec-ch-ua to the page so the server sees them
+ if ua_changed:
+            # Start from the configured headers, then force the fresh values
+            # so a stale User-Agent left in browser_config.headers cannot
+            # override the one we just generated above.
+            combined_headers = dict(self.browser_config.headers)
+            combined_headers["User-Agent"] = self.browser_config.user_agent
+            combined_headers["sec-ch-ua"] = self.browser_config.browser_hint
+            await page.set_extra_http_headers(combined_headers)
+
# await page.goto(URL)
# Add default cookie
diff --git a/crawl4ai/browser_manager.py b/crawl4ai/browser_manager.py
index 51e8f99b..2296d076 100644
--- a/crawl4ai/browser_manager.py
+++ b/crawl4ai/browser_manager.py
@@ -70,9 +70,6 @@ class ManagedBrowser:
def build_browser_flags(config: BrowserConfig) -> List[str]:
"""Common CLI flags for launching Chromium"""
flags = [
- "--disable-gpu",
- "--disable-gpu-compositing",
- "--disable-software-rasterizer",
"--no-sandbox",
"--disable-dev-shm-usage",
"--no-first-run",
@@ -93,6 +90,14 @@ class ManagedBrowser:
"--disable-component-update",
"--disable-domain-reliability",
]
+ # GPU flags disable WebGL which anti-bot sensors detect as headless.
+ # Keep WebGL working (via SwiftShader) when stealth mode is active.
+ if not config.enable_stealth:
+ flags.extend([
+ "--disable-gpu",
+ "--disable-gpu-compositing",
+ "--disable-software-rasterizer",
+ ])
if config.memory_saving_mode:
flags.extend([
"--aggressive-cache-discard",
diff --git a/tests/proxy/test_chanel_basic.py b/tests/proxy/test_chanel_basic.py
deleted file mode 100644
index 86201c5c..00000000
--- a/tests/proxy/test_chanel_basic.py
+++ /dev/null
@@ -1,61 +0,0 @@
-import asyncio
-import os
-import shutil
-import uuid
-from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
-
-
-async def crawl_chanel(url: str):
- # Fresh profile each time (gets flagged after one use)
- profile_dir = os.path.expanduser(f"~/.crawl4ai/chanel_{uuid.uuid4().hex[:8]}")
- os.makedirs(profile_dir, exist_ok=True)
-
- browser_config = BrowserConfig(
- headless=True,
- enable_stealth=True,
- use_persistent_context=True,
- user_data_dir=profile_dir,
- viewport_width=1920,
- viewport_height=1080,
- user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
- headers={
- "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
- "Accept-Language": "en-US,en;q=0.9",
- "Sec-Fetch-Dest": "document",
- "Sec-Fetch-Mode": "navigate",
- "Sec-Fetch-Site": "none",
- "Sec-Fetch-User": "?1",
- }
- )
-
- run_config = CrawlerRunConfig(
- magic=True,
- simulate_user=True,
- override_navigator=True,
- page_timeout=120000,
- wait_until="load",
- delay_before_return_html=10.0,
- )
-
- try:
- async with AsyncWebCrawler(config=browser_config) as crawler:
- result = await crawler.arun(url, config=run_config)
- return result
- finally:
- shutil.rmtree(profile_dir, ignore_errors=True)
-
-
-async def main():
- url = "https://www.chanel.com/us/fashion/handbags/c/1x1x1/"
- result = await crawl_chanel(url)
- print(f"Status: {result.status_code}")
- print(f"Success: {result.success}")
- print(f"HTML: {len(result.html):,} bytes")
- if result.markdown:
- md_len = len(result.markdown.raw_markdown)
- print(f"Markdown: {md_len:,} chars")
- if result.error_message:
- print(f"Error: {result.error_message}")
-
-
-asyncio.run(main())
diff --git a/tests/proxy/test_chanel_cdp_proxy.py b/tests/proxy/test_chanel_cdp_proxy.py
new file mode 100644
index 00000000..3fc90bb1
--- /dev/null
+++ b/tests/proxy/test_chanel_cdp_proxy.py
@@ -0,0 +1,112 @@
+"""
+Test: Chanel.com anti-bot bypass via crawl4ai
+
+Requires env vars:
+ MASSIVE_USERNAME — Massive residential proxy username
+ MASSIVE_PASSWORD — Massive residential proxy password
+
+Optional:
+ --cdp URL Connect to external browser via CDP (e.g. http://localhost:9223)
+ --attempts N Number of attempts per test (default 3)
+
+Usage:
+ export MASSIVE_USERNAME="your_user"
+ export MASSIVE_PASSWORD="your_pass"
+ .venv/bin/python tests/proxy/test_chanel_cdp_proxy.py
+ .venv/bin/python tests/proxy/test_chanel_cdp_proxy.py --cdp http://localhost:9223
+"""
+
+import asyncio
+import os
+import sys
+import re
+import tempfile
+import shutil
+from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
+from crawl4ai.async_configs import ProxyConfig
+
+URL = "https://www.chanel.com/us/fashion/handbags/c/1x1x1/"
+
+MASSIVE_USERNAME = os.environ.get("MASSIVE_USERNAME", "")
+MASSIVE_PASSWORD = os.environ.get("MASSIVE_PASSWORD", "")
+MASSIVE_SERVER = "https://network.joinmassive.com:65535"
+
+
+def get_proxy_config():
+ if not MASSIVE_USERNAME or not MASSIVE_PASSWORD:
+ print("ERROR: Set MASSIVE_USERNAME and MASSIVE_PASSWORD env vars")
+ sys.exit(1)
+ return ProxyConfig(
+ server=MASSIVE_SERVER,
+ username=MASSIVE_USERNAME,
+ password=MASSIVE_PASSWORD,
+ )
+
+
+async def test_isolated_context(cdp_url: str = None, attempts: int = 3):
+ """Test with isolated context (works with both Playwright and CDP)."""
+ mode = f"CDP ({cdp_url})" if cdp_url else "Playwright Chromium"
+ print(f"\n{'='*60}")
+ print(f"Mode: Isolated context — {mode}")
+ print(f"{'='*60}\n")
+
+ kwargs = dict(
+ enable_stealth=True,
+ create_isolated_context=True,
+ viewport_width=1920,
+ viewport_height=1080,
+ )
+ if cdp_url:
+ kwargs["cdp_url"] = cdp_url
+ else:
+ kwargs["headless"] = True
+
+ config = BrowserConfig(**kwargs)
+ run_config = CrawlerRunConfig(
+ magic=True,
+ simulate_user=True,
+ override_navigator=True,
+ proxy_config=get_proxy_config(),
+ page_timeout=120000,
+ wait_until="load",
+ delay_before_return_html=15.0,
+ )
+
+ passed = 0
+ async with AsyncWebCrawler(config=config) as crawler:
+ for i in range(attempts):
+ result = await crawler.arun(URL, config=run_config)
+ ok = result.status_code == 200 and len(result.html) > 10000
+ title = ""
+ if ok:
+ passed += 1
+                m = re.search(r"<title>(.*?)</title>", result.html)
+ title = f" title={m.group(1)}" if m else ""
+ print(f" Attempt {i+1}: status={result.status_code} html={len(result.html):>10,} bytes {'PASS' if ok else 'FAIL'}{title}")
+
+ print(f"\nResult: {passed}/{attempts} passed")
+ return passed > 0
+
+
+async def main():
+ cdp_url = None
+ attempts = 3
+
+ args = sys.argv[1:]
+ for j, arg in enumerate(args):
+ if arg == "--cdp" and j + 1 < len(args):
+ cdp_url = args[j + 1]
+ if arg == "--attempts" and j + 1 < len(args):
+ attempts = int(args[j + 1])
+
+ ok = await test_isolated_context(cdp_url=cdp_url, attempts=attempts)
+
+ print(f"\n{'='*60}")
+ print(f"Result: {'PASS' if ok else 'FAIL'}")
+ print(f"{'='*60}")
+ return ok
+
+
+if __name__ == "__main__":
+ ok = asyncio.run(main())
+ sys.exit(0 if ok else 1)
diff --git a/tests/proxy/test_chanel_debug.py b/tests/proxy/test_chanel_debug.py
deleted file mode 100644
index a656d512..00000000
--- a/tests/proxy/test_chanel_debug.py
+++ /dev/null
@@ -1,62 +0,0 @@
-import asyncio
-import os
-import shutil
-import uuid
-from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
-from crawl4ai.async_configs import ProxyConfig
-
-
-async def main():
- profile_dir = os.path.expanduser(f"~/.crawl4ai/chanel_{uuid.uuid4().hex[:8]}")
- os.makedirs(profile_dir, exist_ok=True)
-
- browser_config = BrowserConfig(
- headless=True,
- enable_stealth=True,
- use_persistent_context=True,
- user_data_dir=profile_dir,
- viewport_width=1920,
- viewport_height=1080,
- user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
- headers={
- "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
- "Accept-Language": "en-US,en;q=0.9",
- "Sec-Fetch-Dest": "document",
- "Sec-Fetch-Mode": "navigate",
- "Sec-Fetch-Site": "none",
- "Sec-Fetch-User": "?1",
- },
- proxy_config=ProxyConfig(
- server="https://network.joinmassive.com:65535",
- username="mpuQHs4sWZ-country-US",
- password="D0yWxVQo8wQ05RWqz1Bn",
- ),
- )
-
- run_config = CrawlerRunConfig(
- magic=True,
- simulate_user=True,
- override_navigator=True,
- page_timeout=120000,
- wait_until="networkidle",
- delay_before_return_html=15.0,
- )
-
- try:
- async with AsyncWebCrawler(config=browser_config) as crawler:
- result = await crawler.arun(
- "https://www.chanel.com/us/fashion/handbags/c/1x1x1/",
- config=run_config,
- )
- print(f"Status: {result.status_code}")
- print(f"HTML bytes: {len(result.html)}")
- print(f"\n=== FULL HTML ===\n{result.html}")
- print(f"\n=== RESPONSE HEADERS ===")
- if result.response_headers:
- for k, v in sorted(result.response_headers.items()):
- print(f" {k}: {v}")
- finally:
- shutil.rmtree(profile_dir, ignore_errors=True)
-
-
-asyncio.run(main())
diff --git a/tests/proxy/test_chanel_multi_attempt.py b/tests/proxy/test_chanel_multi_attempt.py
deleted file mode 100644
index a02b97aa..00000000
--- a/tests/proxy/test_chanel_multi_attempt.py
+++ /dev/null
@@ -1,147 +0,0 @@
-import asyncio
-import os
-import shutil
-import uuid
-from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, UndetectedAdapter
-from crawl4ai.async_crawler_strategy import AsyncPlaywrightCrawlerStrategy
-
-
-async def attempt(label, browser_config, run_config, crawler_strategy=None):
- print(f"\n{'='*60}")
- print(f"Attempt: {label}")
- print(f"{'='*60}")
-
- url = "https://www.chanel.com/us/fashion/handbags/c/1x1x1/"
-
- kwargs = {"config": browser_config}
- if crawler_strategy:
- kwargs["crawler_strategy"] = crawler_strategy
-
- try:
- async with AsyncWebCrawler(**kwargs) as crawler:
- result = await crawler.arun(url, config=run_config)
- print(f"Status: {result.status_code}")
- print(f"Success: {result.success}")
- print(f"HTML: {len(result.html):,} bytes")
- if result.markdown:
- print(f"Markdown: {len(result.markdown.raw_markdown):,} chars")
- if result.error_message:
- print(f"Error: {result.error_message}")
- # Check for anti-bot indicators
- html_lower = result.html.lower()
- for indicator in ["access denied", "403", "blocked", "captcha", "challenge"]:
- if indicator in html_lower:
- print(f" Anti-bot indicator found: '{indicator}'")
- return result
- except Exception as e:
- print(f"Exception: {e}")
- return None
-
-
-async def main():
- mac_ua = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
-
- headers = {
- "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
- "Accept-Language": "en-US,en;q=0.9",
- "Sec-Fetch-Dest": "document",
- "Sec-Fetch-Mode": "navigate",
- "Sec-Fetch-Site": "none",
- "Sec-Fetch-User": "?1",
- }
-
- # ---- Attempt 1: Mac UA + stealth (user's original approach) ----
- profile1 = os.path.expanduser(f"~/.crawl4ai/chanel_{uuid.uuid4().hex[:8]}")
- os.makedirs(profile1, exist_ok=True)
- try:
- bc1 = BrowserConfig(
- headless=True,
- enable_stealth=True,
- use_persistent_context=True,
- user_data_dir=profile1,
- viewport_width=1920,
- viewport_height=1080,
- user_agent=mac_ua,
- headers=headers,
- )
- rc1 = CrawlerRunConfig(
- magic=True,
- simulate_user=True,
- override_navigator=True,
- page_timeout=120000,
- wait_until="load",
- delay_before_return_html=10.0,
- )
- await attempt("Mac UA + Stealth + Magic (user's script)", bc1, rc1)
- finally:
- shutil.rmtree(profile1, ignore_errors=True)
-
- await asyncio.sleep(3)
-
- # ---- Attempt 2: Undetected adapter (patchright) ----
- profile2 = os.path.expanduser(f"~/.crawl4ai/chanel_{uuid.uuid4().hex[:8]}")
- os.makedirs(profile2, exist_ok=True)
- try:
- bc2 = BrowserConfig(
- headless=True,
- use_persistent_context=True,
- user_data_dir=profile2,
- viewport_width=1920,
- viewport_height=1080,
- user_agent=mac_ua,
- headers=headers,
- )
- rc2 = CrawlerRunConfig(
- simulate_user=True,
- override_navigator=True,
- page_timeout=120000,
- wait_until="load",
- delay_before_return_html=15.0,
- )
- adapter = UndetectedAdapter()
- strategy = AsyncPlaywrightCrawlerStrategy(
- browser_config=bc2,
- browser_adapter=adapter,
- )
- await attempt("Undetected Adapter (patchright)", bc2, rc2, crawler_strategy=strategy)
- finally:
- shutil.rmtree(profile2, ignore_errors=True)
-
- await asyncio.sleep(3)
-
- # ---- Attempt 3: Longer delay + networkidle ----
- profile3 = os.path.expanduser(f"~/.crawl4ai/chanel_{uuid.uuid4().hex[:8]}")
- os.makedirs(profile3, exist_ok=True)
- try:
- bc3 = BrowserConfig(
- headless=True,
- enable_stealth=True,
- use_persistent_context=True,
- user_data_dir=profile3,
- viewport_width=1920,
- viewport_height=1080,
- user_agent=mac_ua,
- headers=headers,
- )
- rc3 = CrawlerRunConfig(
- magic=True,
- simulate_user=True,
- override_navigator=True,
- page_timeout=120000,
- wait_until="networkidle",
- delay_before_return_html=20.0,
- js_code="""
- // Simulate human-like scrolling
- await new Promise(r => setTimeout(r, 2000));
- window.scrollTo({top: 300, behavior: 'smooth'});
- await new Promise(r => setTimeout(r, 1500));
- window.scrollTo({top: 600, behavior: 'smooth'});
- await new Promise(r => setTimeout(r, 1000));
- """,
- )
- await attempt("Stealth + networkidle + scroll + 20s delay", bc3, rc3)
- finally:
- shutil.rmtree(profile3, ignore_errors=True)
-
-
-asyncio.run(main())
diff --git a/tests/proxy/test_chanel_xvfb.py b/tests/proxy/test_chanel_xvfb.py
deleted file mode 100644
index c9221c17..00000000
--- a/tests/proxy/test_chanel_xvfb.py
+++ /dev/null
@@ -1,62 +0,0 @@
-import asyncio
-import os
-import shutil
-import uuid
-from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
-
-
-async def crawl_chanel(url: str):
- profile_dir = os.path.expanduser(f"~/.crawl4ai/chanel_{uuid.uuid4().hex[:8]}")
- os.makedirs(profile_dir, exist_ok=True)
-
- browser_config = BrowserConfig(
- headless=False, # Non-headless via Xvfb - harder to detect
- enable_stealth=True,
- use_persistent_context=True,
- user_data_dir=profile_dir,
- viewport_width=1920,
- viewport_height=1080,
- user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
- headers={
- "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
- "Accept-Language": "en-US,en;q=0.9",
- "Sec-Fetch-Dest": "document",
- "Sec-Fetch-Mode": "navigate",
- "Sec-Fetch-Site": "none",
- "Sec-Fetch-User": "?1",
- }
- )
-
- run_config = CrawlerRunConfig(
- magic=True,
- simulate_user=True,
- override_navigator=True,
- page_timeout=120000,
- wait_until="load",
- delay_before_return_html=10.0,
- )
-
- try:
- async with AsyncWebCrawler(config=browser_config) as crawler:
- result = await crawler.arun(url, config=run_config)
- return result
- finally:
- shutil.rmtree(profile_dir, ignore_errors=True)
-
-
-async def main():
- url = "https://www.chanel.com/us/fashion/handbags/c/1x1x1/"
- result = await crawl_chanel(url)
- print(f"Status: {result.status_code}")
- print(f"Success: {result.success}")
- print(f"HTML: {len(result.html):,} bytes")
- if result.markdown:
- md_len = len(result.markdown.raw_markdown)
- print(f"Markdown: {md_len:,} chars")
- if md_len > 500:
- print(f"\nFirst 500 chars of markdown:\n{result.markdown.raw_markdown[:500]}")
- if result.error_message:
- print(f"Error: {result.error_message}")
-
-
-asyncio.run(main())
diff --git a/tests/proxy/test_platform_match.py b/tests/proxy/test_platform_match.py
deleted file mode 100644
index 3e503242..00000000
--- a/tests/proxy/test_platform_match.py
+++ /dev/null
@@ -1,84 +0,0 @@
-"""Test if matching UA to actual platform fixes Akamai detection."""
-import asyncio
-import os
-import shutil
-import uuid
-from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
-from crawl4ai.async_configs import ProxyConfig
-
-
-async def test_with_ua(label, user_agent):
- print(f"\n{'='*60}")
- print(f"Test: {label}")
- print(f"{'='*60}")
-
- profile_dir = os.path.expanduser(f"~/.crawl4ai/test_{uuid.uuid4().hex[:8]}")
- os.makedirs(profile_dir, exist_ok=True)
-
- browser_config = BrowserConfig(
- headless=True,
- enable_stealth=True,
- use_persistent_context=True,
- user_data_dir=profile_dir,
- viewport_width=1920,
- viewport_height=1080,
- user_agent=user_agent,
- headers={
- "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
- "Accept-Language": "en-US,en;q=0.9",
- "Sec-Fetch-Dest": "document",
- "Sec-Fetch-Mode": "navigate",
- "Sec-Fetch-Site": "none",
- "Sec-Fetch-User": "?1",
- },
- proxy_config=ProxyConfig(
- server="https://network.joinmassive.com:65535",
- username="mpuQHs4sWZ-country-US",
- password="D0yWxVQo8wQ05RWqz1Bn",
- ),
- )
-
- run_config = CrawlerRunConfig(
- magic=True,
- simulate_user=True,
- override_navigator=True,
- page_timeout=120000,
- wait_until="load",
- delay_before_return_html=10.0,
- )
-
- try:
- async with AsyncWebCrawler(config=browser_config) as crawler:
- result = await crawler.arun(
- "https://www.chanel.com/us/fashion/handbags/c/1x1x1/",
- config=run_config,
- )
- print(f" Status: {result.status_code}")
- print(f" HTML bytes: {len(result.html)}")
- blocked = "access denied" in result.html.lower()
- print(f" Blocked: {blocked}")
- if not blocked and len(result.html) > 1000:
- print(f" SUCCESS! Got real content")
- except Exception as e:
- print(f" EXCEPTION: {e}")
- finally:
- shutil.rmtree(profile_dir, ignore_errors=True)
-
-
-async def main():
- # Mac UA on Linux = platform mismatch
- await test_with_ua(
- "Mac UA (mismatched platform)",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
- )
-
- await asyncio.sleep(3)
-
- # Linux UA = matches actual navigator.platform
- await test_with_ua(
- "Linux UA (matching platform)",
- "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
- )
-
-
-asyncio.run(main())
diff --git a/tests/proxy/test_proxy_verify.py b/tests/proxy/test_proxy_verify.py
new file mode 100644
index 00000000..bf9b4f4d
--- /dev/null
+++ b/tests/proxy/test_proxy_verify.py
@@ -0,0 +1,109 @@
+"""
+Verify proxies are working and check what IPs they resolve to.
+Then test Chanel through NST proxy (different provider).
+"""
+import requests
+
+# Check our real IP
+def check_ip(label, proxy=None):
+ print(f"\n--- {label} ---")
+ try:
+ kwargs = {"url": "https://httpbin.org/ip", "timeout": 15}
+ if proxy:
+ kwargs["proxies"] = {"https": proxy, "http": proxy}
+ resp = requests.get(**kwargs)
+ print(f" IP: {resp.json()}")
+ except Exception as e:
+ print(f" ERROR: {e}")
+
+# Get NST proxy credentials
+def get_nst_proxy(channel_id):
+    import os  # NOTE(review): API token must come from the environment, never source control
+    token = os.environ.get("NSTPROXY_TOKEN", "")
+ api_url = f"https://api.nstproxy.com/api/v1/generate/apiproxies?count=1&country=US&protocol=http&sessionDuration=0&channelId={channel_id}&token={token}"
+ print(f"\nFetching NST proxy ({channel_id[:8]}...):")
+ print(f" URL: {api_url}")
+ try:
+ resp = requests.get(api_url, timeout=15)
+ print(f" HTTP {resp.status_code}")
+ print(f" Body: {resp.text[:500]}")
+ data = resp.json()
+ if data.get("code") == 200 and data.get("data"):
+ proxy_str = data["data"][0]
+ parts = proxy_str.split(":")
+ if len(parts) == 4:
+ ip, port, user, pwd = parts
+ proxy_url = f"http://{user}:{pwd}@{ip}:{port}"
+ print(f" Proxy URL: http://{user[:10]}...@{ip}:{port}")
+ return proxy_url
+ except Exception as e:
+ print(f" ERROR: {e}")
+ return None
+
+# Test Chanel
+def test_chanel(label, proxy=None, use_cffi=False):
+ url = "https://www.chanel.com/us/fashion/handbags/c/1x1x1/"
+ headers = {
+ "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+ "Accept-Language": "en-US,en;q=0.9",
+ }
+ print(f"\n{'='*60}")
+ print(f"TEST: {label}")
+ try:
+ if use_cffi:
+ from curl_cffi import requests as cffi_requests
+ kwargs = {"url": url, "headers": headers, "impersonate": "chrome", "timeout": 30, "allow_redirects": True}
+ if proxy:
+ kwargs["proxies"] = {"https": proxy, "http": proxy}
+ resp = cffi_requests.get(**kwargs)
+ else:
+ kwargs = {"url": url, "headers": headers, "timeout": 30, "allow_redirects": True}
+ if proxy:
+ kwargs["proxies"] = {"https": proxy, "http": proxy}
+ resp = requests.get(**kwargs)
+
+ blocked = "Access Denied" in resp.text
+ print(f" Status: {resp.status_code}")
+ print(f" Size: {len(resp.text):,} bytes")
+ print(f" Result: {'BLOCKED' if blocked else 'SUCCESS' if resp.status_code == 200 and len(resp.text) > 10000 else 'UNCLEAR'}")
+ if not blocked and resp.status_code == 200:
+ print(f" First 300 chars: {resp.text[:300]}")
+ except Exception as e:
+ print(f" ERROR: {e}")
+
+
+if __name__ == "__main__":
+    import os  # NOTE(review): proxy credentials must come from the environment; the previously committed values must be rotated
+    MASSIVE_RES = os.environ.get("MASSIVE_RES_PROXY", "")
+    MASSIVE_DC = os.environ.get("MASSIVE_DC_PROXY", "")
+
+ # Step 1: Verify IPs
+ print("="*60)
+ print("STEP 1: Verify proxy IPs")
+ check_ip("Direct (Hetzner)")
+ check_ip("Massive Residential", MASSIVE_RES)
+ check_ip("Massive Datacenter/ISP", MASSIVE_DC)
+
+ # Step 2: Get NST proxies
+ print("\n" + "="*60)
+ print("STEP 2: Get NST proxy credentials")
+ nst_res = get_nst_proxy("7864DDA266D5899C") # residential
+ nst_dc = get_nst_proxy("AE0C3B5547F8A021") # datacenter
+
+ if nst_res:
+ check_ip("NST Residential", nst_res)
+ if nst_dc:
+ check_ip("NST Datacenter", nst_dc)
+
+ # Step 3: Test Chanel with all available proxies
+ print("\n" + "="*60)
+ print("STEP 3: Test Chanel.com")
+
+ if nst_res:
+ test_chanel("curl_cffi + NST residential", proxy=nst_res, use_cffi=True)
+ test_chanel("plain requests + NST residential", proxy=nst_res, use_cffi=False)
+
+ if nst_dc:
+ test_chanel("curl_cffi + NST datacenter", proxy=nst_dc, use_cffi=True)
+
+ # Also try Massive ISP/datacenter (different from residential)
+ test_chanel("curl_cffi + Massive ISP", proxy=MASSIVE_DC, use_cffi=True)