Files
crawl4ai/tests/proxy/test_proxy_verify.py
unclecode fdd989785f Sync sec-ch-ua with User-Agent and keep WebGL alive in stealth mode
Fix a bug where magic mode and per-request UA overrides would change
the User-Agent header without updating the sec-ch-ua (browser hint)
header to match. Anti-bot systems like Akamai detect this mismatch
as a bot signal.

Changes:
- Regenerate browser_hint via UAGen.generate_client_hints() whenever
  the UA is changed at crawl time (magic mode or explicit override)
- Re-apply updated headers to the page via set_extra_http_headers()
- Skip per-crawl UA override for persistent contexts where the UA is
  locked at launch time by Playwright's protocol layer
- Move --disable-gpu flags behind enable_stealth check so WebGL works
  via SwiftShader when stealth mode is active (missing WebGL is a
  detectable headless signal)
- Clean up old test scripts, add clean anti-bot test
2026-02-13 04:10:47 +00:00

110 lines
4.3 KiB
Python

"""
Verify proxies are working and check what IPs they resolve to.
Then test Chanel through NST proxy (different provider).
"""
import requests
# Report the public IP that httpbin.org sees, either directly or through a proxy
def check_ip(label, proxy=None):
    """Print the public IP that httpbin.org reports for this connection.

    Args:
        label: Heading printed above the result.
        proxy: Optional proxy URL applied to both the ``https`` and ``http``
            schemes; when falsy, no proxy mapping is passed to ``requests``.

    Nothing is returned. Any exception raised while fetching or decoding the
    response is printed as an ``ERROR`` line instead of propagating.
    """
    print(f"\n--- {label} ---")
    try:
        request_args = {"url": "https://httpbin.org/ip", "timeout": 15}
        if proxy:
            # Same proxy for both schemes, https first as in the mapping order.
            request_args["proxies"] = dict.fromkeys(("https", "http"), proxy)
        reported = requests.get(**request_args).json()
        print(f" IP: {reported}")
    except Exception as err:
        print(f" ERROR: {err}")
# Get NST proxy credentials
def get_nst_proxy(channel_id):
    """Request one US HTTP proxy from the NST API and return it as a proxy URL.

    The API token is read from the ``NSTPROXY_TOKEN`` environment variable
    rather than being embedded in the source, and it is redacted from
    everything this function prints.

    Args:
        channel_id: NST channel identifier, sent as the ``channelId`` query
            parameter. Only its first 8 characters are printed.

    Returns:
        ``"http://user:password@ip:port"`` on success, or ``None`` when the
        token is not configured, the request fails, or the response does not
        contain an ``ip:port:user:password`` entry.
    """
    import os
    from urllib.parse import quote, quote_plus

    print(f"\nFetching NST proxy ({channel_id[:8]}...):")

    token = os.environ.get("NSTPROXY_TOKEN")
    if not token:
        print(" ERROR: NSTPROXY_TOKEN environment variable is not set")
        return None

    def redact(text):
        # Error messages and response bodies can echo the query string, so
        # strip the token in both its raw and URL-encoded forms.
        for secret in (token, quote_plus(token)):
            text = text.replace(secret, "<redacted>")
        return text

    api_url = "https://api.nstproxy.com/api/v1/generate/apiproxies"
    # Let requests build the query string so every value is URL-encoded.
    params = {
        "count": 1,
        "country": "US",
        "protocol": "http",
        "sessionDuration": 0,
        "channelId": channel_id,
        "token": token,
    }
    print(f" URL: {api_url} (channelId={channel_id[:8]}..., token=<redacted>)")

    try:
        resp = requests.get(api_url, params=params, timeout=15)
    except Exception as e:
        # Best-effort diagnostic: report the failure and carry on.
        print(f" ERROR: {redact(str(e))}")
        return None
    print(f" HTTP {resp.status_code}")

    try:
        data = resp.json()
    except ValueError:
        data = None

    entries = data.get("data") if isinstance(data, dict) else None
    parts = []
    if (
        isinstance(data, dict)
        and data.get("code") == 200
        and isinstance(entries, list)
        and entries
        and isinstance(entries[0], str)
    ):
        parts = entries[0].split(":")

    if len(parts) != 4:
        # Show the body only on failure: a successful body carries the proxy
        # password, which the success path below deliberately does not print.
        print(" ERROR: unexpected response from NST API")
        print(f" Body: {redact(resp.text)[:500]}")
        return None

    ip, port, user, pwd = parts
    # Percent-encode the credentials so reserved characters such as "@" or
    # "/" cannot corrupt the userinfo part of the proxy URL.
    proxy_url = f"http://{quote(user, safe='')}:{quote(pwd, safe='')}@{ip}:{port}"
    print(f" Proxy URL: http://{user[:10]}...@{ip}:{port}")
    return proxy_url
# Test Chanel
def test_chanel(label, proxy=None, use_cffi=False):
    """Fetch the Chanel handbags page and print whether the request was blocked.

    Despite its name this is a manual probe, not a pytest test; it is marked
    with ``__test__ = False`` below so pytest does not try to collect it.

    Args:
        label: Description printed in the ``TEST:`` header.
        proxy: Optional proxy URL applied to both ``https`` and ``http``.
        use_cffi: When true, send the request with ``curl_cffi`` using
            ``impersonate="chrome"``; otherwise use plain ``requests``.

    Nothing is returned. The verdict printed is ``BLOCKED`` when the body
    contains "Access Denied", ``SUCCESS`` for a 200 response longer than
    10000 characters, and ``UNCLEAR`` otherwise. Any exception, including a
    missing ``curl_cffi`` install, is printed as an ``ERROR`` line.
    """
    url = "https://www.chanel.com/us/fashion/handbags/c/1x1x1/"
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
    }
    print(f"\n{'='*60}")
    print(f"TEST: {label}")
    try:
        kwargs = {"url": url, "headers": headers, "timeout": 30, "allow_redirects": True}
        if use_cffi:
            # Imported lazily so the plain-requests path works without curl_cffi.
            from curl_cffi import requests as http_client
            kwargs["impersonate"] = "chrome"
        else:
            http_client = requests
        if proxy:
            kwargs["proxies"] = {"https": proxy, "http": proxy}
        resp = http_client.get(**kwargs)

        body = resp.text
        blocked = "Access Denied" in body
        if blocked:
            verdict = "BLOCKED"
        elif resp.status_code == 200 and len(body) > 10000:
            verdict = "SUCCESS"
        else:
            verdict = "UNCLEAR"

        print(f" Status: {resp.status_code}")
        print(f" Size: {len(body):,} bytes")
        print(f" Result: {verdict}")
        if not blocked and resp.status_code == 200:
            print(f" First 300 chars: {body[:300]}")
    except Exception as e:
        print(f" ERROR: {e}")


# The name matches pytest's ``test_*`` pattern and the file lives under tests/,
# so without this marker pytest collects the function and fails with
# "fixture 'label' not found".
test_chanel.__test__ = False
def main():
    """Run the three-step proxy check: verify IPs, fetch NST proxies, probe Chanel.

    Massive proxy credentials are read from the ``MASSIVE_PROXY_USER`` and
    ``MASSIVE_PROXY_PASSWORD`` environment variables instead of being stored
    in the source. When either is missing, every Massive step is skipped so
    that a direct connection is never reported under a proxy label.
    """
    import os
    from urllib.parse import quote

    massive_user = os.environ.get("MASSIVE_PROXY_USER")
    massive_password = os.environ.get("MASSIVE_PROXY_PASSWORD")
    if massive_user and massive_password:
        # Percent-encode so reserved characters cannot break the proxy URL.
        creds = f"{quote(massive_user, safe='')}:{quote(massive_password, safe='')}"
        massive_res = f"https://{creds}@network.joinmassive.com:65535"
        massive_dc = f"http://{creds}@isp.joinmassive.com:8000"
    else:
        massive_res = massive_dc = None
        print("MASSIVE_PROXY_USER / MASSIVE_PROXY_PASSWORD not set; skipping Massive proxies")

    # Step 1: Verify IPs
    print("="*60)
    print("STEP 1: Verify proxy IPs")
    check_ip("Direct (Hetzner)")
    if massive_res:
        check_ip("Massive Residential", massive_res)
    if massive_dc:
        check_ip("Massive Datacenter/ISP", massive_dc)

    # Step 2: Get NST proxies
    print("\n" + "="*60)
    print("STEP 2: Get NST proxy credentials")
    nst_res = get_nst_proxy("7864DDA266D5899C")  # residential
    nst_dc = get_nst_proxy("AE0C3B5547F8A021")  # datacenter
    if nst_res:
        check_ip("NST Residential", nst_res)
    if nst_dc:
        check_ip("NST Datacenter", nst_dc)

    # Step 3: Test Chanel with all available proxies
    print("\n" + "="*60)
    print("STEP 3: Test Chanel.com")
    if nst_res:
        test_chanel("curl_cffi + NST residential", proxy=nst_res, use_cffi=True)
        test_chanel("plain requests + NST residential", proxy=nst_res, use_cffi=False)
    if nst_dc:
        test_chanel("curl_cffi + NST datacenter", proxy=nst_dc, use_cffi=True)
    # Also try Massive ISP/datacenter (different from residential)
    if massive_dc:
        test_chanel("curl_cffi + Massive ISP", proxy=massive_dc, use_cffi=True)


if __name__ == "__main__":
    main()