Merge pull request #1605 from Nstproxy/feat/nstproxy

feat: Add Nstproxy Proxies
This commit is contained in:
Nasrin
2025-11-12 23:56:14 +08:00
committed by GitHub
17 changed files with 752 additions and 6 deletions

View File

@@ -0,0 +1,62 @@
import asyncio
import capsolver
from crawl4ai import *
# TODO: set your config
# Docs: https://docs.capsolver.com/guide/captcha/awsWaf/
api_key = "CAP-xxxxxxxxxxxxxxxxxxxxx" # your api key of capsolver
site_url = "https://nft.porsche.com/onboarding@6" # page url of your target site
cookie_domain = ".nft.porsche.com" # the domain name to which you want to apply the cookie
captcha_type = "AntiAwsWafTaskProxyLess" # type of your target captcha
capsolver.api_key = api_key
async def main():
browser_config = BrowserConfig(
verbose=True,
headless=False,
use_persistent_context=True,
)
async with AsyncWebCrawler(config=browser_config) as crawler:
await crawler.arun(
url=site_url,
cache_mode=CacheMode.BYPASS,
session_id="session_captcha_test"
)
# get aws waf cookie using capsolver sdk
solution = capsolver.solve({
"type": captcha_type,
"websiteURL": site_url,
})
cookie = solution["cookie"]
print("aws waf cookie:", cookie)
js_code = """
document.cookie = \'aws-waf-token=""" + cookie + """;domain=""" + cookie_domain + """;path=/\';
location.reload();
"""
wait_condition = """() => {
return document.title === \'Join Porsches journey into Web3\';
}"""
run_config = CrawlerRunConfig(
cache_mode=CacheMode.BYPASS,
session_id="session_captcha_test",
js_code=js_code,
js_only=True,
wait_for=f"js:{wait_condition}"
)
result_next = await crawler.arun(
url=site_url,
config=run_config,
)
print(result_next.markdown)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,60 @@
import asyncio
import capsolver
from crawl4ai import *
# TODO: set your config
# Docs: https://docs.capsolver.com/guide/captcha/cloudflare_challenge/
api_key = "CAP-xxxxxxxxxxxxxxxxxxxxx" # your api key of capsolver
site_url = "https://gitlab.com/users/sign_in" # page url of your target site
captcha_type = "AntiCloudflareTask" # type of your target captcha
# your http proxy to solve cloudflare challenge
proxy_server = "proxy.example.com:8080"
proxy_username = "myuser"
proxy_password = "mypass"
capsolver.api_key = api_key
async def main():
# get challenge cookie using capsolver sdk
solution = capsolver.solve({
"type": captcha_type,
"websiteURL": site_url,
"proxy": f"{proxy_server}:{proxy_username}:{proxy_password}",
})
cookies = solution["cookies"]
user_agent = solution["userAgent"]
print("challenge cookies:", cookies)
cookies_list = []
for name, value in cookies.items():
cookies_list.append({
"name": name,
"value": value,
"url": site_url,
})
browser_config = BrowserConfig(
verbose=True,
headless=False,
use_persistent_context=True,
user_agent=user_agent,
cookies=cookies_list,
proxy_config={
"server": f"http://{proxy_server}",
"username": proxy_username,
"password": proxy_password,
},
)
async with AsyncWebCrawler(config=browser_config) as crawler:
result = await crawler.arun(
url=site_url,
cache_mode=CacheMode.BYPASS,
session_id="session_captcha_test"
)
print(result.markdown)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,64 @@
import asyncio
import capsolver
from crawl4ai import *
# TODO: set your config
# Docs: https://docs.capsolver.com/guide/captcha/cloudflare_turnstile/
api_key = "CAP-xxxxxxxxxxxxxxxxxxxxx" # your api key of capsolver
site_key = "0x4AAAAAAAGlwMzq_9z6S9Mh" # site key of your target site
site_url = "https://clifford.io/demo/cloudflare-turnstile" # page url of your target site
captcha_type = "AntiTurnstileTaskProxyLess" # type of your target captcha
capsolver.api_key = api_key
async def main():
browser_config = BrowserConfig(
verbose=True,
headless=False,
use_persistent_context=True,
)
async with AsyncWebCrawler(config=browser_config) as crawler:
await crawler.arun(
url=site_url,
cache_mode=CacheMode.BYPASS,
session_id="session_captcha_test"
)
# get turnstile token using capsolver sdk
solution = capsolver.solve({
"type": captcha_type,
"websiteURL": site_url,
"websiteKey": site_key,
})
token = solution["token"]
print("turnstile token:", token)
js_code = """
document.querySelector(\'input[name="cf-turnstile-response"]\').value = \'"""+token+"""\';
document.querySelector(\'button[type="submit"]\').click();
"""
wait_condition = """() => {
const items = document.querySelectorAll(\'h1\');
return items.length === 0;
}"""
run_config = CrawlerRunConfig(
cache_mode=CacheMode.BYPASS,
session_id="session_captcha_test",
js_code=js_code,
js_only=True,
wait_for=f"js:{wait_condition}"
)
result_next = await crawler.arun(
url=site_url,
config=run_config,
)
print(result_next.markdown)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,67 @@
import asyncio
import capsolver
from crawl4ai import *
# TODO: set your config
# Docs: https://docs.capsolver.com/guide/captcha/ReCaptchaV2/
api_key = "CAP-xxxxxxxxxxxxxxxxxxxxx" # your api key of capsolver
site_key = "6LfW6wATAAAAAHLqO2pb8bDBahxlMxNdo9g947u9" # site key of your target site
site_url = "https://recaptcha-demo.appspot.com/recaptcha-v2-checkbox.php" # page url of your target site
captcha_type = "ReCaptchaV2TaskProxyLess" # type of your target captcha
capsolver.api_key = api_key
async def main():
browser_config = BrowserConfig(
verbose=True,
headless=False,
use_persistent_context=True,
)
async with AsyncWebCrawler(config=browser_config) as crawler:
await crawler.arun(
url=site_url,
cache_mode=CacheMode.BYPASS,
session_id="session_captcha_test"
)
# get recaptcha token using capsolver sdk
solution = capsolver.solve({
"type": captcha_type,
"websiteURL": site_url,
"websiteKey": site_key,
})
token = solution["gRecaptchaResponse"]
print("recaptcha token:", token)
js_code = """
const textarea = document.getElementById(\'g-recaptcha-response\');
if (textarea) {
textarea.value = \"""" + token + """\";
document.querySelector(\'button.form-field[type="submit"]\').click();
}
"""
wait_condition = """() => {
const items = document.querySelectorAll(\'h2\');
return items.length > 1;
}"""
run_config = CrawlerRunConfig(
cache_mode=CacheMode.BYPASS,
session_id="session_captcha_test",
js_code=js_code,
js_only=True,
wait_for=f"js:{wait_condition}"
)
result_next = await crawler.arun(
url=site_url,
config=run_config,
)
print(result_next.markdown)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,75 @@
import asyncio
import capsolver
from crawl4ai import *
# TODO: set your config
# Docs: https://docs.capsolver.com/guide/captcha/ReCaptchaV3/
api_key = "CAP-xxxxxxxxxxxxxxxxxxxxx" # your api key of capsolver
site_key = "6LdKlZEpAAAAAAOQjzC2v_d36tWxCl6dWsozdSy9" # site key of your target site
site_url = "https://recaptcha-demo.appspot.com/recaptcha-v3-request-scores.php" # page url of your target site
page_action = "examples/v3scores" # page action of your target site
captcha_type = "ReCaptchaV3TaskProxyLess" # type of your target captcha
capsolver.api_key = api_key
async def main():
browser_config = BrowserConfig(
verbose=True,
headless=False,
use_persistent_context=True,
)
# get recaptcha token using capsolver sdk
solution = capsolver.solve({
"type": captcha_type,
"websiteURL": site_url,
"websiteKey": site_key,
"pageAction": page_action,
})
token = solution["gRecaptchaResponse"]
print("recaptcha token:", token)
async with AsyncWebCrawler(config=browser_config) as crawler:
await crawler.arun(
url=site_url,
cache_mode=CacheMode.BYPASS,
session_id="session_captcha_test"
)
js_code = """
const originalFetch = window.fetch;
window.fetch = function(...args) {
if (typeof args[0] === 'string' && args[0].includes('/recaptcha-v3-verify.php')) {
const url = new URL(args[0], window.location.origin);
url.searchParams.set('action', '""" + token + """');
args[0] = url.toString();
document.querySelector('.token').innerHTML = "fetch('/recaptcha-v3-verify.php?action=examples/v3scores&token=""" + token + """')";
console.log('Fetch URL hooked:', args[0]);
}
return originalFetch.apply(this, args);
};
"""
wait_condition = """() => {
return document.querySelector('.step3:not(.hidden)');
}"""
run_config = CrawlerRunConfig(
cache_mode=CacheMode.BYPASS,
session_id="session_captcha_test",
js_code=js_code,
js_only=True,
wait_for=f"js:{wait_condition}"
)
result_next = await crawler.arun(
url=site_url,
config=run_config,
)
print(result_next.markdown)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,36 @@
import time
import asyncio
from crawl4ai import *
# TODO: the user data directory that includes the capsolver extension
user_data_dir = "/browser-profile/Default1"
"""
The capsolver extension supports more features, such as:
- Telling the extension when to start solving captcha.
- Calling functions to check whether the captcha has been solved, etc.
Reference blog: https://docs.capsolver.com/guide/automation-tool-integration/
"""
browser_config = BrowserConfig(
verbose=True,
headless=False,
user_data_dir=user_data_dir,
use_persistent_context=True,
)
async def main():
async with AsyncWebCrawler(config=browser_config) as crawler:
result_initial = await crawler.arun(
url="https://nft.porsche.com/onboarding@6",
cache_mode=CacheMode.BYPASS,
session_id="session_captcha_test"
)
# do something later
time.sleep(300)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,36 @@
import time
import asyncio
from crawl4ai import *
# TODO: the user data directory that includes the capsolver extension
user_data_dir = "/browser-profile/Default1"
"""
The capsolver extension supports more features, such as:
- Telling the extension when to start solving captcha.
- Calling functions to check whether the captcha has been solved, etc.
Reference blog: https://docs.capsolver.com/guide/automation-tool-integration/
"""
browser_config = BrowserConfig(
verbose=True,
headless=False,
user_data_dir=user_data_dir,
use_persistent_context=True,
)
async def main():
async with AsyncWebCrawler(config=browser_config) as crawler:
result_initial = await crawler.arun(
url="https://gitlab.com/users/sign_in",
cache_mode=CacheMode.BYPASS,
session_id="session_captcha_test"
)
# do something later
time.sleep(300)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,36 @@
import time
import asyncio
from crawl4ai import *
# TODO: the user data directory that includes the capsolver extension
user_data_dir = "/browser-profile/Default1"
"""
The capsolver extension supports more features, such as:
- Telling the extension when to start solving captcha.
- Calling functions to check whether the captcha has been solved, etc.
Reference blog: https://docs.capsolver.com/guide/automation-tool-integration/
"""
browser_config = BrowserConfig(
verbose=True,
headless=False,
user_data_dir=user_data_dir,
use_persistent_context=True,
)
async def main():
async with AsyncWebCrawler(config=browser_config) as crawler:
result_initial = await crawler.arun(
url="https://clifford.io/demo/cloudflare-turnstile",
cache_mode=CacheMode.BYPASS,
session_id="session_captcha_test"
)
# do something later
time.sleep(300)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,36 @@
import time
import asyncio
from crawl4ai import *
# TODO: the user data directory that includes the capsolver extension
user_data_dir = "/browser-profile/Default1"
"""
The capsolver extension supports more features, such as:
- Telling the extension when to start solving captcha.
- Calling functions to check whether the captcha has been solved, etc.
Reference blog: https://docs.capsolver.com/guide/automation-tool-integration/
"""
browser_config = BrowserConfig(
verbose=True,
headless=False,
user_data_dir=user_data_dir,
use_persistent_context=True,
)
async def main():
async with AsyncWebCrawler(config=browser_config) as crawler:
result_initial = await crawler.arun(
url="https://recaptcha-demo.appspot.com/recaptcha-v2-checkbox.php",
cache_mode=CacheMode.BYPASS,
session_id="session_captcha_test"
)
# do something later
time.sleep(300)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,36 @@
import time
import asyncio
from crawl4ai import *
# TODO: the user data directory that includes the capsolver extension
user_data_dir = "/browser-profile/Default1"
"""
The capsolver extension supports more features, such as:
- Telling the extension when to start solving captcha.
- Calling functions to check whether the captcha has been solved, etc.
Reference blog: https://docs.capsolver.com/guide/automation-tool-integration/
"""
browser_config = BrowserConfig(
verbose=True,
headless=False,
user_data_dir=user_data_dir,
use_persistent_context=True,
)
async def main():
async with AsyncWebCrawler(config=browser_config) as crawler:
result_initial = await crawler.arun(
url="https://recaptcha-demo.appspot.com/recaptcha-v3-request-scores.php",
cache_mode=CacheMode.BYPASS,
session_id="session_captcha_test"
)
# do something later
time.sleep(300)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,48 @@
"""
NSTProxy Integration Examples for crawl4ai
------------------------------------------
NSTProxy is a premium residential proxy provider.
👉 Purchase Proxies: https://nstproxy.com
💰 Use coupon code "crawl4ai" for 10% off your plan.
"""
import asyncio, requests
from crawl4ai import AsyncWebCrawler, BrowserConfig
async def main():
"""
Example: Dynamically fetch a proxy from NSTProxy API before crawling.
"""
NST_TOKEN = "YOUR_NST_PROXY_TOKEN" # Get from https://app.nstproxy.com/profile
CHANNEL_ID = "YOUR_NST_PROXY_CHANNEL_ID" # Your NSTProxy Channel ID
country = "ANY" # e.g. "ANY", "US", "DE"
# Fetch proxy from NSTProxy API
api_url = (
f"https://api.nstproxy.com/api/v1/generate/apiproxies"
f"?fType=2&channelId={CHANNEL_ID}&country={country}"
f"&protocol=http&sessionDuration=10&count=1&token={NST_TOKEN}"
)
response = requests.get(api_url, timeout=10).json()
proxy = response[0]
ip = proxy.get("ip")
port = proxy.get("port")
username = proxy.get("username", "")
password = proxy.get("password", "")
browser_config = BrowserConfig(proxy_config={
"server": f"http://{ip}:{port}",
"username": username,
"password": password,
})
async with AsyncWebCrawler(config=browser_config) as crawler:
result = await crawler.arun(url="https://example.com")
print("[API Proxy] Status:", result.status_code)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,31 @@
"""
NSTProxy Integration Examples for crawl4ai
------------------------------------------
NSTProxy is a premium residential proxy provider.
👉 Purchase Proxies: https://nstproxy.com
💰 Use coupon code "crawl4ai" for 10% off your plan.
"""
import asyncio
from crawl4ai import AsyncWebCrawler, BrowserConfig
async def main():
"""
Example: Use NSTProxy with manual username/password authentication.
"""
browser_config = BrowserConfig(proxy_config={
"server": "http://gate.nstproxy.io:24125",
"username": "your_username",
"password": "your_password",
})
async with AsyncWebCrawler(config=browser_config) as crawler:
result = await crawler.arun(url="https://example.com")
print("[Auth Proxy] Status:", result.status_code)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,29 @@
"""
NSTProxy Integration Examples for crawl4ai
------------------------------------------
NSTProxy is a premium residential proxy provider.
👉 Purchase Proxies: https://nstproxy.com
💰 Use coupon code "crawl4ai" for 10% off your plan.
"""
import asyncio
from crawl4ai import AsyncWebCrawler, BrowserConfig
async def main():
# Using HTTP proxy
browser_config = BrowserConfig(proxy_config={"server": "http://gate.nstproxy.io:24125"})
async with AsyncWebCrawler(config=browser_config) as crawler:
result = await crawler.arun(url="https://example.com")
print("[HTTP Proxy] Status:", result.status_code)
# Using SOCKS proxy
browser_config = BrowserConfig(proxy_config={"server": "socks5://gate.nstproxy.io:24125"})
async with AsyncWebCrawler(config=browser_config) as crawler:
result = await crawler.arun(url="https://example.com")
print("[SOCKS5 Proxy] Status:", result.status_code)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,39 @@
"""
NSTProxy Integration Examples for crawl4ai
------------------------------------------
NSTProxy is a premium residential proxy provider.
👉 Purchase Proxies: https://nstproxy.com
💰 Use coupon code "crawl4ai" for 10% off your plan.
"""
import asyncio
from crawl4ai import AsyncWebCrawler, BrowserConfig
async def main():
"""
Example: Using NSTProxy with AsyncWebCrawler.
"""
NST_TOKEN = "YOUR_NST_PROXY_TOKEN" # Get from https://app.nstproxy.com/profile
CHANNEL_ID = "YOUR_NST_PROXY_CHANNEL_ID" # Your NSTProxy Channel ID
browser_config = BrowserConfig()
browser_config.set_nstproxy(
token=NST_TOKEN,
channel_id=CHANNEL_ID,
country="ANY", # e.g. "US", "JP", or "ANY"
state="", # optional, leave empty if not needed
city="", # optional, leave empty if not needed
session_duration=0 # Session duration in minutes,0 = rotate on every request
)
# === Run crawler ===
async with AsyncWebCrawler(config=browser_config) as crawler:
result = await crawler.arun(url="https://example.com")
print("[Nstproxy] Status:", result.status_code)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -11,6 +11,12 @@ This page provides a comprehensive list of example scripts that demonstrate vari
| Quickstart Set 1 | Basic examples for getting started with Crawl4AI. | [View Code](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/quickstart_examples_set_1.py) |
| Quickstart Set 2 | More advanced examples for working with Crawl4AI. | [View Code](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/quickstart_examples_set_2.py) |
## Proxies
| Example | Description | Link |
|----------|--------------|------|
| **NSTProxy** | [NSTProxy](https://www.nstproxy.com/?utm_source=crawl4ai) Seamlessly integrates with crawl4ai — no setup required. Access high-performance residential, datacenter, ISP, and IPv6 proxies with smart rotation and anti-blocking technology. Starts from $0.1/GB. Use code crawl4ai for 10% off. | [View Code](https://github.com/unclecode/crawl4ai/tree/main/docs/examples/proxy) |
## Browser & Crawling Features
| Example | Description | Link |
@@ -56,13 +62,14 @@ This page provides a comprehensive list of example scripts that demonstrate vari
## Anti-Bot & Stealth Features
| Example | Description | Link |
|---------|-------------|------|
| Stealth Mode Quick Start | Five practical examples showing how to use stealth mode for bypassing basic bot detection. | [View Code](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/stealth_mode_quick_start.py) |
| Example | Description | Link |
|----------------------------|-------------|------|
| Stealth Mode Quick Start | Five practical examples showing how to use stealth mode for bypassing basic bot detection. | [View Code](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/stealth_mode_quick_start.py) |
| Stealth Mode Comprehensive | Comprehensive demonstration of stealth mode features with bot detection testing and comparisons. | [View Code](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/stealth_mode_example.py) |
| Undetected Browser | Simple example showing how to use the undetected browser adapter. | [View Code](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/hello_world_undetected.py) |
| Undetected Browser Demo | Basic demo comparing regular and undetected browser modes. | [View Code](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/undetected_simple_demo.py) |
| Undetected Tests | Advanced tests comparing regular vs undetected browsers on various bot detection services. | [View Folder](https://github.com/unclecode/crawl4ai/tree/main/docs/examples/undetectability/) |
| Undetected Browser | Simple example showing how to use the undetected browser adapter. | [View Code](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/hello_world_undetected.py) |
| Undetected Browser Demo | Basic demo comparing regular and undetected browser modes. | [View Code](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/undetected_simple_demo.py) |
| Undetected Tests | Advanced tests comparing regular vs undetected browsers on various bot detection services. | [View Folder](https://github.com/unclecode/crawl4ai/tree/main/docs/examples/undetectability/) |
| CapSolver Captcha Solver | Seamlessly integrate with [CapSolver](https://www.capsolver.com/?utm_source=crawl4ai&utm_medium=github_pr&utm_campaign=crawl4ai_integration) to automatically solve reCAPTCHA v2/v3, Cloudflare Turnstile / Challenges, AWS WAF and more for uninterrupted scraping and automation. | [View Folder](https://github.com/unclecode/crawl4ai/tree/main/docs/examples/capsolver_captcha_solver/) |
## Customization & Security