feat: Add Nstproxy Proxies
This commit is contained in:
@@ -1,6 +1,7 @@
|
|||||||
import os
|
import os
|
||||||
from typing import Union
|
from typing import Union
|
||||||
import warnings
|
import warnings
|
||||||
|
import requests
|
||||||
from .config import (
|
from .config import (
|
||||||
DEFAULT_PROVIDER,
|
DEFAULT_PROVIDER,
|
||||||
DEFAULT_PROVIDER_API_KEY,
|
DEFAULT_PROVIDER_API_KEY,
|
||||||
@@ -649,6 +650,85 @@ class BrowserConfig:
|
|||||||
return config
|
return config
|
||||||
return BrowserConfig.from_kwargs(config)
|
return BrowserConfig.from_kwargs(config)
|
||||||
|
|
||||||
|
def set_nstproxy(
    self,
    token: str,
    channel_id: str,
    country: str = "ANY",
    state: str = "",
    city: str = "",
    protocol: str = "http",
    session_duration: int = 10,
):
    """
    Fetch a proxy from NSTProxy API and automatically assign it to proxy_config.

    Get your NSTProxy token from: https://app.nstproxy.com/profile

    Args:
        token (str): NSTProxy API token.
        channel_id (str): NSTProxy channel ID.
        country (str, optional): Country code (default: "ANY").
        state (str, optional): State code (default: ""). Omitted from the request when empty.
        city (str, optional): City name (default: ""). Omitted from the request when empty.
        protocol (str, optional): Proxy protocol ("http" or "socks5"). Defaults to "http".
        session_duration (int, optional): Session duration in minutes (0 = rotate each request). Defaults to 10.

    Raises:
        ValueError: If token/channel_id is missing, the protocol is unsupported,
            or the API response is not a non-empty list whose first entry
            carries "ip", "port", "username" and "password".
        PermissionError: If the API returns an error message.
        requests.RequestException: If the HTTP request fails or returns an
            error status.
    """
    # --- Validate input early, before any network traffic ---
    if not token or not channel_id:
        raise ValueError("[NSTProxy] token and channel_id are required")

    if protocol not in ("http", "socks5"):
        raise ValueError(f"[NSTProxy] Invalid protocol: {protocol}")

    # --- Build NSTProxy API query ---
    params = {
        "fType": 2,
        "count": 1,
        "channelId": channel_id,
        "country": country,
        "protocol": protocol,
        "sessionDuration": session_duration,
        "token": token,
    }
    # Optional geo filters are only sent when the caller supplied them.
    if state:
        params["state"] = state
    if city:
        params["city"] = city

    url = "https://api.nstproxy.com/api/v1/generate/apiproxies"

    # NOTE: failures are deliberately not printed here. The token travels in
    # the query string and HTTP error messages produced by requests include
    # the request URL, so echoing the exception to stdout would leak the
    # credential. Exceptions simply propagate to the caller.
    response = requests.get(url, params=params, timeout=10)
    response.raise_for_status()
    data = response.json()

    # --- Handle API error response ---
    if isinstance(data, dict) and data.get("err"):
        raise PermissionError(f"[NSTProxy] API Error: {data.get('msg', 'Unknown error')}")

    if not isinstance(data, list) or not data:
        raise ValueError("[NSTProxy] Invalid API response — expected a non-empty list")

    # --- Validate the proxy entry so malformed payloads surface as ValueError ---
    proxy_info = data[0]
    if not isinstance(proxy_info, dict):
        raise ValueError("[NSTProxy] Invalid API response — proxy entry is not an object")

    missing = [
        field
        for field in ("ip", "port", "username", "password")
        if field not in proxy_info
    ]
    if missing:
        raise ValueError(
            f"[NSTProxy] Invalid API response — missing fields: {', '.join(missing)}"
        )

    # --- Apply proxy config ---
    self.proxy_config = ProxyConfig(
        server=f"{protocol}://{proxy_info['ip']}:{proxy_info['port']}",
        username=proxy_info["username"],
        password=proxy_info["password"],
    )
|
||||||
|
|
||||||
class VirtualScrollConfig:
|
class VirtualScrollConfig:
|
||||||
"""Configuration for virtual scroll handling.
|
"""Configuration for virtual scroll handling.
|
||||||
|
|
||||||
|
|||||||
48
docs/examples/proxy/api_proxy_example.py
Normal file
48
docs/examples/proxy/api_proxy_example.py
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
"""
|
||||||
|
NSTProxy Integration Examples for crawl4ai
|
||||||
|
------------------------------------------
|
||||||
|
|
||||||
|
NSTProxy is a premium residential proxy provider.
|
||||||
|
👉 Purchase Proxies: https://nstproxy.com
|
||||||
|
💰 Use coupon code "crawl4ai" for 10% off your plan.
|
||||||
|
|
||||||
|
"""
|
||||||
|
import asyncio, requests
|
||||||
|
from crawl4ai import AsyncWebCrawler, BrowserConfig
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
    """
    Example: Dynamically fetch a proxy from NSTProxy API before crawling.

    Requests one proxy from the NSTProxy HTTP API, checks the response, and
    then crawls https://example.com through that proxy.

    Raises:
        PermissionError: If the API answers with an error object.
        ValueError: If the API response is not a non-empty list.
        KeyError: If the returned proxy entry has no "ip" or "port".
        requests.RequestException: If the HTTP request fails or returns an
            error status.
    """
    NST_TOKEN = "YOUR_NST_PROXY_TOKEN"  # Get from https://app.nstproxy.com/profile
    CHANNEL_ID = "YOUR_NST_PROXY_CHANNEL_ID"  # Your NSTProxy Channel ID
    country = "ANY"  # e.g. "ANY", "US", "DE"

    # Fetch proxy from NSTProxy API. Passing `params` lets requests URL-encode
    # every value instead of splicing raw strings into the query.
    response = requests.get(
        "https://api.nstproxy.com/api/v1/generate/apiproxies",
        params={
            "fType": 2,
            "channelId": CHANNEL_ID,
            "country": country,
            "protocol": "http",
            "sessionDuration": 10,
            "count": 1,
            "token": NST_TOKEN,
        },
        timeout=10,
    )
    response.raise_for_status()
    payload = response.json()

    # The API reports failures as an object with "err"/"msg" rather than a list.
    if isinstance(payload, dict) and payload.get("err"):
        raise PermissionError(f"[NSTProxy] API Error: {payload.get('msg', 'Unknown error')}")
    if not isinstance(payload, list) or not payload:
        raise ValueError("[NSTProxy] Invalid API response — expected a non-empty list")

    proxy = payload[0]

    # ip/port are mandatory: fail loudly rather than building "http://None:None".
    ip = proxy["ip"]
    port = proxy["port"]
    username = proxy.get("username", "")
    password = proxy.get("password", "")

    browser_config = BrowserConfig(proxy_config={
        "server": f"http://{ip}:{port}",
        "username": username,
        "password": password,
    })

    async with AsyncWebCrawler(config=browser_config) as crawler:
        result = await crawler.arun(url="https://example.com")
        print("[API Proxy] Status:", result.status_code)


if __name__ == "__main__":
    asyncio.run(main())
|
||||||
31
docs/examples/proxy/auth_proxy_example.py
Normal file
31
docs/examples/proxy/auth_proxy_example.py
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
"""
|
||||||
|
NSTProxy Integration Examples for crawl4ai
|
||||||
|
------------------------------------------
|
||||||
|
|
||||||
|
NSTProxy is a premium residential proxy provider.
|
||||||
|
👉 Purchase Proxies: https://nstproxy.com
|
||||||
|
💰 Use coupon code "crawl4ai" for 10% off your plan.
|
||||||
|
|
||||||
|
"""
|
||||||
|
import asyncio
|
||||||
|
from crawl4ai import AsyncWebCrawler, BrowserConfig
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
    """
    Example: crawl through NSTProxy using explicit username/password credentials.
    """
    # Gateway address plus the account credentials, gathered in one mapping.
    proxy_settings = dict(
        server="http://gate.nstproxy.io:24125",
        username="your_username",
        password="your_password",
    )
    crawler_config = BrowserConfig(proxy_config=proxy_settings)

    async with AsyncWebCrawler(config=crawler_config) as crawler:
        page = await crawler.arun(url="https://example.com")
        print("[Auth Proxy] Status:", page.status_code)


if __name__ == "__main__":
    # Script entry point: drive the coroutine to completion.
    asyncio.run(main())
|
||||||
29
docs/examples/proxy/basic_proxy_example.py
Normal file
29
docs/examples/proxy/basic_proxy_example.py
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
"""
|
||||||
|
NSTProxy Integration Examples for crawl4ai
|
||||||
|
------------------------------------------
|
||||||
|
|
||||||
|
NSTProxy is a premium residential proxy provider.
|
||||||
|
👉 Purchase Proxies: https://nstproxy.com
|
||||||
|
💰 Use coupon code "crawl4ai" for 10% off your plan.
|
||||||
|
|
||||||
|
"""
|
||||||
|
import asyncio
|
||||||
|
from crawl4ai import AsyncWebCrawler, BrowserConfig
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
    """
    Example: crawl through the NSTProxy gateway, first over HTTP and then over SOCKS5.
    """
    # (status label, proxy server URL) — processed in order: HTTP, then SOCKS5.
    proxy_runs = (
        ("[HTTP Proxy] Status:", "http://gate.nstproxy.io:24125"),
        ("[SOCKS5 Proxy] Status:", "socks5://gate.nstproxy.io:24125"),
    )

    for label, server in proxy_runs:
        browser_config = BrowserConfig(proxy_config={"server": server})
        async with AsyncWebCrawler(config=browser_config) as crawler:
            result = await crawler.arun(url="https://example.com")
            print(label, result.status_code)


if __name__ == "__main__":
    # Script entry point: drive the coroutine to completion.
    asyncio.run(main())
|
||||||
39
docs/examples/proxy/nstproxy_example.py
Normal file
39
docs/examples/proxy/nstproxy_example.py
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
"""
|
||||||
|
NSTProxy Integration Examples for crawl4ai
|
||||||
|
------------------------------------------
|
||||||
|
|
||||||
|
NSTProxy is a premium residential proxy provider.
|
||||||
|
👉 Purchase Proxies: https://nstproxy.com
|
||||||
|
💰 Use coupon code "crawl4ai" for 10% off your plan.
|
||||||
|
|
||||||
|
"""
|
||||||
|
import asyncio
|
||||||
|
from crawl4ai import AsyncWebCrawler, BrowserConfig
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
    """
    Example: let BrowserConfig.set_nstproxy fetch a proxy, then crawl with it.
    """
    NST_TOKEN = "YOUR_NST_PROXY_TOKEN"  # Get from https://app.nstproxy.com/profile
    CHANNEL_ID = "YOUR_NST_PROXY_CHANNEL_ID"  # Your NSTProxy Channel ID

    # Options forwarded verbatim to set_nstproxy.
    nstproxy_options = {
        "token": NST_TOKEN,
        "channel_id": CHANNEL_ID,
        "country": "ANY",        # e.g. "US", "JP", or "ANY"
        "state": "",             # optional, leave empty if not needed
        "city": "",              # optional, leave empty if not needed
        "session_duration": 0,   # minutes; 0 = rotate on every request
    }

    browser_config = BrowserConfig()
    browser_config.set_nstproxy(**nstproxy_options)

    # === Run crawler ===
    async with AsyncWebCrawler(config=browser_config) as crawler:
        page = await crawler.arun(url="https://example.com")
        print("[Nstproxy] Status:", page.status_code)


if __name__ == "__main__":
    # Script entry point: drive the coroutine to completion.
    asyncio.run(main())
|
||||||
@@ -11,6 +11,12 @@ This page provides a comprehensive list of example scripts that demonstrate vari
|
|||||||
| Quickstart Set 1 | Basic examples for getting started with Crawl4AI. | [View Code](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/quickstart_examples_set_1.py) |
|
| Quickstart Set 1 | Basic examples for getting started with Crawl4AI. | [View Code](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/quickstart_examples_set_1.py) |
|
||||||
| Quickstart Set 2 | More advanced examples for working with Crawl4AI. | [View Code](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/quickstart_examples_set_2.py) |
|
| Quickstart Set 2 | More advanced examples for working with Crawl4AI. | [View Code](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/quickstart_examples_set_2.py) |
|
||||||
|
|
||||||
|
## Proxies
|
||||||
|
|
||||||
|
| Example | Description | Link |
|
||||||
|
|----------|--------------|------|
|
||||||
|
| **NSTProxy** | [NSTProxy](https://www.nstproxy.com/?utm_source=crawl4ai) integrates seamlessly with Crawl4AI — no setup required. Access high-performance residential, datacenter, ISP, and IPv6 proxies with smart rotation and anti-blocking technology. Pricing starts at $0.1/GB. Use code `crawl4ai` for 10% off. | [View Code](https://github.com/unclecode/crawl4ai/tree/main/docs/examples/proxy) |
|
||||||
|
|
||||||
## Browser & Crawling Features
|
## Browser & Crawling Features
|
||||||
|
|
||||||
| Example | Description | Link |
|
| Example | Description | Link |
|
||||||
|
|||||||
Reference in New Issue
Block a user