diff --git a/crawl4ai/async_configs.py b/crawl4ai/async_configs.py
index 50fe99ba..bfa0d398 100644
--- a/crawl4ai/async_configs.py
+++ b/crawl4ai/async_configs.py
@@ -1,6 +1,7 @@
 import os
 from typing import Union
 import warnings
+import requests
 from .config import (
     DEFAULT_PROVIDER,
     DEFAULT_PROVIDER_API_KEY,
@@ -649,6 +650,86 @@ class BrowserConfig:
             return config
         return BrowserConfig.from_kwargs(config)
 
+    def set_nstproxy(
+        self,
+        token: str,
+        channel_id: str,
+        country: str = "ANY",
+        state: str = "",
+        city: str = "",
+        protocol: str = "http",
+        session_duration: int = 10,
+    ):
+        """
+        Fetch a proxy from NSTProxy API and automatically assign it to proxy_config.
+
+        Get your NSTProxy token from: https://app.nstproxy.com/profile
+
+        Args:
+            token (str): NSTProxy API token.
+            channel_id (str): NSTProxy channel ID.
+            country (str, optional): Country code (default: "ANY").
+            state (str, optional): State code (default: "").
+            city (str, optional): City name (default: "").
+            protocol (str, optional): Proxy protocol ("http" or "socks5"). Defaults to "http".
+            session_duration (int, optional): Session duration in minutes (0 = rotate each request). Defaults to 10.
+
+        Raises:
+            ValueError: If an argument is invalid or the API response format is invalid.
+            PermissionError: If the API returns an error message.
+            requests.RequestException: If the HTTP request fails. The message may embed
+                the request URL (token included), so do not log it verbatim.
+        """
+
+        # --- Validate input early ---
+        if not token or not channel_id:
+            raise ValueError("[NSTProxy] token and channel_id are required")
+
+        if protocol not in ("http", "socks5"):
+            raise ValueError(f"[NSTProxy] Invalid protocol: {protocol}")
+
+        # --- Build NSTProxy API query ---
+        params = {
+            "fType": 2,
+            "count": 1,
+            "channelId": channel_id,
+            "country": country,
+            "protocol": protocol,
+            "sessionDuration": session_duration,
+            "token": token,
+        }
+        if state:
+            params["state"] = state
+        if city:
+            params["city"] = city
+
+        url = "https://api.nstproxy.com/api/v1/generate/apiproxies"
+
+        # Failures propagate unchanged and are deliberately not printed here:
+        # requests embeds the full URL (including the token) in its error messages.
+        response = requests.get(url, params=params, timeout=10)
+        response.raise_for_status()
+        data = response.json()
+
+        # --- Handle API error response ---
+        if isinstance(data, dict) and data.get("err"):
+            raise PermissionError(f"[NSTProxy] API Error: {data.get('msg', 'Unknown error')}")
+
+        if not isinstance(data, list) or not data:
+            raise ValueError("[NSTProxy] Invalid API response — expected a non-empty list")
+
+        proxy_info = data[0]
+        required = ("ip", "port", "username", "password")
+        if not isinstance(proxy_info, dict) or not all(key in proxy_info for key in required):
+            raise ValueError("[NSTProxy] Invalid API response — proxy entry is missing required fields")
+
+        # --- Apply proxy config ---
+        self.proxy_config = ProxyConfig(
+            server=f"{protocol}://{proxy_info['ip']}:{proxy_info['port']}",
+            username=proxy_info["username"],
+            password=proxy_info["password"],
+        )
+
 
 class VirtualScrollConfig:
     """Configuration for virtual scroll handling.
diff --git a/docs/examples/proxy/api_proxy_example.py b/docs/examples/proxy/api_proxy_example.py
new file mode 100644
index 00000000..11847697
--- /dev/null
+++ b/docs/examples/proxy/api_proxy_example.py
@@ -0,0 +1,58 @@
+"""
+NSTProxy Integration Examples for crawl4ai
+------------------------------------------
+
+NSTProxy is a premium residential proxy provider.
+👉 Purchase Proxies: https://nstproxy.com
+💰 Use coupon code "crawl4ai" for 10% off your plan.
+
+"""
+import asyncio
+
+import requests
+
+from crawl4ai import AsyncWebCrawler, BrowserConfig
+
+
+async def main():
+    """
+    Example: Dynamically fetch a proxy from NSTProxy API before crawling.
+    """
+    NST_TOKEN = "YOUR_NST_PROXY_TOKEN"  # Get from https://app.nstproxy.com/profile
+    CHANNEL_ID = "YOUR_NST_PROXY_CHANNEL_ID"  # Your NSTProxy Channel ID
+    country = "ANY"  # e.g. "ANY", "US", "DE"
+
+    # Fetch proxy from NSTProxy API; `params` URL-encodes every value
+    response = requests.get(
+        "https://api.nstproxy.com/api/v1/generate/apiproxies",
+        params={
+            "fType": 2,
+            "channelId": CHANNEL_ID,
+            "country": country,
+            "protocol": "http",
+            "sessionDuration": 10,
+            "count": 1,
+            "token": NST_TOKEN,
+        },
+        timeout=10,
+    )
+    response.raise_for_status()
+    proxies = response.json()
+    if not isinstance(proxies, list) or not proxies:
+        # Anything other than a non-empty list (e.g. an error payload) has no proxy to use
+        raise RuntimeError(f"NSTProxy API returned no proxy: {proxies}")
+    proxy = proxies[0]
+
+    browser_config = BrowserConfig(proxy_config={
+        "server": f"http://{proxy['ip']}:{proxy['port']}",
+        "username": proxy.get("username", ""),
+        "password": proxy.get("password", ""),
+    })
+
+    async with AsyncWebCrawler(config=browser_config) as crawler:
+        result = await crawler.arun(url="https://example.com")
+        print("[API Proxy] Status:", result.status_code)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/docs/examples/proxy/auth_proxy_example.py b/docs/examples/proxy/auth_proxy_example.py
new file mode 100644
index 00000000..6fb838b4
--- /dev/null
+++ b/docs/examples/proxy/auth_proxy_example.py
@@ -0,0 +1,31 @@
+"""
+NSTProxy Integration Examples for crawl4ai
+------------------------------------------
+
+NSTProxy is a premium residential proxy provider.
+👉 Purchase Proxies: https://nstproxy.com
+💰 Use coupon code "crawl4ai" for 10% off your plan.
+
+"""
+import asyncio
+from crawl4ai import AsyncWebCrawler, BrowserConfig
+
+
+async def main():
+    """
+    Example: Use NSTProxy with manual username/password authentication.
+    """
+
+    browser_config = BrowserConfig(proxy_config={
+        "server": "http://gate.nstproxy.io:24125",
+        "username": "your_username",
+        "password": "your_password",
+    })
+
+    async with AsyncWebCrawler(config=browser_config) as crawler:
+        result = await crawler.arun(url="https://example.com")
+        print("[Auth Proxy] Status:", result.status_code)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/docs/examples/proxy/basic_proxy_example.py b/docs/examples/proxy/basic_proxy_example.py
new file mode 100644
index 00000000..5a79525c
--- /dev/null
+++ b/docs/examples/proxy/basic_proxy_example.py
@@ -0,0 +1,29 @@
+"""
+NSTProxy Integration Examples for crawl4ai
+------------------------------------------
+
+NSTProxy is a premium residential proxy provider.
+👉 Purchase Proxies: https://nstproxy.com
+💰 Use coupon code "crawl4ai" for 10% off your plan.
+
+"""
+import asyncio
+from crawl4ai import AsyncWebCrawler, BrowserConfig
+
+
+async def main():
+    # Using HTTP proxy
+    browser_config = BrowserConfig(proxy_config={"server": "http://gate.nstproxy.io:24125"})
+    async with AsyncWebCrawler(config=browser_config) as crawler:
+        result = await crawler.arun(url="https://example.com")
+        print("[HTTP Proxy] Status:", result.status_code)
+
+    # Using SOCKS proxy
+    browser_config = BrowserConfig(proxy_config={"server": "socks5://gate.nstproxy.io:24125"})
+    async with AsyncWebCrawler(config=browser_config) as crawler:
+        result = await crawler.arun(url="https://example.com")
+        print("[SOCKS5 Proxy] Status:", result.status_code)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/docs/examples/proxy/nstproxy_example.py b/docs/examples/proxy/nstproxy_example.py
new file mode 100644
index 00000000..4e8587b3
--- /dev/null
+++ b/docs/examples/proxy/nstproxy_example.py
@@ -0,0 +1,39 @@
+"""
+NSTProxy Integration Examples for crawl4ai
+------------------------------------------
+
+NSTProxy is a premium residential proxy provider.
+👉 Purchase Proxies: https://nstproxy.com
+💰 Use coupon code "crawl4ai" for 10% off your plan.
+
+"""
+import asyncio
+from crawl4ai import AsyncWebCrawler, BrowserConfig
+
+
+async def main():
+    """
+    Example: Using NSTProxy with AsyncWebCrawler.
+    """
+
+    NST_TOKEN = "YOUR_NST_PROXY_TOKEN"  # Get from https://app.nstproxy.com/profile
+    CHANNEL_ID = "YOUR_NST_PROXY_CHANNEL_ID"  # Your NSTProxy Channel ID
+
+    browser_config = BrowserConfig()
+    browser_config.set_nstproxy(
+        token=NST_TOKEN,
+        channel_id=CHANNEL_ID,
+        country="ANY",  # e.g. "US", "JP", or "ANY"
+        state="",  # optional, leave empty if not needed
+        city="",  # optional, leave empty if not needed
+        session_duration=0  # Session duration in minutes; 0 = rotate on every request
+    )
+
+    # === Run crawler ===
+    async with AsyncWebCrawler(config=browser_config) as crawler:
+        result = await crawler.arun(url="https://example.com")
+        print("[Nstproxy] Status:", result.status_code)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/docs/md_v2/core/examples.md b/docs/md_v2/core/examples.md
index db11773b..d5d58e02 100644
--- a/docs/md_v2/core/examples.md
+++ b/docs/md_v2/core/examples.md
@@ -11,6 +11,12 @@ This page provides a comprehensive list of example scripts that demonstrate vari
 | Quickstart Set 1 | Basic examples for getting started with Crawl4AI. | [View Code](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/quickstart_examples_set_1.py) |
 | Quickstart Set 2 | More advanced examples for working with Crawl4AI. | [View Code](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/quickstart_examples_set_2.py) |
 
+## Proxies
+
+| Example | Description | Link |
+|----------|--------------|------|
+| **NSTProxy** | [NSTProxy](https://www.nstproxy.com/?utm_source=crawl4ai) integrates seamlessly with crawl4ai — no setup required. Access high-performance residential, datacenter, ISP, and IPv6 proxies with smart rotation and anti-blocking technology. Pricing starts at $0.1/GB. Use code `crawl4ai` for 10% off. | [View Code](https://github.com/unclecode/crawl4ai/tree/main/docs/examples/proxy) |
+
 ## Browser & Crawling Features
 
 | Example | Description | Link |