Files

AHMET YILMAZ f7a3366f72 #1375 : refactor(proxy) Deprecate 'proxy' parameter in BrowserConfig and enhance proxy string parsing

- Updated ProxyConfig.from_string to support multiple proxy formats, including URLs with credentials.
- Deprecated the 'proxy' parameter in BrowserConfig, replacing it with 'proxy_config' for better flexibility.
- Added warnings for deprecated usage and clarified behavior when both parameters are provided.
- Updated documentation and tests to reflect changes in proxy configuration handling.

2025-08-28 17:21:49 +08:00

3.1 KiB

Raw Blame History

Proxy

Basic Proxy Setup

Simple proxy configuration with BrowserConfig:

from crawl4ai.async_configs import BrowserConfig

# Using HTTP proxy
browser_config = BrowserConfig(proxy_config={"server": "http://proxy.example.com:8080"})
async with AsyncWebCrawler(config=browser_config) as crawler:
    result = await crawler.arun(url="https://example.com")

# Using SOCKS proxy
browser_config = BrowserConfig(proxy_config={"server": "socks5://proxy.example.com:1080"})
async with AsyncWebCrawler(config=browser_config) as crawler:
    result = await crawler.arun(url="https://example.com")

Authenticated Proxy

Use an authenticated proxy with BrowserConfig:

from crawl4ai.async_configs import BrowserConfig

browser_config = BrowserConfig(proxy_config={
    "server": "http://[host]:[port]",
    "username": "[username]",
    "password": "[password]",
})
async with AsyncWebCrawler(config=browser_config) as crawler:
    result = await crawler.arun(url="https://example.com")

Rotating Proxies

Example using a proxy rotation service dynamically:

import re
from crawl4ai import (
    AsyncWebCrawler,
    BrowserConfig,
    CrawlerRunConfig,
    CacheMode,
    RoundRobinProxyStrategy,
)
import asyncio
from crawl4ai import ProxyConfig
async def main():
    # Load proxies and create rotation strategy
    proxies = ProxyConfig.from_env()
    #eg: export PROXIES="ip1:port1:username1:password1,ip2:port2:username2:password2"
    if not proxies:
        print("No proxies found in environment. Set PROXIES env variable!")
        return

    proxy_strategy = RoundRobinProxyStrategy(proxies)

    # Create configs
    browser_config = BrowserConfig(headless=True, verbose=False)
    run_config = CrawlerRunConfig(
        cache_mode=CacheMode.BYPASS,
        proxy_rotation_strategy=proxy_strategy
    )

    async with AsyncWebCrawler(config=browser_config) as crawler:
        urls = ["https://httpbin.org/ip"] * (len(proxies) * 2)  # Test each proxy twice

        print("\n📈 Initializing crawler with proxy rotation...")
        async with AsyncWebCrawler(config=browser_config) as crawler:
            print("\n🚀 Starting batch crawl with proxy rotation...")
            results = await crawler.arun_many(
                urls=urls,
                config=run_config
            )
            for result in results:
                if result.success:
                    ip_match = re.search(r'(?:[0-9]{1,3}\.){3}[0-9]{1,3}', result.html)
                    current_proxy = run_config.proxy_config if run_config.proxy_config else None

                    if current_proxy and ip_match:
                        print(f"URL {result.url}")
                        print(f"Proxy {current_proxy.server} -> Response IP: {ip_match.group(0)}")
                        verified = ip_match.group(0) == current_proxy.ip
                        if verified:
                            print(f"✅ Proxy working! IP matches: {current_proxy.ip}")
                        else:
                            print("❌ Proxy failed or IP mismatch!")
                    print("---")

asyncio.run(main())

3.1 KiB Raw Blame History

Proxy

Basic Proxy Setup

Authenticated Proxy

Rotating Proxies

3.1 KiB

Raw Blame History