Merge pull request #1371 from unclecode/bug/proxy_config
#1057 : enhance ProxyConfig initialization to support dict and string…
This commit is contained in:
@@ -448,6 +448,10 @@ class BrowserConfig:
|
||||
self.chrome_channel = ""
|
||||
self.proxy = proxy
|
||||
self.proxy_config = proxy_config
|
||||
if isinstance(self.proxy_config, dict):
|
||||
self.proxy_config = ProxyConfig.from_dict(self.proxy_config)
|
||||
if isinstance(self.proxy_config, str):
|
||||
self.proxy_config = ProxyConfig.from_string(self.proxy_config)
|
||||
|
||||
|
||||
self.viewport_width = viewport_width
|
||||
@@ -1159,6 +1163,11 @@ class CrawlerRunConfig():
|
||||
self.parser_type = parser_type
|
||||
self.scraping_strategy = scraping_strategy or LXMLWebScrapingStrategy()
|
||||
self.proxy_config = proxy_config
|
||||
if isinstance(proxy_config, dict):
|
||||
self.proxy_config = ProxyConfig.from_dict(proxy_config)
|
||||
if isinstance(proxy_config, str):
|
||||
self.proxy_config = ProxyConfig.from_string(proxy_config)
|
||||
|
||||
self.proxy_rotation_strategy = proxy_rotation_strategy
|
||||
|
||||
# Browser Location and Identity Parameters
|
||||
|
||||
582
tests/proxy/test_proxy_config.py
Normal file
582
tests/proxy/test_proxy_config.py
Normal file
@@ -0,0 +1,582 @@
|
||||
"""
|
||||
Comprehensive test suite for ProxyConfig in different forms:
|
||||
1. String form (ip:port:username:password)
|
||||
2. Dict form (dictionary with keys)
|
||||
3. Object form (ProxyConfig instance)
|
||||
4. Environment variable form (from env vars)
|
||||
|
||||
Tests cover all possible scenarios and edge cases using pytest.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import pytest
|
||||
import tempfile
|
||||
from unittest.mock import patch
|
||||
|
||||
from crawl4ai import AsyncWebCrawler, BrowserConfig
|
||||
from crawl4ai.async_configs import CrawlerRunConfig, ProxyConfig
|
||||
from crawl4ai.cache_context import CacheMode
|
||||
|
||||
|
||||
class TestProxyConfig:
|
||||
"""Comprehensive test suite for ProxyConfig functionality."""
|
||||
|
||||
# Test data for different scenarios
|
||||
# get free proxy server from from webshare.io https://www.webshare.io/?referral_code=3sqog0y1fvsl
|
||||
TEST_PROXY_DATA = {
|
||||
"server": "",
|
||||
"username": "",
|
||||
"password": "",
|
||||
"ip": ""
|
||||
}
|
||||
|
||||
def setup_method(self):
|
||||
"""Setup for each test method."""
|
||||
self.test_url = "https://httpbin.org/ip" # Use httpbin for testing
|
||||
|
||||
# ==================== OBJECT FORM TESTS ====================
|
||||
|
||||
def test_proxy_config_object_creation_basic(self):
|
||||
"""Test basic ProxyConfig object creation."""
|
||||
proxy = ProxyConfig(server="127.0.0.1:8080")
|
||||
assert proxy.server == "127.0.0.1:8080"
|
||||
assert proxy.username is None
|
||||
assert proxy.password is None
|
||||
assert proxy.ip == "127.0.0.1" # Should auto-extract IP
|
||||
|
||||
def test_proxy_config_object_creation_full(self):
|
||||
"""Test ProxyConfig object creation with all parameters."""
|
||||
proxy = ProxyConfig(
|
||||
server=f"http://{self.TEST_PROXY_DATA['server']}",
|
||||
username=self.TEST_PROXY_DATA['username'],
|
||||
password=self.TEST_PROXY_DATA['password'],
|
||||
ip=self.TEST_PROXY_DATA['ip']
|
||||
)
|
||||
assert proxy.server == f"http://{self.TEST_PROXY_DATA['server']}"
|
||||
assert proxy.username == self.TEST_PROXY_DATA['username']
|
||||
assert proxy.password == self.TEST_PROXY_DATA['password']
|
||||
assert proxy.ip == self.TEST_PROXY_DATA['ip']
|
||||
|
||||
def test_proxy_config_object_ip_extraction(self):
|
||||
"""Test automatic IP extraction from server URL."""
|
||||
test_cases = [
|
||||
("http://192.168.1.1:8080", "192.168.1.1"),
|
||||
("https://10.0.0.1:3128", "10.0.0.1"),
|
||||
("192.168.1.100:8080", "192.168.1.100"),
|
||||
("proxy.example.com:8080", "proxy.example.com"),
|
||||
]
|
||||
|
||||
for server, expected_ip in test_cases:
|
||||
proxy = ProxyConfig(server=server)
|
||||
assert proxy.ip == expected_ip, f"Failed for server: {server}"
|
||||
|
||||
def test_proxy_config_object_invalid_server(self):
|
||||
"""Test ProxyConfig with invalid server formats."""
|
||||
# Should not raise exception but may not extract IP properly
|
||||
proxy = ProxyConfig(server="invalid-format")
|
||||
assert proxy.server == "invalid-format"
|
||||
# IP extraction might fail but object should still be created
|
||||
|
||||
# ==================== DICT FORM TESTS ====================
|
||||
|
||||
def test_proxy_config_from_dict_basic(self):
|
||||
"""Test creating ProxyConfig from basic dictionary."""
|
||||
proxy_dict = {"server": "127.0.0.1:8080"}
|
||||
proxy = ProxyConfig.from_dict(proxy_dict)
|
||||
assert proxy.server == "127.0.0.1:8080"
|
||||
assert proxy.username is None
|
||||
assert proxy.password is None
|
||||
|
||||
def test_proxy_config_from_dict_full(self):
|
||||
"""Test creating ProxyConfig from complete dictionary."""
|
||||
proxy_dict = {
|
||||
"server": f"http://{self.TEST_PROXY_DATA['server']}",
|
||||
"username": self.TEST_PROXY_DATA['username'],
|
||||
"password": self.TEST_PROXY_DATA['password'],
|
||||
"ip": self.TEST_PROXY_DATA['ip']
|
||||
}
|
||||
proxy = ProxyConfig.from_dict(proxy_dict)
|
||||
assert proxy.server == proxy_dict["server"]
|
||||
assert proxy.username == proxy_dict["username"]
|
||||
assert proxy.password == proxy_dict["password"]
|
||||
assert proxy.ip == proxy_dict["ip"]
|
||||
|
||||
def test_proxy_config_from_dict_missing_keys(self):
|
||||
"""Test creating ProxyConfig from dictionary with missing keys."""
|
||||
proxy_dict = {"server": "127.0.0.1:8080", "username": "user"}
|
||||
proxy = ProxyConfig.from_dict(proxy_dict)
|
||||
assert proxy.server == "127.0.0.1:8080"
|
||||
assert proxy.username == "user"
|
||||
assert proxy.password is None
|
||||
assert proxy.ip == "127.0.0.1" # Should auto-extract
|
||||
|
||||
def test_proxy_config_from_dict_empty(self):
|
||||
"""Test creating ProxyConfig from empty dictionary."""
|
||||
proxy_dict = {}
|
||||
proxy = ProxyConfig.from_dict(proxy_dict)
|
||||
assert proxy.server is None
|
||||
assert proxy.username is None
|
||||
assert proxy.password is None
|
||||
assert proxy.ip is None
|
||||
|
||||
def test_proxy_config_from_dict_none_values(self):
|
||||
"""Test creating ProxyConfig from dictionary with None values."""
|
||||
proxy_dict = {
|
||||
"server": "127.0.0.1:8080",
|
||||
"username": None,
|
||||
"password": None,
|
||||
"ip": None
|
||||
}
|
||||
proxy = ProxyConfig.from_dict(proxy_dict)
|
||||
assert proxy.server == "127.0.0.1:8080"
|
||||
assert proxy.username is None
|
||||
assert proxy.password is None
|
||||
assert proxy.ip == "127.0.0.1" # Should auto-extract despite None
|
||||
|
||||
# ==================== STRING FORM TESTS ====================
|
||||
|
||||
def test_proxy_config_from_string_full_format(self):
|
||||
"""Test creating ProxyConfig from full string format (ip:port:username:password)."""
|
||||
proxy_str = f"{self.TEST_PROXY_DATA['ip']}:6114:{self.TEST_PROXY_DATA['username']}:{self.TEST_PROXY_DATA['password']}"
|
||||
proxy = ProxyConfig.from_string(proxy_str)
|
||||
assert proxy.server == f"http://{self.TEST_PROXY_DATA['ip']}:6114"
|
||||
assert proxy.username == self.TEST_PROXY_DATA['username']
|
||||
assert proxy.password == self.TEST_PROXY_DATA['password']
|
||||
assert proxy.ip == self.TEST_PROXY_DATA['ip']
|
||||
|
||||
def test_proxy_config_from_string_ip_port_only(self):
|
||||
"""Test creating ProxyConfig from string with only ip:port."""
|
||||
proxy_str = "192.168.1.1:8080"
|
||||
proxy = ProxyConfig.from_string(proxy_str)
|
||||
assert proxy.server == "http://192.168.1.1:8080"
|
||||
assert proxy.username is None
|
||||
assert proxy.password is None
|
||||
assert proxy.ip == "192.168.1.1"
|
||||
|
||||
def test_proxy_config_from_string_invalid_format(self):
|
||||
"""Test creating ProxyConfig from invalid string formats."""
|
||||
invalid_formats = [
|
||||
"invalid",
|
||||
"ip:port:user", # Missing password (3 parts)
|
||||
"ip:port:user:pass:extra", # Too many parts (5 parts)
|
||||
"",
|
||||
"::", # Empty parts but 3 total (invalid)
|
||||
"::::", # Empty parts but 5 total (invalid)
|
||||
]
|
||||
|
||||
for proxy_str in invalid_formats:
|
||||
with pytest.raises(ValueError, match="Invalid proxy string format"):
|
||||
ProxyConfig.from_string(proxy_str)
|
||||
|
||||
def test_proxy_config_from_string_edge_cases_that_work(self):
|
||||
"""Test string formats that should work but might be edge cases."""
|
||||
# These cases actually work as valid formats
|
||||
edge_cases = [
|
||||
(":", "http://:", ""), # ip:port format with empty values
|
||||
(":::", "http://:", ""), # ip:port:user:pass format with empty values
|
||||
]
|
||||
|
||||
for proxy_str, expected_server, expected_ip in edge_cases:
|
||||
proxy = ProxyConfig.from_string(proxy_str)
|
||||
assert proxy.server == expected_server
|
||||
assert proxy.ip == expected_ip
|
||||
|
||||
def test_proxy_config_from_string_edge_cases(self):
|
||||
"""Test string parsing edge cases."""
|
||||
# Test with different port numbers
|
||||
proxy_str = "10.0.0.1:3128:user:pass"
|
||||
proxy = ProxyConfig.from_string(proxy_str)
|
||||
assert proxy.server == "http://10.0.0.1:3128"
|
||||
|
||||
# Test with special characters in credentials
|
||||
proxy_str = "10.0.0.1:8080:user@domain:pass:word"
|
||||
with pytest.raises(ValueError): # Should fail due to extra colon in password
|
||||
ProxyConfig.from_string(proxy_str)
|
||||
|
||||
# ==================== ENVIRONMENT VARIABLE TESTS ====================
|
||||
|
||||
def test_proxy_config_from_env_single_proxy(self):
|
||||
"""Test loading single proxy from environment variable."""
|
||||
proxy_str = f"{self.TEST_PROXY_DATA['ip']}:6114:{self.TEST_PROXY_DATA['username']}:{self.TEST_PROXY_DATA['password']}"
|
||||
|
||||
with patch.dict(os.environ, {'TEST_PROXIES': proxy_str}):
|
||||
proxies = ProxyConfig.from_env('TEST_PROXIES')
|
||||
assert len(proxies) == 1
|
||||
proxy = proxies[0]
|
||||
assert proxy.ip == self.TEST_PROXY_DATA['ip']
|
||||
assert proxy.username == self.TEST_PROXY_DATA['username']
|
||||
assert proxy.password == self.TEST_PROXY_DATA['password']
|
||||
|
||||
def test_proxy_config_from_env_multiple_proxies(self):
|
||||
"""Test loading multiple proxies from environment variable."""
|
||||
proxy_list = [
|
||||
"192.168.1.1:8080:user1:pass1",
|
||||
"192.168.1.2:8080:user2:pass2",
|
||||
"10.0.0.1:3128" # No auth
|
||||
]
|
||||
proxy_str = ",".join(proxy_list)
|
||||
|
||||
with patch.dict(os.environ, {'TEST_PROXIES': proxy_str}):
|
||||
proxies = ProxyConfig.from_env('TEST_PROXIES')
|
||||
assert len(proxies) == 3
|
||||
|
||||
# Check first proxy
|
||||
assert proxies[0].ip == "192.168.1.1"
|
||||
assert proxies[0].username == "user1"
|
||||
assert proxies[0].password == "pass1"
|
||||
|
||||
# Check second proxy
|
||||
assert proxies[1].ip == "192.168.1.2"
|
||||
assert proxies[1].username == "user2"
|
||||
assert proxies[1].password == "pass2"
|
||||
|
||||
# Check third proxy (no auth)
|
||||
assert proxies[2].ip == "10.0.0.1"
|
||||
assert proxies[2].username is None
|
||||
assert proxies[2].password is None
|
||||
|
||||
def test_proxy_config_from_env_empty_var(self):
|
||||
"""Test loading from empty environment variable."""
|
||||
with patch.dict(os.environ, {'TEST_PROXIES': ''}):
|
||||
proxies = ProxyConfig.from_env('TEST_PROXIES')
|
||||
assert len(proxies) == 0
|
||||
|
||||
def test_proxy_config_from_env_missing_var(self):
|
||||
"""Test loading from missing environment variable."""
|
||||
# Ensure the env var doesn't exist
|
||||
with patch.dict(os.environ, {}, clear=True):
|
||||
proxies = ProxyConfig.from_env('NON_EXISTENT_VAR')
|
||||
assert len(proxies) == 0
|
||||
|
||||
def test_proxy_config_from_env_with_empty_entries(self):
|
||||
"""Test loading proxies with empty entries in the list."""
|
||||
proxy_str = "192.168.1.1:8080:user:pass,,10.0.0.1:3128,"
|
||||
|
||||
with patch.dict(os.environ, {'TEST_PROXIES': proxy_str}):
|
||||
proxies = ProxyConfig.from_env('TEST_PROXIES')
|
||||
assert len(proxies) == 2 # Empty entries should be skipped
|
||||
assert proxies[0].ip == "192.168.1.1"
|
||||
assert proxies[1].ip == "10.0.0.1"
|
||||
|
||||
def test_proxy_config_from_env_with_invalid_entries(self):
|
||||
"""Test loading proxies with some invalid entries."""
|
||||
proxy_str = "192.168.1.1:8080:user:pass,invalid_proxy,10.0.0.1:3128"
|
||||
|
||||
with patch.dict(os.environ, {'TEST_PROXIES': proxy_str}):
|
||||
# Should handle errors gracefully and return valid proxies
|
||||
proxies = ProxyConfig.from_env('TEST_PROXIES')
|
||||
# Depending on implementation, might return partial list or empty
|
||||
# This tests error handling
|
||||
assert isinstance(proxies, list)
|
||||
|
||||
# ==================== SERIALIZATION TESTS ====================
|
||||
|
||||
def test_proxy_config_to_dict(self):
|
||||
"""Test converting ProxyConfig to dictionary."""
|
||||
proxy = ProxyConfig(
|
||||
server=f"http://{self.TEST_PROXY_DATA['server']}",
|
||||
username=self.TEST_PROXY_DATA['username'],
|
||||
password=self.TEST_PROXY_DATA['password'],
|
||||
ip=self.TEST_PROXY_DATA['ip']
|
||||
)
|
||||
|
||||
result_dict = proxy.to_dict()
|
||||
expected = {
|
||||
"server": f"http://{self.TEST_PROXY_DATA['server']}",
|
||||
"username": self.TEST_PROXY_DATA['username'],
|
||||
"password": self.TEST_PROXY_DATA['password'],
|
||||
"ip": self.TEST_PROXY_DATA['ip']
|
||||
}
|
||||
assert result_dict == expected
|
||||
|
||||
def test_proxy_config_clone(self):
|
||||
"""Test cloning ProxyConfig with modifications."""
|
||||
original = ProxyConfig(
|
||||
server="http://127.0.0.1:8080",
|
||||
username="user",
|
||||
password="pass"
|
||||
)
|
||||
|
||||
# Clone with modifications
|
||||
cloned = original.clone(username="new_user", password="new_pass")
|
||||
|
||||
# Original should be unchanged
|
||||
assert original.username == "user"
|
||||
assert original.password == "pass"
|
||||
|
||||
# Clone should have new values
|
||||
assert cloned.username == "new_user"
|
||||
assert cloned.password == "new_pass"
|
||||
assert cloned.server == original.server # Unchanged value
|
||||
|
||||
def test_proxy_config_roundtrip_serialization(self):
|
||||
"""Test that ProxyConfig can be serialized and deserialized without loss."""
|
||||
original = ProxyConfig(
|
||||
server=f"http://{self.TEST_PROXY_DATA['server']}",
|
||||
username=self.TEST_PROXY_DATA['username'],
|
||||
password=self.TEST_PROXY_DATA['password'],
|
||||
ip=self.TEST_PROXY_DATA['ip']
|
||||
)
|
||||
|
||||
# Serialize to dict and back
|
||||
serialized = original.to_dict()
|
||||
deserialized = ProxyConfig.from_dict(serialized)
|
||||
|
||||
assert deserialized.server == original.server
|
||||
assert deserialized.username == original.username
|
||||
assert deserialized.password == original.password
|
||||
assert deserialized.ip == original.ip
|
||||
|
||||
# ==================== INTEGRATION TESTS ====================
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_crawler_with_proxy_config_object(self):
|
||||
"""Test AsyncWebCrawler with ProxyConfig object."""
|
||||
proxy_config = ProxyConfig(
|
||||
server=f"http://{self.TEST_PROXY_DATA['server']}",
|
||||
username=self.TEST_PROXY_DATA['username'],
|
||||
password=self.TEST_PROXY_DATA['password']
|
||||
)
|
||||
|
||||
browser_config = BrowserConfig(headless=True)
|
||||
|
||||
# Test that the crawler accepts the ProxyConfig object without errors
|
||||
async with AsyncWebCrawler(config=browser_config) as crawler:
|
||||
try:
|
||||
# Note: This might fail due to actual proxy connection, but should not fail due to config issues
|
||||
result = await crawler.arun(
|
||||
url=self.test_url,
|
||||
config=CrawlerRunConfig(
|
||||
cache_mode=CacheMode.BYPASS,
|
||||
proxy_config=proxy_config,
|
||||
page_timeout=10000 # Short timeout for testing
|
||||
)
|
||||
)
|
||||
# If we get here, proxy config was accepted
|
||||
assert result is not None
|
||||
except Exception as e:
|
||||
# We expect connection errors with test proxies, but not config errors
|
||||
error_msg = str(e).lower()
|
||||
assert "attribute" not in error_msg, f"Config error: {e}"
|
||||
assert "proxy_config" not in error_msg, f"Proxy config error: {e}"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_crawler_with_proxy_config_dict(self):
|
||||
"""Test AsyncWebCrawler with ProxyConfig from dictionary."""
|
||||
proxy_dict = {
|
||||
"server": f"http://{self.TEST_PROXY_DATA['server']}",
|
||||
"username": self.TEST_PROXY_DATA['username'],
|
||||
"password": self.TEST_PROXY_DATA['password']
|
||||
}
|
||||
proxy_config = ProxyConfig.from_dict(proxy_dict)
|
||||
|
||||
browser_config = BrowserConfig(headless=True)
|
||||
|
||||
async with AsyncWebCrawler(config=browser_config) as crawler:
|
||||
try:
|
||||
result = await crawler.arun(
|
||||
url=self.test_url,
|
||||
config=CrawlerRunConfig(
|
||||
cache_mode=CacheMode.BYPASS,
|
||||
proxy_config=proxy_config,
|
||||
page_timeout=10000
|
||||
)
|
||||
)
|
||||
assert result is not None
|
||||
except Exception as e:
|
||||
error_msg = str(e).lower()
|
||||
assert "attribute" not in error_msg, f"Config error: {e}"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_crawler_with_proxy_config_from_string(self):
|
||||
"""Test AsyncWebCrawler with ProxyConfig from string."""
|
||||
proxy_str = f"{self.TEST_PROXY_DATA['ip']}:6114:{self.TEST_PROXY_DATA['username']}:{self.TEST_PROXY_DATA['password']}"
|
||||
proxy_config = ProxyConfig.from_string(proxy_str)
|
||||
|
||||
browser_config = BrowserConfig(headless=True)
|
||||
|
||||
async with AsyncWebCrawler(config=browser_config) as crawler:
|
||||
try:
|
||||
result = await crawler.arun(
|
||||
url=self.test_url,
|
||||
config=CrawlerRunConfig(
|
||||
cache_mode=CacheMode.BYPASS,
|
||||
proxy_config=proxy_config,
|
||||
page_timeout=10000
|
||||
)
|
||||
)
|
||||
assert result is not None
|
||||
except Exception as e:
|
||||
error_msg = str(e).lower()
|
||||
assert "attribute" not in error_msg, f"Config error: {e}"
|
||||
|
||||
# ==================== EDGE CASES AND ERROR HANDLING ====================
|
||||
|
||||
def test_proxy_config_with_none_server(self):
|
||||
"""Test ProxyConfig behavior with None server."""
|
||||
proxy = ProxyConfig(server=None)
|
||||
assert proxy.server is None
|
||||
assert proxy.ip is None # Should not crash
|
||||
|
||||
def test_proxy_config_with_empty_string_server(self):
|
||||
"""Test ProxyConfig behavior with empty string server."""
|
||||
proxy = ProxyConfig(server="")
|
||||
assert proxy.server == ""
|
||||
assert proxy.ip is None or proxy.ip == ""
|
||||
|
||||
def test_proxy_config_special_characters_in_credentials(self):
|
||||
"""Test ProxyConfig with special characters in username/password."""
|
||||
special_chars_tests = [
|
||||
("user@domain.com", "pass!@#$%"),
|
||||
("user_123", "p@ssw0rd"),
|
||||
("user-test", "pass-word"),
|
||||
]
|
||||
|
||||
for username, password in special_chars_tests:
|
||||
proxy = ProxyConfig(
|
||||
server="http://127.0.0.1:8080",
|
||||
username=username,
|
||||
password=password
|
||||
)
|
||||
assert proxy.username == username
|
||||
assert proxy.password == password
|
||||
|
||||
def test_proxy_config_unicode_handling(self):
|
||||
"""Test ProxyConfig with unicode characters."""
|
||||
proxy = ProxyConfig(
|
||||
server="http://127.0.0.1:8080",
|
||||
username="ユーザー", # Japanese characters
|
||||
password="пароль" # Cyrillic characters
|
||||
)
|
||||
assert proxy.username == "ユーザー"
|
||||
assert proxy.password == "пароль"
|
||||
|
||||
# ==================== PERFORMANCE TESTS ====================
|
||||
|
||||
def test_proxy_config_creation_performance(self):
|
||||
"""Test that ProxyConfig creation is reasonably fast."""
|
||||
import time
|
||||
|
||||
start_time = time.time()
|
||||
for i in range(1000):
|
||||
proxy = ProxyConfig(
|
||||
server=f"http://192.168.1.{i % 255}:8080",
|
||||
username=f"user{i}",
|
||||
password=f"pass{i}"
|
||||
)
|
||||
end_time = time.time()
|
||||
|
||||
# Should be able to create 1000 configs in less than 1 second
|
||||
assert (end_time - start_time) < 1.0
|
||||
|
||||
def test_proxy_config_from_env_performance(self):
|
||||
"""Test that loading many proxies from env is reasonably fast."""
|
||||
import time
|
||||
|
||||
# Create a large list of proxy strings
|
||||
proxy_list = [f"192.168.1.{i}:8080:user{i}:pass{i}" for i in range(100)]
|
||||
proxy_str = ",".join(proxy_list)
|
||||
|
||||
with patch.dict(os.environ, {'PERF_TEST_PROXIES': proxy_str}):
|
||||
start_time = time.time()
|
||||
proxies = ProxyConfig.from_env('PERF_TEST_PROXIES')
|
||||
end_time = time.time()
|
||||
|
||||
assert len(proxies) == 100
|
||||
# Should be able to parse 100 proxies in less than 1 second
|
||||
assert (end_time - start_time) < 1.0
|
||||
|
||||
|
||||
# ==================== STANDALONE TEST FUNCTIONS ====================
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_dict_proxy():
|
||||
"""Original test function for dict proxy - kept for backward compatibility."""
|
||||
proxy_config = {
|
||||
"server": "23.95.150.145:6114",
|
||||
"username": "cfyswbwn",
|
||||
"password": "1gs266hoqysi"
|
||||
}
|
||||
proxy_config_obj = ProxyConfig.from_dict(proxy_config)
|
||||
|
||||
browser_config = BrowserConfig(headless=True)
|
||||
async with AsyncWebCrawler(config=browser_config) as crawler:
|
||||
try:
|
||||
result = await crawler.arun(url="https://httpbin.org/ip", config=CrawlerRunConfig(
|
||||
stream=False,
|
||||
cache_mode=CacheMode.BYPASS,
|
||||
proxy_config=proxy_config_obj,
|
||||
page_timeout=10000
|
||||
))
|
||||
print("Dict proxy test passed!")
|
||||
print(result.markdown[:200] if result and result.markdown else "No result")
|
||||
except Exception as e:
|
||||
print(f"Dict proxy test error (expected): {e}")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_string_proxy():
|
||||
"""Test function for string proxy format."""
|
||||
proxy_str = "23.95.150.145:6114:cfyswbwn:1gs266hoqysi"
|
||||
proxy_config_obj = ProxyConfig.from_string(proxy_str)
|
||||
|
||||
browser_config = BrowserConfig(headless=True)
|
||||
async with AsyncWebCrawler(config=browser_config) as crawler:
|
||||
try:
|
||||
result = await crawler.arun(url="https://httpbin.org/ip", config=CrawlerRunConfig(
|
||||
stream=False,
|
||||
cache_mode=CacheMode.BYPASS,
|
||||
proxy_config=proxy_config_obj,
|
||||
page_timeout=10000
|
||||
))
|
||||
print("String proxy test passed!")
|
||||
print(result.markdown[:200] if result and result.markdown else "No result")
|
||||
except Exception as e:
|
||||
print(f"String proxy test error (expected): {e}")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_env_proxy():
|
||||
"""Test function for environment variable proxy."""
|
||||
# Set environment variable
|
||||
os.environ['TEST_PROXIES'] = "23.95.150.145:6114:cfyswbwn:1gs266hoqysi"
|
||||
|
||||
proxies = ProxyConfig.from_env('TEST_PROXIES')
|
||||
if proxies:
|
||||
proxy_config_obj = proxies[0] # Use first proxy
|
||||
|
||||
browser_config = BrowserConfig(headless=True)
|
||||
async with AsyncWebCrawler(config=browser_config) as crawler:
|
||||
try:
|
||||
result = await crawler.arun(url="https://httpbin.org/ip", config=CrawlerRunConfig(
|
||||
stream=False,
|
||||
cache_mode=CacheMode.BYPASS,
|
||||
proxy_config=proxy_config_obj,
|
||||
page_timeout=10000
|
||||
))
|
||||
print("Environment proxy test passed!")
|
||||
print(result.markdown[:200] if result and result.markdown else "No result")
|
||||
except Exception as e:
|
||||
print(f"Environment proxy test error (expected): {e}")
|
||||
else:
|
||||
print("No proxies loaded from environment")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
print("Running comprehensive ProxyConfig tests...")
|
||||
print("=" * 50)
|
||||
|
||||
# Run the standalone test functions
|
||||
print("\n1. Testing dict proxy format...")
|
||||
asyncio.run(test_dict_proxy())
|
||||
|
||||
print("\n2. Testing string proxy format...")
|
||||
asyncio.run(test_string_proxy())
|
||||
|
||||
print("\n3. Testing environment variable proxy format...")
|
||||
asyncio.run(test_env_proxy())
|
||||
|
||||
print("\n" + "=" * 50)
|
||||
print("To run the full pytest suite, use: pytest " + __file__)
|
||||
print("=" * 50)
|
||||
Reference in New Issue
Block a user