Add proxy support to HTTP crawler strategy
This commit is contained in:
@@ -2382,6 +2382,25 @@ class AsyncHTTPCrawlerStrategy(AsyncCrawlerStrategy):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _format_proxy_url(self, proxy_config) -> "Optional[str]":
    """Format a proxy config object into an aiohttp-compatible proxy URL.

    Args:
        proxy_config: Object exposing a ``server`` attribute and, optionally,
            ``username`` and ``password`` attributes. May be ``None``.

    Returns:
        ``None`` when ``proxy_config`` is falsy. Otherwise the ``server``
        value, with ``username:password@`` inserted after the scheme when
        both credentials are set. A server without a ``scheme://`` prefix
        gets ``http://`` when credentials are inserted; without credentials
        the server string is returned unchanged.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote

    if not proxy_config:
        return None

    server = proxy_config.server
    username = getattr(proxy_config, 'username', None)
    password = getattr(proxy_config, 'password', None)

    # Credentials are only embedded when both parts are present.
    if not (username and password):
        return server

    # Percent-encode the credentials so reserved characters such as '@',
    # ':' or '/' cannot be mistaken for URL delimiters.
    userinfo = f"{quote(str(username), safe='')}:{quote(str(password), safe='')}"

    scheme, separator, rest = server.partition('://')
    if not separator:
        # No explicit scheme on the server string: default to plain HTTP.
        scheme, rest = 'http', server

    return f"{scheme}://{userinfo}@{rest}"
|
||||||
|
|
||||||
async def _handle_http(
|
async def _handle_http(
|
||||||
self,
|
self,
|
||||||
url: str,
|
url: str,
|
||||||
@@ -2405,6 +2424,12 @@ class AsyncHTTPCrawlerStrategy(AsyncCrawlerStrategy):
|
|||||||
'headers': headers
|
'headers': headers
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Add proxy support - use config.proxy_config (set by arun() from rotation strategy or direct config)
|
||||||
|
proxy_url = None
|
||||||
|
if config.proxy_config:
|
||||||
|
proxy_url = self._format_proxy_url(config.proxy_config)
|
||||||
|
request_kwargs['proxy'] = proxy_url
|
||||||
|
|
||||||
if self.browser_config.method == "POST":
|
if self.browser_config.method == "POST":
|
||||||
if self.browser_config.data:
|
if self.browser_config.data:
|
||||||
request_kwargs['data'] = self.browser_config.data
|
request_kwargs['data'] = self.browser_config.data
|
||||||
|
|||||||
Reference in New Issue
Block a user