feat(proxy): add proxy rotation strategy
Implements a new proxy rotation system with the following changes: - Add ProxyRotationStrategy abstract base class - Add RoundRobinProxyStrategy concrete implementation - Integrate proxy rotation with AsyncWebCrawler - Add proxy_rotation_strategy parameter to CrawlerRunConfig - Add example script demonstrating proxy rotation usage - Remove deprecated synchronous WebCrawler code - Clean up rate limiting documentation BREAKING CHANGE: Removed synchronous WebCrawler support and related rate limiting configurations
This commit is contained in:
@@ -8,6 +8,10 @@ from .content_scraping_strategy import (
|
||||
WebScrapingStrategy,
|
||||
LXMLWebScrapingStrategy,
|
||||
)
|
||||
from .proxy_strategy import (
|
||||
ProxyRotationStrategy,
|
||||
RoundRobinProxyStrategy,
|
||||
)
|
||||
from .extraction_strategy import (
|
||||
ExtractionStrategy,
|
||||
LLMExtractionStrategy,
|
||||
@@ -60,31 +64,33 @@ __all__ = [
|
||||
"DisplayMode",
|
||||
"MarkdownGenerationResult",
|
||||
"Crawl4aiDockerClient",
|
||||
"ProxyRotationStrategy",
|
||||
"RoundRobinProxyStrategy",
|
||||
]
|
||||
|
||||
|
||||
def is_sync_version_installed():
|
||||
try:
|
||||
import selenium # noqa
|
||||
# def is_sync_version_installed():
|
||||
# try:
|
||||
# import selenium # noqa
|
||||
|
||||
return True
|
||||
except ImportError:
|
||||
return False
|
||||
# return True
|
||||
# except ImportError:
|
||||
# return False
|
||||
|
||||
|
||||
if is_sync_version_installed():
|
||||
try:
|
||||
from .web_crawler import WebCrawler
|
||||
# if is_sync_version_installed():
|
||||
# try:
|
||||
# from .web_crawler import WebCrawler
|
||||
|
||||
__all__.append("WebCrawler")
|
||||
except ImportError:
|
||||
print(
|
||||
"Warning: Failed to import WebCrawler even though selenium is installed. This might be due to other missing dependencies."
|
||||
)
|
||||
else:
|
||||
WebCrawler = None
|
||||
# import warnings
|
||||
# print("Warning: Synchronous WebCrawler is not available. Install crawl4ai[sync] for synchronous support. However, please note that the synchronous version will be deprecated soon.")
|
||||
# __all__.append("WebCrawler")
|
||||
# except ImportError:
|
||||
# print(
|
||||
# "Warning: Failed to import WebCrawler even though selenium is installed. This might be due to other missing dependencies."
|
||||
# )
|
||||
# else:
|
||||
# WebCrawler = None
|
||||
# # import warnings
|
||||
# # print("Warning: Synchronous WebCrawler is not available. Install crawl4ai[sync] for synchronous support. However, please note that the synchronous version will be deprecated soon.")
|
||||
|
||||
# Disable all Pydantic warnings
|
||||
warnings.filterwarnings("ignore", module="pydantic")
|
||||
|
||||
Reference in New Issue
Block a user