refactor(proxy): consolidate proxy configuration handling
Moves ProxyConfig from the configs/ directory into proxy_strategy.py to improve code organization and reduce fragmentation. Updates all imports and type hints to reflect the new location.

Key changes:
- Moved the ProxyConfig class from configs/proxy_config.py to proxy_strategy.py
- Updated type hints in async_configs.py to support ProxyConfig
- Fixed proxy configuration handling in browser_manager.py
- Updated documentation and examples to use the new import path

BREAKING CHANGE: The ProxyConfig import path has changed from crawl4ai.configs to crawl4ai.proxy_strategy.
This commit is contained in:
@@ -767,6 +767,7 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
|
||||
# Handle wait_for condition
|
||||
# Todo: Decide how to handle this
|
||||
if not config.wait_for and config.css_selector and False:
|
||||
# if not config.wait_for and config.css_selector:
|
||||
config.wait_for = f"css:{config.css_selector}"
|
||||
|
||||
if config.wait_for:
|
||||
@@ -806,8 +807,28 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
|
||||
if config.remove_overlay_elements:
|
||||
await self.remove_overlay_elements(page)
|
||||
|
||||
# Get final HTML content
|
||||
html = await page.content()
|
||||
if config.css_selector:
|
||||
try:
|
||||
# Handle comma-separated selectors by splitting them
|
||||
selectors = [s.strip() for s in config.css_selector.split(',')]
|
||||
html_parts = []
|
||||
|
||||
for selector in selectors:
|
||||
try:
|
||||
content = await page.evaluate(f"document.querySelector('{selector}')?.outerHTML || ''")
|
||||
html_parts.append(content)
|
||||
except Error as e:
|
||||
print(f"Warning: Could not get content for selector '{selector}': {str(e)}")
|
||||
|
||||
# Wrap in a div to create a valid HTML structure
|
||||
html = f"<div class='crawl4ai-result'>\n" + "\n".join(html_parts) + "\n</div>"
|
||||
except Error as e:
|
||||
raise RuntimeError(f"Failed to extract HTML content: {str(e)}")
|
||||
else:
|
||||
html = await page.content()
|
||||
|
||||
# # Get final HTML content
|
||||
# html = await page.content()
|
||||
await self.execute_hook(
|
||||
"before_return_html", page=page, html=html, context=context, config=config
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user