feat(StealthAdapter): fix stealth features for Playwright integration. ref #1481
This commit is contained in:
@@ -148,6 +148,134 @@ class PlaywrightAdapter(BrowserAdapter):
|
|||||||
return Page, Error, PlaywrightTimeoutError
|
return Page, Error, PlaywrightTimeoutError
|
||||||
|
|
||||||
|
|
||||||
|
class StealthAdapter(BrowserAdapter):
    """Adapter for Playwright with stealth features using playwright_stealth.

    Console messages and page errors are collected through Playwright's
    event listeners (``page.on``), so nothing has to be polled from the page.
    Stealth is optional: when ``playwright_stealth`` cannot be imported the
    adapter behaves like a plain Playwright adapter.
    """

    def __init__(self):
        # Not read anywhere inside this class; kept so existing attribute
        # access from outside keeps working.
        self._console_script_injected = {}
        # Callable that applies stealth to a page, or None when unavailable.
        self._stealth_function = None
        self._stealth_available = self._check_stealth_availability()

    def _check_stealth_availability(self) -> bool:
        """Resolve the stealth entry point and report whether one was found.

        Stores the resolved callable (or ``None``) in ``self._stealth_function``.

        Returns:
            ``True`` when a usable stealth callable was found, else ``False``.
        """
        self._stealth_function = self._resolve_stealth_function()
        return self._stealth_function is not None

    @staticmethod
    def _resolve_stealth_function() -> Optional[Callable]:
        """Return the first stealth callable playwright_stealth offers, or None."""
        # playwright_stealth 1.x: module-level coroutine function.
        try:
            from playwright_stealth import stealth_async
            return stealth_async
        except ImportError:
            pass

        # playwright_stealth 2.x: class-based API. Looked up defensively so a
        # release without ``apply_stealth_async`` simply falls through.
        try:
            from playwright_stealth import Stealth
            applier = getattr(Stealth(), "apply_stealth_async", None)
            if callable(applier):
                return applier
        except ImportError:
            pass

        # NOTE(review): a sync helper presumably cannot await the async Page
        # API, so this last-resort fallback is unlikely to be effective on
        # async pages - kept only for backward compatibility; confirm/remove.
        try:
            from playwright_stealth import stealth_sync
            return stealth_sync
        except ImportError:
            return None

    @staticmethod
    def _read_attr(source: Any, name: str, fallback: Any) -> Any:
        """Read ``source.name``; return ``fallback`` if the access raises."""
        try:
            return getattr(source, name)
        except Exception:
            return fallback

    async def apply_stealth(self, page: Page):
        """Apply stealth to a page if available.

        Best-effort: a failure inside the stealth library is logged as a
        warning and never propagated to the caller.
        """
        # Function-scope imports keep this block self-contained.
        import inspect
        import logging

        stealth = self._stealth_function
        if not self._stealth_available or not callable(stealth):
            return

        try:
            outcome = stealth(page)
            # Await based on what the call returned rather than guessing
            # from the function's name.
            if inspect.isawaitable(outcome):
                await outcome
        except Exception as exc:
            logging.getLogger(__name__).warning(
                "Failed to apply playwright_stealth to page: %s", exc
            )

    async def evaluate(self, page: Page, expression: str, arg: Any = None) -> Any:
        """Evaluate ``expression`` in the page via Playwright's ``page.evaluate``.

        ``arg`` is forwarded only when it is not ``None``. This method does
        not apply stealth itself.
        """
        if arg is not None:
            return await page.evaluate(expression, arg)
        return await page.evaluate(expression)

    async def setup_console_capture(self, page: Page, captured_console: List[Dict]) -> Optional[Callable]:
        """Apply stealth to the page, then record its console messages.

        Each console event appends ``{"type", "text", "timestamp"}`` to
        ``captured_console``.

        Returns:
            The registered listener, to be passed to
            ``cleanup_console_capture`` later.
        """
        # Apply stealth to the page first
        await self.apply_stealth(page)

        read_attr = self._read_attr

        def handle_console_capture(msg):
            try:
                captured_console.append({
                    "type": read_attr(msg, "type", "unknown"),
                    "text": read_attr(msg, "text", "unknown"),
                    "timestamp": time.time(),
                })
            except Exception as e:
                captured_console.append({
                    "type": "console_capture_error",
                    "error": str(e),
                    "timestamp": time.time(),
                })

        page.on("console", handle_console_capture)
        return handle_console_capture

    async def setup_error_capture(self, page: Page, captured_console: List[Dict]) -> Optional[Callable]:
        """Record uncaught page errors using Playwright's event system.

        Each ``pageerror`` event appends
        ``{"type": "error", "text", "stack", "timestamp"}`` to
        ``captured_console``.

        Returns:
            The registered listener, to be passed to
            ``cleanup_console_capture`` later.
        """
        read_attr = self._read_attr

        def handle_pageerror_capture(err):
            try:
                captured_console.append({
                    "type": "error",
                    "text": read_attr(err, "message", "Unknown error"),
                    "stack": read_attr(err, "stack", ""),
                    "timestamp": time.time(),
                })
            except Exception as e:
                captured_console.append({
                    "type": "pageerror_capture_error",
                    "error": str(e),
                    "timestamp": time.time(),
                })

        page.on("pageerror", handle_pageerror_capture)
        return handle_pageerror_capture

    async def retrieve_console_messages(self, page: Page) -> List[Dict]:
        """Not needed for Playwright - messages are captured via events."""
        return []

    async def cleanup_console_capture(self, page: Page, handle_console: Optional[Callable], handle_error: Optional[Callable]):
        """Remove the listeners returned by the two ``setup_*_capture`` methods."""
        if handle_console:
            page.remove_listener("console", handle_console)
        if handle_error:
            page.remove_listener("pageerror", handle_error)

    def get_imports(self) -> tuple:
        """Return ``(Page, Error, TimeoutError)`` from ``playwright.async_api``."""
        from playwright.async_api import Page, Error
        from playwright.async_api import TimeoutError as PlaywrightTimeoutError
        return Page, Error, PlaywrightTimeoutError
|
||||||
|
|
||||||
|
|
||||||
class UndetectedAdapter(BrowserAdapter):
|
class UndetectedAdapter(BrowserAdapter):
|
||||||
"""Adapter for undetected browser automation with stealth features"""
|
"""Adapter for undetected browser automation with stealth features"""
|
||||||
|
|
||||||
|
|||||||
@@ -614,9 +614,11 @@ class BrowserManager:
|
|||||||
# for all racers). Prevents 'Target page/context closed' errors.
|
# for all racers). Prevents 'Target page/context closed' errors.
|
||||||
self._page_lock = asyncio.Lock()
|
self._page_lock = asyncio.Lock()
|
||||||
|
|
||||||
# Stealth-related attributes
|
# Stealth adapter for stealth mode
|
||||||
self._stealth_instance = None
|
self._stealth_adapter = None
|
||||||
self._stealth_cm = None
|
if self.config.enable_stealth and not self.use_undetected:
|
||||||
|
from .browser_adapter import StealthAdapter
|
||||||
|
self._stealth_adapter = StealthAdapter()
|
||||||
|
|
||||||
# Initialize ManagedBrowser if needed
|
# Initialize ManagedBrowser if needed
|
||||||
if self.config.use_managed_browser:
|
if self.config.use_managed_browser:
|
||||||
@@ -650,16 +652,8 @@ class BrowserManager:
|
|||||||
else:
|
else:
|
||||||
from playwright.async_api import async_playwright
|
from playwright.async_api import async_playwright
|
||||||
|
|
||||||
# Initialize playwright with or without stealth
|
# Initialize playwright
|
||||||
if self.config.enable_stealth and not self.use_undetected:
|
self.playwright = await async_playwright().start()
|
||||||
# Import stealth only when needed
|
|
||||||
from playwright_stealth import Stealth
|
|
||||||
# Use the recommended stealth wrapper approach
|
|
||||||
self._stealth_instance = Stealth()
|
|
||||||
self._stealth_cm = self._stealth_instance.use_async(async_playwright())
|
|
||||||
self.playwright = await self._stealth_cm.__aenter__()
|
|
||||||
else:
|
|
||||||
self.playwright = await async_playwright().start()
|
|
||||||
|
|
||||||
if self.config.cdp_url or self.config.use_managed_browser:
|
if self.config.cdp_url or self.config.use_managed_browser:
|
||||||
self.config.use_managed_browser = True
|
self.config.use_managed_browser = True
|
||||||
@@ -1009,6 +1003,19 @@ class BrowserManager:
|
|||||||
signature_hash = hashlib.sha256(signature_json.encode("utf-8")).hexdigest()
|
signature_hash = hashlib.sha256(signature_json.encode("utf-8")).hexdigest()
|
||||||
return signature_hash
|
return signature_hash
|
||||||
|
|
||||||
|
async def _apply_stealth_to_page(self, page):
    """Best-effort stealth application for a page.

    Does nothing when ``self._stealth_adapter`` is not set. If the adapter
    raises, the error is reported as a warning through ``self.logger`` (when
    a logger is set) and is not re-raised.
    """
    adapter = self._stealth_adapter
    if not adapter:
        return

    try:
        await adapter.apply_stealth(page)
    except Exception as exc:
        if not self.logger:
            return
        self.logger.warning(
            message="Failed to apply stealth to page: {error}",
            tag="STEALTH",
            params={"error": str(exc)},
        )
|
||||||
|
|
||||||
async def get_page(self, crawlerRunConfig: CrawlerRunConfig):
|
async def get_page(self, crawlerRunConfig: CrawlerRunConfig):
|
||||||
"""
|
"""
|
||||||
Get a page for the given session ID, creating a new one if needed.
|
Get a page for the given session ID, creating a new one if needed.
|
||||||
@@ -1038,6 +1045,7 @@ class BrowserManager:
|
|||||||
# See GH-1198: context.pages can be empty under races
|
# See GH-1198: context.pages can be empty under races
|
||||||
async with self._page_lock:
|
async with self._page_lock:
|
||||||
page = await ctx.new_page()
|
page = await ctx.new_page()
|
||||||
|
await self._apply_stealth_to_page(page)
|
||||||
else:
|
else:
|
||||||
context = self.default_context
|
context = self.default_context
|
||||||
pages = context.pages
|
pages = context.pages
|
||||||
@@ -1054,6 +1062,7 @@ class BrowserManager:
|
|||||||
page = pages[0]
|
page = pages[0]
|
||||||
else:
|
else:
|
||||||
page = await context.new_page()
|
page = await context.new_page()
|
||||||
|
await self._apply_stealth_to_page(page)
|
||||||
else:
|
else:
|
||||||
# Otherwise, check if we have an existing context for this config
|
# Otherwise, check if we have an existing context for this config
|
||||||
config_signature = self._make_config_signature(crawlerRunConfig)
|
config_signature = self._make_config_signature(crawlerRunConfig)
|
||||||
@@ -1069,6 +1078,7 @@ class BrowserManager:
|
|||||||
|
|
||||||
# Create a new page from the chosen context
|
# Create a new page from the chosen context
|
||||||
page = await context.new_page()
|
page = await context.new_page()
|
||||||
|
await self._apply_stealth_to_page(page)
|
||||||
|
|
||||||
# If a session_id is specified, store this session so we can reuse later
|
# If a session_id is specified, store this session so we can reuse later
|
||||||
if crawlerRunConfig.session_id:
|
if crawlerRunConfig.session_id:
|
||||||
@@ -1135,19 +1145,5 @@ class BrowserManager:
|
|||||||
self.managed_browser = None
|
self.managed_browser = None
|
||||||
|
|
||||||
if self.playwright:
|
if self.playwright:
|
||||||
# Handle stealth context manager cleanup if it exists
|
await self.playwright.stop()
|
||||||
if hasattr(self, '_stealth_cm') and self._stealth_cm is not None:
|
|
||||||
try:
|
|
||||||
await self._stealth_cm.__aexit__(None, None, None)
|
|
||||||
except Exception as e:
|
|
||||||
if self.logger:
|
|
||||||
self.logger.error(
|
|
||||||
message="Error closing stealth context: {error}",
|
|
||||||
tag="ERROR",
|
|
||||||
params={"error": str(e)}
|
|
||||||
)
|
|
||||||
self._stealth_cm = None
|
|
||||||
self._stealth_instance = None
|
|
||||||
else:
|
|
||||||
await self.playwright.stop()
|
|
||||||
self.playwright = None
|
self.playwright = None
|
||||||
|
|||||||
Reference in New Issue
Block a user