Merge pull request #1501 from unclecode/fix/n-playwright-stealth

feat(StealthAdapter): fix stealth features for Playwright integration
This commit is contained in:
Nasrin
2025-09-19 14:17:35 +08:00
committed by GitHub
2 changed files with 152 additions and 28 deletions

View File

@@ -148,6 +148,134 @@ class PlaywrightAdapter(BrowserAdapter):
return Page, Error, PlaywrightTimeoutError
class StealthAdapter(BrowserAdapter):
"""Adapter for Playwright with stealth features using playwright_stealth"""
def __init__(self):
self._console_script_injected = {}
self._stealth_available = self._check_stealth_availability()
def _check_stealth_availability(self) -> bool:
"""Check if playwright_stealth is available and get the correct function"""
try:
from playwright_stealth import stealth_async
self._stealth_function = stealth_async
return True
except ImportError:
try:
from playwright_stealth import stealth_sync
self._stealth_function = stealth_sync
return True
except ImportError:
self._stealth_function = None
return False
async def apply_stealth(self, page: Page):
"""Apply stealth to a page if available"""
if self._stealth_available and self._stealth_function:
try:
if hasattr(self._stealth_function, '__call__'):
if 'async' in getattr(self._stealth_function, '__name__', ''):
await self._stealth_function(page)
else:
self._stealth_function(page)
except Exception as e:
# Fail silently or log error depending on requirements
pass
async def evaluate(self, page: Page, expression: str, arg: Any = None) -> Any:
"""Standard Playwright evaluate with stealth applied"""
if arg is not None:
return await page.evaluate(expression, arg)
return await page.evaluate(expression)
async def setup_console_capture(self, page: Page, captured_console: List[Dict]) -> Optional[Callable]:
"""Setup console capture using Playwright's event system with stealth"""
# Apply stealth to the page first
await self.apply_stealth(page)
def handle_console_capture(msg):
try:
message_type = "unknown"
try:
message_type = msg.type
except:
pass
message_text = "unknown"
try:
message_text = msg.text
except:
pass
entry = {
"type": message_type,
"text": message_text,
"timestamp": time.time()
}
captured_console.append(entry)
except Exception as e:
captured_console.append({
"type": "console_capture_error",
"error": str(e),
"timestamp": time.time()
})
page.on("console", handle_console_capture)
return handle_console_capture
async def setup_error_capture(self, page: Page, captured_console: List[Dict]) -> Optional[Callable]:
"""Setup error capture using Playwright's event system"""
def handle_pageerror_capture(err):
try:
error_message = "Unknown error"
try:
error_message = err.message
except:
pass
error_stack = ""
try:
error_stack = err.stack
except:
pass
captured_console.append({
"type": "error",
"text": error_message,
"stack": error_stack,
"timestamp": time.time()
})
except Exception as e:
captured_console.append({
"type": "pageerror_capture_error",
"error": str(e),
"timestamp": time.time()
})
page.on("pageerror", handle_pageerror_capture)
return handle_pageerror_capture
async def retrieve_console_messages(self, page: Page) -> List[Dict]:
"""Not needed for Playwright - messages are captured via events"""
return []
async def cleanup_console_capture(self, page: Page, handle_console: Optional[Callable], handle_error: Optional[Callable]):
"""Remove event listeners"""
if handle_console:
page.remove_listener("console", handle_console)
if handle_error:
page.remove_listener("pageerror", handle_error)
def get_imports(self) -> tuple:
"""Return Playwright imports"""
from playwright.async_api import Page, Error
from playwright.async_api import TimeoutError as PlaywrightTimeoutError
return Page, Error, PlaywrightTimeoutError
class UndetectedAdapter(BrowserAdapter):
"""Adapter for undetected browser automation with stealth features"""

View File

@@ -614,9 +614,11 @@ class BrowserManager:
# for all racers). Prevents 'Target page/context closed' errors.
self._page_lock = asyncio.Lock()
# Stealth-related attributes
self._stealth_instance = None
self._stealth_cm = None
# Stealth adapter for stealth mode
self._stealth_adapter = None
if self.config.enable_stealth and not self.use_undetected:
from .browser_adapter import StealthAdapter
self._stealth_adapter = StealthAdapter()
# Initialize ManagedBrowser if needed
if self.config.use_managed_browser:
@@ -650,16 +652,8 @@ class BrowserManager:
else:
from playwright.async_api import async_playwright
# Initialize playwright with or without stealth
if self.config.enable_stealth and not self.use_undetected:
# Import stealth only when needed
from playwright_stealth import Stealth
# Use the recommended stealth wrapper approach
self._stealth_instance = Stealth()
self._stealth_cm = self._stealth_instance.use_async(async_playwright())
self.playwright = await self._stealth_cm.__aenter__()
else:
self.playwright = await async_playwright().start()
# Initialize playwright
self.playwright = await async_playwright().start()
if self.config.cdp_url or self.config.use_managed_browser:
self.config.use_managed_browser = True
@@ -1009,6 +1003,19 @@ class BrowserManager:
signature_hash = hashlib.sha256(signature_json.encode("utf-8")).hexdigest()
return signature_hash
async def _apply_stealth_to_page(self, page):
"""Apply stealth to a page if stealth mode is enabled"""
if self._stealth_adapter:
try:
await self._stealth_adapter.apply_stealth(page)
except Exception as e:
if self.logger:
self.logger.warning(
message="Failed to apply stealth to page: {error}",
tag="STEALTH",
params={"error": str(e)}
)
async def get_page(self, crawlerRunConfig: CrawlerRunConfig):
"""
Get a page for the given session ID, creating a new one if needed.
@@ -1038,6 +1045,7 @@ class BrowserManager:
# See GH-1198: context.pages can be empty under races
async with self._page_lock:
page = await ctx.new_page()
await self._apply_stealth_to_page(page)
else:
context = self.default_context
pages = context.pages
@@ -1054,6 +1062,7 @@ class BrowserManager:
page = pages[0]
else:
page = await context.new_page()
await self._apply_stealth_to_page(page)
else:
# Otherwise, check if we have an existing context for this config
config_signature = self._make_config_signature(crawlerRunConfig)
@@ -1069,6 +1078,7 @@ class BrowserManager:
# Create a new page from the chosen context
page = await context.new_page()
await self._apply_stealth_to_page(page)
# If a session_id is specified, store this session so we can reuse later
if crawlerRunConfig.session_id:
@@ -1135,19 +1145,5 @@ class BrowserManager:
self.managed_browser = None
if self.playwright:
# Handle stealth context manager cleanup if it exists
if hasattr(self, '_stealth_cm') and self._stealth_cm is not None:
try:
await self._stealth_cm.__aexit__(None, None, None)
except Exception as e:
if self.logger:
self.logger.error(
message="Error closing stealth context: {error}",
tag="ERROR",
params={"error": str(e)}
)
self._stealth_cm = None
self._stealth_instance = None
else:
await self.playwright.stop()
await self.playwright.stop()
self.playwright = None