feat(browser): implement modular browser management system
Adds a new browser management system with strategy pattern implementation: - Introduces BrowserManager class with strategy pattern support - Adds PlaywrightBrowserStrategy, CDPBrowserStrategy, and BuiltinBrowserStrategy - Implements BrowserProfileManager for profile management - Adds PagePoolConfig for browser page pooling - Includes comprehensive test suite for all browser strategies BREAKING CHANGE: Browser management has been moved to browser/ module. Direct usage of browser_manager.py and browser_profiler.py is deprecated.
This commit is contained in:
@@ -156,6 +156,41 @@ def is_empty_value(value: Any) -> bool:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
class PagePoolConfig:
    """Configuration for the browser page pool.

    The pool keeps a number of pre-warmed browser pages ready for immediate
    use, which improves throughput when many URLs are processed in sequence.

    Attributes:
        mode (str): Pooling mode, either "static" (fixed size taken from
            ``static_size``) or "adaptive" (size derived from available
            system memory). Default: "static".
        static_size (int): Number of pages kept in the pool when ``mode``
            is "static". Default: 10.
        memory_per_page (int): Estimated memory footprint of one page in MB;
            used by the "adaptive" mode calculation. Default: 200.
        memory_threshold (float): Maximum fraction of system memory the pool
            may use in "adaptive" mode. Default: 0.7 (70%).
        timeout (float): Seconds to wait for a pooled page before falling
            back to creating a fresh one. Default: 5.0.
    """

    def __init__(
        self,
        mode="static",
        static_size=10,
        memory_per_page=200,
        memory_threshold=0.7,
        timeout=5.0,
    ):
        # Plain value object: the settings are stored as-is; the pool
        # implementation interprets them. No validation is performed here.
        self.mode = mode
        self.static_size = static_size
        self.memory_per_page = memory_per_page
        self.memory_threshold = memory_threshold
        self.timeout = timeout
|
||||||
|
|
||||||
class BrowserConfig:
|
class BrowserConfig:
|
||||||
"""
|
"""
|
||||||
Configuration class for setting up a browser instance and its context in AsyncPlaywrightCrawlerStrategy.
|
Configuration class for setting up a browser instance and its context in AsyncPlaywrightCrawlerStrategy.
|
||||||
@@ -220,6 +255,9 @@ class BrowserConfig:
|
|||||||
light_mode (bool): Disables certain background features for performance gains. Default: False.
|
light_mode (bool): Disables certain background features for performance gains. Default: False.
|
||||||
extra_args (list): Additional command-line arguments passed to the browser.
|
extra_args (list): Additional command-line arguments passed to the browser.
|
||||||
Default: [].
|
Default: [].
|
||||||
|
page_pool_config (PagePoolConfig or None): Configuration for page pooling mechanism.
|
||||||
|
If None, page pooling is disabled.
|
||||||
|
Default: None.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
@@ -260,6 +298,7 @@ class BrowserConfig:
|
|||||||
extra_args: list = None,
|
extra_args: list = None,
|
||||||
debugging_port: int = 9222,
|
debugging_port: int = 9222,
|
||||||
host: str = "localhost",
|
host: str = "localhost",
|
||||||
|
page_pool_config: Optional[PagePoolConfig] = None,
|
||||||
):
|
):
|
||||||
self.browser_type = browser_type
|
self.browser_type = browser_type
|
||||||
self.headless = headless
|
self.headless = headless
|
||||||
@@ -298,6 +337,7 @@ class BrowserConfig:
|
|||||||
self.verbose = verbose
|
self.verbose = verbose
|
||||||
self.debugging_port = debugging_port
|
self.debugging_port = debugging_port
|
||||||
self.host = host
|
self.host = host
|
||||||
|
self.page_pool_config = page_pool_config
|
||||||
|
|
||||||
fa_user_agenr_generator = ValidUAGenerator()
|
fa_user_agenr_generator = ValidUAGenerator()
|
||||||
if self.user_agent_mode == "random":
|
if self.user_agent_mode == "random":
|
||||||
@@ -328,6 +368,12 @@ class BrowserConfig:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def from_kwargs(kwargs: dict) -> "BrowserConfig":
|
def from_kwargs(kwargs: dict) -> "BrowserConfig":
|
||||||
|
# Handle page_pool_config
|
||||||
|
page_pool_config = kwargs.get("page_pool_config")
|
||||||
|
if isinstance(page_pool_config, dict):
|
||||||
|
# If it's a dict, convert to PagePoolConfig
|
||||||
|
page_pool_config = PagePoolConfig(**page_pool_config)
|
||||||
|
|
||||||
return BrowserConfig(
|
return BrowserConfig(
|
||||||
browser_type=kwargs.get("browser_type", "chromium"),
|
browser_type=kwargs.get("browser_type", "chromium"),
|
||||||
headless=kwargs.get("headless", True),
|
headless=kwargs.get("headless", True),
|
||||||
@@ -361,6 +407,7 @@ class BrowserConfig:
|
|||||||
extra_args=kwargs.get("extra_args", []),
|
extra_args=kwargs.get("extra_args", []),
|
||||||
debugging_port=kwargs.get("debugging_port", 9222),
|
debugging_port=kwargs.get("debugging_port", 9222),
|
||||||
host=kwargs.get("host", "localhost"),
|
host=kwargs.get("host", "localhost"),
|
||||||
|
page_pool_config=page_pool_config,
|
||||||
)
|
)
|
||||||
|
|
||||||
def to_dict(self):
|
def to_dict(self):
|
||||||
@@ -395,6 +442,7 @@ class BrowserConfig:
|
|||||||
"verbose": self.verbose,
|
"verbose": self.verbose,
|
||||||
"debugging_port": self.debugging_port,
|
"debugging_port": self.debugging_port,
|
||||||
"host": self.host,
|
"host": self.host,
|
||||||
|
"page_pool_config": self.page_pool_config,
|
||||||
}
|
}
|
||||||
|
|
||||||
def clone(self, **kwargs):
|
def clone(self, **kwargs):
|
||||||
|
|||||||
10
crawl4ai/browser/__init__.py
Normal file
10
crawl4ai/browser/__init__.py
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
"""Browser management module for Crawl4AI.
|
||||||
|
|
||||||
|
This module provides browser management capabilities using different strategies
|
||||||
|
for browser creation and interaction.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .manager import BrowserManager
|
||||||
|
from .profiles import BrowserProfileManager
|
||||||
|
|
||||||
|
__all__ = ['BrowserManager', 'BrowserProfileManager']
|
||||||
165
crawl4ai/browser/manager.py
Normal file
165
crawl4ai/browser/manager.py
Normal file
@@ -0,0 +1,165 @@
|
|||||||
|
"""Browser manager module for Crawl4AI.
|
||||||
|
|
||||||
|
This module provides a central browser management class that uses the
|
||||||
|
strategy pattern internally while maintaining the existing API.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import time
|
||||||
|
from typing import Optional, Tuple, Dict, Any
|
||||||
|
|
||||||
|
from playwright.async_api import Page, BrowserContext
|
||||||
|
|
||||||
|
from ..async_logger import AsyncLogger
|
||||||
|
from ..async_configs import BrowserConfig, CrawlerRunConfig
|
||||||
|
|
||||||
|
from .strategies import (
|
||||||
|
BaseBrowserStrategy,
|
||||||
|
PlaywrightBrowserStrategy,
|
||||||
|
CDPBrowserStrategy,
|
||||||
|
BuiltinBrowserStrategy
|
||||||
|
)
|
||||||
|
|
||||||
|
class BrowserManager:
    """Main interface for browser management in Crawl4AI.

    This class maintains backward compatibility with the existing implementation
    while using the strategy pattern internally for different browser types.

    Attributes:
        config (BrowserConfig): Configuration object containing all browser settings
        logger: Logger instance for recording events and errors
        browser: The browser instance
        default_context: The default browser context
        managed_browser: The managed browser instance
        playwright: The Playwright instance
        sessions: Dictionary to store session information
        session_ttl: Session timeout in seconds
    """

    def __init__(self, browser_config: Optional[BrowserConfig] = None, logger: Optional[AsyncLogger] = None):
        """Initialize the BrowserManager with a browser configuration.

        Args:
            browser_config: Configuration object containing all browser settings
            logger: Logger instance for recording events and errors
        """
        self.config = browser_config or BrowserConfig()
        self.logger = logger

        # Create strategy based on configuration
        self._strategy = self._create_strategy()

        # Initialize state variables for compatibility with existing code.
        # These mirror strategy state after start() and are reset in close().
        self.browser = None
        self.default_context = None
        self.managed_browser = None
        self.playwright = None

        # For session management (from existing implementation)
        self.sessions = {}
        self.session_ttl = 1800  # 30 minutes

    def _create_strategy(self) -> BaseBrowserStrategy:
        """Create appropriate browser strategy based on configuration.

        Selection order matters: "builtin" mode wins over CDP settings,
        and any CDP URL / managed-browser flag wins over plain Playwright.

        Returns:
            BaseBrowserStrategy: The selected browser strategy
        """
        if self.config.browser_mode == "builtin":
            return BuiltinBrowserStrategy(self.config, self.logger)
        elif self.config.cdp_url or self.config.use_managed_browser:
            return CDPBrowserStrategy(self.config, self.logger)
        else:
            return PlaywrightBrowserStrategy(self.config, self.logger)

    async def start(self):
        """Start the browser instance and set up the default context.

        Returns:
            self: For method chaining
        """
        # Start the strategy
        await self._strategy.start()

        # Update legacy references so pre-strategy callers keep working
        self.browser = self._strategy.browser
        self.default_context = self._strategy.default_context

        # Set browser process reference (for CDP strategy)
        if hasattr(self._strategy, 'browser_process'):
            self.managed_browser = self._strategy

        # Set Playwright reference
        self.playwright = self._strategy.playwright

        # Sync sessions if needed — the strategy's session dict becomes the
        # shared one so both views stay consistent
        if hasattr(self._strategy, 'sessions'):
            self.sessions = self._strategy.sessions
            self.session_ttl = self._strategy.session_ttl

        return self

    async def get_page(self, crawlerRunConfig: CrawlerRunConfig) -> Tuple[Page, BrowserContext]:
        """Get a page for the given configuration.

        Args:
            crawlerRunConfig: Configuration object for the crawler run

        Returns:
            Tuple of (Page, BrowserContext)
        """
        # Delegate to strategy
        page, context = await self._strategy.get_page(crawlerRunConfig)

        # Sync sessions if needed
        if hasattr(self._strategy, 'sessions'):
            self.sessions = self._strategy.sessions

        return page, context

    async def kill_session(self, session_id: str):
        """Kill a browser session and clean up resources.

        Args:
            session_id: The session ID to kill
        """
        # Handle kill_session via our strategy if it supports it
        if hasattr(self._strategy, '_kill_session'):
            await self._strategy._kill_session(session_id)
        elif session_id in self.sessions:
            # Fallback: sessions store (context, page, last_used) tuples
            context, page, _ = self.sessions[session_id]
            await page.close()
            # Only close context if not using CDP — CDP/builtin/managed
            # browsers share a context that must outlive individual sessions
            if not self.config.use_managed_browser and not self.config.cdp_url and not self.config.browser_mode == "builtin":
                await context.close()
            del self.sessions[session_id]

    def _cleanup_expired_sessions(self):
        """Clean up expired sessions based on TTL."""
        # Use strategy's implementation if available
        if hasattr(self._strategy, '_cleanup_expired_sessions'):
            self._strategy._cleanup_expired_sessions()
            return

        # Otherwise use our own implementation
        current_time = time.time()
        expired_sessions = [
            sid
            for sid, (_, _, last_used) in self.sessions.items()
            if current_time - last_used > self.session_ttl
        ]
        # NOTE(review): create_task requires a running event loop; this method
        # appears to be called only from async contexts — confirm.
        for sid in expired_sessions:
            asyncio.create_task(self.kill_session(sid))

    async def close(self):
        """Close the browser and clean up resources."""
        # Delegate to strategy
        await self._strategy.close()

        # Reset legacy references
        self.browser = None
        self.default_context = None
        self.managed_browser = None
        self.playwright = None
        self.sessions = {}
|
||||||
0
crawl4ai/browser/models.py
Normal file
0
crawl4ai/browser/models.py
Normal file
458
crawl4ai/browser/profiles.py
Normal file
458
crawl4ai/browser/profiles.py
Normal file
@@ -0,0 +1,458 @@
|
|||||||
|
"""Browser profile management module for Crawl4AI.
|
||||||
|
|
||||||
|
This module provides functionality for creating and managing browser profiles
|
||||||
|
that can be used for authenticated browsing.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import asyncio
|
||||||
|
import signal
|
||||||
|
import sys
|
||||||
|
import datetime
|
||||||
|
import uuid
|
||||||
|
import shutil
|
||||||
|
from typing import List, Dict, Optional, Any
|
||||||
|
from colorama import Fore, Style, init
|
||||||
|
|
||||||
|
from ..async_configs import BrowserConfig
|
||||||
|
from ..async_logger import AsyncLogger, AsyncLoggerBase
|
||||||
|
from ..utils import get_home_folder
|
||||||
|
from .strategies import is_windows
|
||||||
|
|
||||||
|
class BrowserProfileManager:
    """Manages browser profiles for Crawl4AI.

    This class provides functionality to create and manage browser profiles
    that can be used for authenticated browsing with Crawl4AI.

    Profiles are stored by default in ~/.crawl4ai/profiles/
    """

    def __init__(self, logger: Optional[AsyncLoggerBase] = None):
        """Initialize the BrowserProfileManager.

        Args:
            logger: Logger for outputting messages. If None, a default AsyncLogger is created.
        """
        # Initialize colorama for colorful terminal output
        init()

        # Create a logger if not provided (or if an unusable object was passed)
        if logger is None:
            self.logger = AsyncLogger(verbose=True)
        elif not isinstance(logger, AsyncLoggerBase):
            self.logger = AsyncLogger(verbose=True)
        else:
            self.logger = logger

        # Ensure profiles directory exists
        self.profiles_dir = os.path.join(get_home_folder(), "profiles")
        os.makedirs(self.profiles_dir, exist_ok=True)

    async def create_profile(self,
                             profile_name: Optional[str] = None,
                             browser_config: Optional[BrowserConfig] = None) -> Optional[str]:
        """Create a browser profile interactively.

        Opens a visible browser for the user to log in / configure, waits for
        the user to press 'q' in the terminal (or for the browser to exit),
        then saves the profile directory.

        Args:
            profile_name: Name for the profile. If None, a name is generated.
            browser_config: Configuration for the browser. If None, a default configuration is used.

        Returns:
            Path to the created profile directory, or None if creation failed
        """
        # Create default browser config if none provided
        if browser_config is None:
            browser_config = BrowserConfig(
                browser_type="chromium",
                headless=False,  # Must be visible for user interaction
                verbose=True
            )
        else:
            # Ensure headless is False for user interaction
            browser_config.headless = False

        # Generate profile name if not provided
        if not profile_name:
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            profile_name = f"profile_{timestamp}_{uuid.uuid4().hex[:6]}"

        # Sanitize profile name (replace spaces and special chars)
        profile_name = "".join(c if c.isalnum() or c in "-_" else "_" for c in profile_name)

        # Set user data directory
        profile_path = os.path.join(self.profiles_dir, profile_name)
        os.makedirs(profile_path, exist_ok=True)

        # Print instructions for the user with colorama formatting
        border = f"{Fore.CYAN}{'='*80}{Style.RESET_ALL}"
        self.logger.info(f"\n{border}", tag="PROFILE")
        self.logger.info(f"Creating browser profile: {Fore.GREEN}{profile_name}{Style.RESET_ALL}", tag="PROFILE")
        self.logger.info(f"Profile directory: {Fore.YELLOW}{profile_path}{Style.RESET_ALL}", tag="PROFILE")

        self.logger.info("\nInstructions:", tag="PROFILE")
        self.logger.info("1. A browser window will open for you to set up your profile.", tag="PROFILE")
        self.logger.info(f"2. {Fore.CYAN}Log in to websites{Style.RESET_ALL}, configure settings, etc. as needed.", tag="PROFILE")
        self.logger.info(f"3. When you're done, {Fore.YELLOW}press 'q' in this terminal{Style.RESET_ALL} to close the browser.", tag="PROFILE")
        self.logger.info("4. The profile will be saved and ready to use with Crawl4AI.", tag="PROFILE")
        self.logger.info(f"{border}\n", tag="PROFILE")

        # Import the necessary classes with local imports to avoid circular references
        from .strategies import CDPBrowserStrategy

        # Set browser config to use the profile path
        browser_config.user_data_dir = profile_path

        # Create a CDP browser strategy for the profile creation
        browser_strategy = CDPBrowserStrategy(browser_config, self.logger)

        # Set up signal handlers to ensure cleanup on interrupt
        original_sigint = signal.getsignal(signal.SIGINT)
        original_sigterm = signal.getsignal(signal.SIGTERM)

        # Define cleanup handler for signals
        async def cleanup_handler(sig, frame):
            self.logger.warning("\nCleaning up browser process...", tag="PROFILE")
            await browser_strategy.close()
            # Restore original signal handlers
            signal.signal(signal.SIGINT, original_sigint)
            signal.signal(signal.SIGTERM, original_sigterm)
            if sig == signal.SIGINT:
                self.logger.error("Profile creation interrupted. Profile may be incomplete.", tag="PROFILE")
                sys.exit(1)

        # Set signal handlers (sync shim that schedules the async cleanup)
        def sigint_handler(sig, frame):
            asyncio.create_task(cleanup_handler(sig, frame))

        signal.signal(signal.SIGINT, sigint_handler)
        signal.signal(signal.SIGTERM, sigint_handler)

        # Event to signal when user is done with the browser
        user_done_event = asyncio.Event()

        # Run keyboard input loop in a separate task
        async def listen_for_quit_command():
            # NOTE(review): termios/tty are POSIX-only; this listener will
            # raise on Windows — confirm intended platforms.
            import termios
            import tty
            import select

            # First output the prompt
            self.logger.info(f"{Fore.CYAN}Press '{Fore.WHITE}q{Fore.CYAN}' when you've finished using the browser...{Style.RESET_ALL}", tag="PROFILE")

            # Save original terminal settings
            fd = sys.stdin.fileno()
            old_settings = termios.tcgetattr(fd)

            try:
                # Switch to non-canonical mode (no line buffering)
                tty.setcbreak(fd)

                while True:
                    # Check if input is available (non-blocking, 0.5s poll)
                    readable, _, _ = select.select([sys.stdin], [], [], 0.5)
                    if readable:
                        key = sys.stdin.read(1)
                        if key.lower() == 'q':
                            self.logger.info(f"{Fore.GREEN}Closing browser and saving profile...{Style.RESET_ALL}", tag="PROFILE")
                            user_done_event.set()
                            return

                    # Check if the browser process has already exited
                    if browser_strategy.browser_process and browser_strategy.browser_process.poll() is not None:
                        self.logger.info("Browser already closed. Ending input listener.", tag="PROFILE")
                        user_done_event.set()
                        return

                    await asyncio.sleep(0.1)

            finally:
                # Restore terminal settings
                termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)

        try:
            # Start the browser
            await browser_strategy.start()

            # Check if browser started successfully
            if not browser_strategy.browser_process:
                self.logger.error("Failed to start browser process.", tag="PROFILE")
                return None

            self.logger.info(f"Browser launched. {Fore.CYAN}Waiting for you to finish...{Style.RESET_ALL}", tag="PROFILE")

            # Start listening for keyboard input
            listener_task = asyncio.create_task(listen_for_quit_command())

            # Wait for either the user to press 'q' or for the browser process to exit naturally
            while not user_done_event.is_set() and browser_strategy.browser_process.poll() is None:
                await asyncio.sleep(0.5)

            # Cancel the listener task if it's still running
            if not listener_task.done():
                listener_task.cancel()
                try:
                    await listener_task
                except asyncio.CancelledError:
                    pass

            # If the browser is still running and the user pressed 'q', terminate it
            if browser_strategy.browser_process.poll() is None and user_done_event.is_set():
                self.logger.info("Terminating browser process...", tag="PROFILE")
                await browser_strategy.close()

            self.logger.success(f"Browser closed. Profile saved at: {Fore.GREEN}{profile_path}{Style.RESET_ALL}", tag="PROFILE")

        except Exception as e:
            self.logger.error(f"Error creating profile: {str(e)}", tag="PROFILE")
            await browser_strategy.close()
            return None
        finally:
            # Restore original signal handlers
            signal.signal(signal.SIGINT, original_sigint)
            signal.signal(signal.SIGTERM, original_sigterm)

            # Make sure browser is fully cleaned up (close() is presumably
            # idempotent since it may run a second time here — TODO confirm)
            await browser_strategy.close()

        # Return the profile path
        return profile_path

    def list_profiles(self) -> List[Dict[str, Any]]:
        """List all available browser profiles.

        Returns:
            List of dictionaries containing profile information
            (keys: "name", "path", "created", "type"), newest first.
        """
        if not os.path.exists(self.profiles_dir):
            return []

        profiles = []

        for name in os.listdir(self.profiles_dir):
            profile_path = os.path.join(self.profiles_dir, name)

            # Skip if not a directory
            if not os.path.isdir(profile_path):
                continue

            # Check if this looks like a valid browser profile
            # For Chromium: Look for Preferences file
            # For Firefox: Look for prefs.js file
            is_valid = False

            if os.path.exists(os.path.join(profile_path, "Preferences")) or \
               os.path.exists(os.path.join(profile_path, "Default", "Preferences")):
                is_valid = "chromium"
            elif os.path.exists(os.path.join(profile_path, "prefs.js")):
                is_valid = "firefox"

            if is_valid:
                # Get creation time
                created = datetime.datetime.fromtimestamp(
                    os.path.getctime(profile_path)
                )

                profiles.append({
                    "name": name,
                    "path": profile_path,
                    "created": created,
                    "type": is_valid
                })

        # Sort by creation time, newest first
        profiles.sort(key=lambda x: x["created"], reverse=True)

        return profiles

    def get_profile_path(self, profile_name: str) -> Optional[str]:
        """Get the full path to a profile by name.

        Args:
            profile_name: Name of the profile (not the full path)

        Returns:
            Full path to the profile directory, or None if not found
        """
        profile_path = os.path.join(self.profiles_dir, profile_name)

        # Check if path exists and is a valid profile
        if not os.path.isdir(profile_path):
            # Check if profile_name itself is full path
            if os.path.isabs(profile_name):
                profile_path = profile_name
            else:
                return None

        # Look for profile indicators (Chromium "Preferences" or Firefox "prefs.js")
        is_profile = (
            os.path.exists(os.path.join(profile_path, "Preferences")) or
            os.path.exists(os.path.join(profile_path, "Default", "Preferences")) or
            os.path.exists(os.path.join(profile_path, "prefs.js"))
        )

        if not is_profile:
            return None  # Not a valid browser profile

        return profile_path

    def delete_profile(self, profile_name_or_path: str) -> bool:
        """Delete a browser profile by name or path.

        Args:
            profile_name_or_path: Name of the profile or full path to profile directory

        Returns:
            True if the profile was deleted successfully, False otherwise
        """
        # Determine if input is a name or a path
        if os.path.isabs(profile_name_or_path):
            # Full path provided
            profile_path = profile_name_or_path
        else:
            # Just a name provided, construct path
            profile_path = os.path.join(self.profiles_dir, profile_name_or_path)

        # Check if path exists and is a valid profile
        if not os.path.isdir(profile_path):
            return False

        # Look for profile indicators — refuse to delete directories that do
        # not look like browser profiles (safety guard against wrong paths)
        is_profile = (
            os.path.exists(os.path.join(profile_path, "Preferences")) or
            os.path.exists(os.path.join(profile_path, "Default", "Preferences")) or
            os.path.exists(os.path.join(profile_path, "prefs.js"))
        )

        if not is_profile:
            return False  # Not a valid browser profile

        # Delete the profile directory
        try:
            shutil.rmtree(profile_path)
            return True
        except Exception:
            return False

    async def interactive_manager(self, crawl_callback=None):
        """Launch an interactive profile management console.

        Args:
            crawl_callback: Function to call when selecting option to use
                a profile for crawling. It will be called with (profile_path, url).
        """
        while True:
            self.logger.info(f"\n{Fore.CYAN}Profile Management Options:{Style.RESET_ALL}", tag="MENU")
            self.logger.info(f"1. {Fore.GREEN}Create a new profile{Style.RESET_ALL}", tag="MENU")
            self.logger.info(f"2. {Fore.YELLOW}List available profiles{Style.RESET_ALL}", tag="MENU")
            self.logger.info(f"3. {Fore.RED}Delete a profile{Style.RESET_ALL}", tag="MENU")

            # Only show crawl option if callback provided; the exit option
            # number shifts accordingly
            if crawl_callback:
                self.logger.info(f"4. {Fore.CYAN}Use a profile to crawl a website{Style.RESET_ALL}", tag="MENU")
                self.logger.info(f"5. {Fore.MAGENTA}Exit{Style.RESET_ALL}", tag="MENU")
                exit_option = "5"
            else:
                self.logger.info(f"4. {Fore.MAGENTA}Exit{Style.RESET_ALL}", tag="MENU")
                exit_option = "4"

            choice = input(f"\n{Fore.CYAN}Enter your choice (1-{exit_option}): {Style.RESET_ALL}")

            if choice == "1":
                # Create new profile
                name = input(f"{Fore.GREEN}Enter a name for the new profile (or press Enter for auto-generated name): {Style.RESET_ALL}")
                await self.create_profile(name or None)

            elif choice == "2":
                # List profiles
                profiles = self.list_profiles()

                if not profiles:
                    self.logger.warning(" No profiles found. Create one first with option 1.", tag="PROFILES")
                    continue

                # Print profile information with colorama formatting
                self.logger.info("\nAvailable profiles:", tag="PROFILES")
                for i, profile in enumerate(profiles):
                    self.logger.info(f"[{i+1}] {Fore.CYAN}{profile['name']}{Style.RESET_ALL}", tag="PROFILES")
                    self.logger.info(f"    Path: {Fore.YELLOW}{profile['path']}{Style.RESET_ALL}", tag="PROFILES")
                    self.logger.info(f"    Created: {profile['created'].strftime('%Y-%m-%d %H:%M:%S')}", tag="PROFILES")
                    self.logger.info(f"    Browser type: {profile['type']}", tag="PROFILES")
                    self.logger.info("", tag="PROFILES")  # Empty line for spacing

            elif choice == "3":
                # Delete profile
                profiles = self.list_profiles()
                if not profiles:
                    self.logger.warning("No profiles found to delete", tag="PROFILES")
                    continue

                # Display numbered list
                self.logger.info(f"\n{Fore.YELLOW}Available profiles:{Style.RESET_ALL}", tag="PROFILES")
                for i, profile in enumerate(profiles):
                    self.logger.info(f"[{i+1}] {profile['name']}", tag="PROFILES")

                # Get profile to delete
                profile_idx = input(f"{Fore.RED}Enter the number of the profile to delete (or 'c' to cancel): {Style.RESET_ALL}")
                if profile_idx.lower() == 'c':
                    continue

                try:
                    idx = int(profile_idx) - 1
                    if 0 <= idx < len(profiles):
                        profile_name = profiles[idx]["name"]
                        self.logger.info(f"Deleting profile: {Fore.YELLOW}{profile_name}{Style.RESET_ALL}", tag="PROFILES")

                        # Confirm deletion
                        confirm = input(f"{Fore.RED}Are you sure you want to delete this profile? (y/n): {Style.RESET_ALL}")
                        if confirm.lower() == 'y':
                            success = self.delete_profile(profiles[idx]["path"])

                            if success:
                                self.logger.success(f"Profile {Fore.GREEN}{profile_name}{Style.RESET_ALL} deleted successfully", tag="PROFILES")
                            else:
                                self.logger.error(f"Failed to delete profile {Fore.RED}{profile_name}{Style.RESET_ALL}", tag="PROFILES")
                    else:
                        self.logger.error("Invalid profile number", tag="PROFILES")
                except ValueError:
                    self.logger.error("Please enter a valid number", tag="PROFILES")

            elif choice == "4" and crawl_callback:
                # Use profile to crawl a site
                profiles = self.list_profiles()
                if not profiles:
                    self.logger.warning("No profiles found. Create one first.", tag="PROFILES")
                    continue

                # Display numbered list
                self.logger.info(f"\n{Fore.YELLOW}Available profiles:{Style.RESET_ALL}", tag="PROFILES")
                for i, profile in enumerate(profiles):
                    self.logger.info(f"[{i+1}] {profile['name']}", tag="PROFILES")

                # Get profile to use
                profile_idx = input(f"{Fore.CYAN}Enter the number of the profile to use (or 'c' to cancel): {Style.RESET_ALL}")
                if profile_idx.lower() == 'c':
                    continue

                try:
                    idx = int(profile_idx) - 1
                    if 0 <= idx < len(profiles):
                        profile_path = profiles[idx]["path"]
                        url = input(f"{Fore.CYAN}Enter the URL to crawl: {Style.RESET_ALL}")
                        if url:
                            # Call the provided crawl callback
                            await crawl_callback(profile_path, url)
                        else:
                            self.logger.error("No URL provided", tag="CRAWL")
                    else:
                        self.logger.error("Invalid profile number", tag="PROFILES")
                except ValueError:
                    self.logger.error("Please enter a valid number", tag="PROFILES")

            elif (choice == "4" and not crawl_callback) or (choice == "5" and crawl_callback):
                # Exit
                self.logger.info("Exiting profile management", tag="MENU")
                break

            else:
                self.logger.error(f"Invalid choice. Please enter a number between 1 and {exit_option}.", tag="MENU")
|
||||||
1048
crawl4ai/browser/strategies.py
Normal file
1048
crawl4ai/browser/strategies.py
Normal file
File diff suppressed because it is too large
Load Diff
105
crawl4ai/browser/utils.py
Normal file
105
crawl4ai/browser/utils.py
Normal file
@@ -0,0 +1,105 @@
|
|||||||
|
"""Browser utilities module for Crawl4AI.
|
||||||
|
|
||||||
|
This module provides utility functions for browser management,
|
||||||
|
including process management, CDP connection utilities,
|
||||||
|
and Playwright instance management.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import platform
|
||||||
|
import tempfile
|
||||||
|
from typing import Optional, Any
|
||||||
|
|
||||||
|
from playwright.async_api import async_playwright
|
||||||
|
|
||||||
|
from ..async_logger import AsyncLogger
|
||||||
|
from ..utils import get_chromium_path
|
||||||
|
|
||||||
|
_playwright_instance = None
|
||||||
|
|
||||||
|
async def get_playwright():
|
||||||
|
"""Get or create the Playwright instance (singleton pattern).
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Playwright: The Playwright instance
|
||||||
|
"""
|
||||||
|
global _playwright_instance
|
||||||
|
if _playwright_instance is None or True:
|
||||||
|
_playwright_instance = await async_playwright().start()
|
||||||
|
return _playwright_instance
|
||||||
|
|
||||||
|
def get_browser_executable(browser_type: str) -> str:
|
||||||
|
"""Get the path to browser executable, with platform-specific handling.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
browser_type: Type of browser (chromium, firefox, webkit)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Path to browser executable
|
||||||
|
"""
|
||||||
|
return get_chromium_path(browser_type)
|
||||||
|
|
||||||
|
def create_temp_directory(prefix="browser-profile-") -> str:
|
||||||
|
"""Create a temporary directory for browser data.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
prefix: Prefix for the temporary directory name
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Path to the created temporary directory
|
||||||
|
"""
|
||||||
|
return tempfile.mkdtemp(prefix=prefix)
|
||||||
|
|
||||||
|
def is_windows() -> bool:
|
||||||
|
"""Check if the current platform is Windows.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if Windows, False otherwise
|
||||||
|
"""
|
||||||
|
return sys.platform == "win32"
|
||||||
|
|
||||||
|
def is_macos() -> bool:
|
||||||
|
"""Check if the current platform is macOS.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if macOS, False otherwise
|
||||||
|
"""
|
||||||
|
return sys.platform == "darwin"
|
||||||
|
|
||||||
|
def is_linux() -> bool:
|
||||||
|
"""Check if the current platform is Linux.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if Linux, False otherwise
|
||||||
|
"""
|
||||||
|
return not (is_windows() or is_macos())
|
||||||
|
|
||||||
|
def get_browser_disable_options() -> list:
|
||||||
|
"""Get standard list of browser disable options for performance.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of command-line options to disable various browser features
|
||||||
|
"""
|
||||||
|
return [
|
||||||
|
"--disable-background-networking",
|
||||||
|
"--disable-background-timer-throttling",
|
||||||
|
"--disable-backgrounding-occluded-windows",
|
||||||
|
"--disable-breakpad",
|
||||||
|
"--disable-client-side-phishing-detection",
|
||||||
|
"--disable-component-extensions-with-background-pages",
|
||||||
|
"--disable-default-apps",
|
||||||
|
"--disable-extensions",
|
||||||
|
"--disable-features=TranslateUI",
|
||||||
|
"--disable-hang-monitor",
|
||||||
|
"--disable-ipc-flooding-protection",
|
||||||
|
"--disable-popup-blocking",
|
||||||
|
"--disable-prompt-on-repost",
|
||||||
|
"--disable-sync",
|
||||||
|
"--force-color-profile=srgb",
|
||||||
|
"--metrics-recording-only",
|
||||||
|
"--no-first-run",
|
||||||
|
"--password-store=basic",
|
||||||
|
"--use-mock-keychain",
|
||||||
|
]
|
||||||
@@ -163,6 +163,7 @@ class ManagedBrowser:
|
|||||||
)
|
)
|
||||||
|
|
||||||
# We'll monitor for a short time to make sure it starts properly, but won't keep monitoring
|
# We'll monitor for a short time to make sure it starts properly, but won't keep monitoring
|
||||||
|
await asyncio.sleep(0.5) # Give browser time to start
|
||||||
await self._initial_startup_check()
|
await self._initial_startup_check()
|
||||||
await asyncio.sleep(2) # Give browser time to start
|
await asyncio.sleep(2) # Give browser time to start
|
||||||
return f"http://{self.host}:{self.debugging_port}"
|
return f"http://{self.host}:{self.debugging_port}"
|
||||||
|
|||||||
@@ -555,7 +555,6 @@ class BrowserProfiler:
|
|||||||
else:
|
else:
|
||||||
self.logger.error(f"Invalid choice. Please enter a number between 1 and {exit_option}.", tag="MENU")
|
self.logger.error(f"Invalid choice. Please enter a number between 1 and {exit_option}.", tag="MENU")
|
||||||
|
|
||||||
|
|
||||||
async def launch_standalone_browser(self,
|
async def launch_standalone_browser(self,
|
||||||
browser_type: str = "chromium",
|
browser_type: str = "chromium",
|
||||||
user_data_dir: Optional[str] = None,
|
user_data_dir: Optional[str] = None,
|
||||||
|
|||||||
@@ -9,6 +9,26 @@ from crawl4ai import (
|
|||||||
CrawlResult
|
CrawlResult
|
||||||
)
|
)
|
||||||
|
|
||||||
|
async def example_cdp():
|
||||||
|
browser_conf = BrowserConfig(
|
||||||
|
headless=False,
|
||||||
|
cdp_url="http://localhost:9223"
|
||||||
|
)
|
||||||
|
crawler_config = CrawlerRunConfig(
|
||||||
|
session_id="test",
|
||||||
|
js_code = """(() => { return {"result": "Hello World!"} })()""",
|
||||||
|
js_only=True
|
||||||
|
)
|
||||||
|
async with AsyncWebCrawler(
|
||||||
|
config=browser_conf,
|
||||||
|
verbose=True,
|
||||||
|
) as crawler:
|
||||||
|
result : CrawlResult = await crawler.arun(
|
||||||
|
url="https://www.helloworld.org",
|
||||||
|
config=crawler_config,
|
||||||
|
)
|
||||||
|
print(result.js_execution_result)
|
||||||
|
|
||||||
|
|
||||||
async def main():
|
async def main():
|
||||||
browser_config = BrowserConfig(headless=True, verbose=True)
|
browser_config = BrowserConfig(headless=True, verbose=True)
|
||||||
@@ -16,18 +36,15 @@ async def main():
|
|||||||
crawler_config = CrawlerRunConfig(
|
crawler_config = CrawlerRunConfig(
|
||||||
cache_mode=CacheMode.BYPASS,
|
cache_mode=CacheMode.BYPASS,
|
||||||
markdown_generator=DefaultMarkdownGenerator(
|
markdown_generator=DefaultMarkdownGenerator(
|
||||||
# content_filter=PruningContentFilter(
|
content_filter=PruningContentFilter(
|
||||||
# threshold=0.48, threshold_type="fixed", min_word_threshold=0
|
threshold=0.48, threshold_type="fixed", min_word_threshold=0
|
||||||
# )
|
)
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
result : CrawlResult = await crawler.arun(
|
result : CrawlResult = await crawler.arun(
|
||||||
# url="https://www.helloworld.org", config=crawler_config
|
url="https://www.helloworld.org", config=crawler_config
|
||||||
url="https://www.kidocode.com", config=crawler_config
|
|
||||||
)
|
)
|
||||||
print(result.markdown.raw_markdown[:500])
|
print(result.markdown.raw_markdown[:500])
|
||||||
# print(result.model_dump())
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
asyncio.run(main())
|
asyncio.run(main())
|
||||||
|
|||||||
190
tests/browser/test_browser_manager.py
Normal file
190
tests/browser/test_browser_manager.py
Normal file
@@ -0,0 +1,190 @@
|
|||||||
|
"""Test examples for BrowserManager.
|
||||||
|
|
||||||
|
These examples demonstrate the functionality of BrowserManager
|
||||||
|
and serve as functional tests.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
# Add the project root to Python path if running directly
|
||||||
|
if __name__ == "__main__":
|
||||||
|
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
|
||||||
|
|
||||||
|
from crawl4ai.browser import BrowserManager
|
||||||
|
from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig
|
||||||
|
from crawl4ai.async_logger import AsyncLogger
|
||||||
|
|
||||||
|
# Create a logger for clear terminal output
|
||||||
|
logger = AsyncLogger(verbose=True, log_file=None)
|
||||||
|
|
||||||
|
async def test_basic_browser_manager():
|
||||||
|
"""Test basic BrowserManager functionality with default configuration."""
|
||||||
|
logger.info("Starting test_basic_browser_manager", tag="TEST")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Create a browser manager with default config
|
||||||
|
manager = BrowserManager(logger=logger)
|
||||||
|
|
||||||
|
# Start the browser
|
||||||
|
await manager.start()
|
||||||
|
logger.info("Browser started successfully", tag="TEST")
|
||||||
|
|
||||||
|
# Get a page
|
||||||
|
crawler_config = CrawlerRunConfig(url="https://example.com")
|
||||||
|
page, context = await manager.get_page(crawler_config)
|
||||||
|
logger.info("Page created successfully", tag="TEST")
|
||||||
|
|
||||||
|
# Navigate to a website
|
||||||
|
await page.goto("https://example.com")
|
||||||
|
title = await page.title()
|
||||||
|
logger.info(f"Page title: {title}", tag="TEST")
|
||||||
|
|
||||||
|
# Clean up
|
||||||
|
await manager.close()
|
||||||
|
logger.success("test_basic_browser_manager completed successfully", tag="TEST")
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"test_basic_browser_manager failed: {str(e)}", tag="TEST")
|
||||||
|
return False
|
||||||
|
|
||||||
|
async def test_custom_browser_config():
|
||||||
|
"""Test BrowserManager with custom browser configuration."""
|
||||||
|
logger.info("Starting test_custom_browser_config", tag="TEST")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Create a custom browser config
|
||||||
|
browser_config = BrowserConfig(
|
||||||
|
browser_type="chromium",
|
||||||
|
headless=True,
|
||||||
|
viewport_width=1280,
|
||||||
|
viewport_height=800,
|
||||||
|
light_mode=True
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create browser manager with the config
|
||||||
|
manager = BrowserManager(browser_config=browser_config, logger=logger)
|
||||||
|
|
||||||
|
# Start the browser
|
||||||
|
await manager.start()
|
||||||
|
logger.info("Browser started successfully with custom config", tag="TEST")
|
||||||
|
|
||||||
|
# Get a page
|
||||||
|
crawler_config = CrawlerRunConfig(url="https://example.com")
|
||||||
|
page, context = await manager.get_page(crawler_config)
|
||||||
|
|
||||||
|
# Navigate to a website
|
||||||
|
await page.goto("https://example.com")
|
||||||
|
title = await page.title()
|
||||||
|
logger.info(f"Page title: {title}", tag="TEST")
|
||||||
|
|
||||||
|
# Verify viewport size
|
||||||
|
viewport_size = await page.evaluate("() => ({ width: window.innerWidth, height: window.innerHeight })")
|
||||||
|
logger.info(f"Viewport size: {viewport_size}", tag="TEST")
|
||||||
|
|
||||||
|
# Clean up
|
||||||
|
await manager.close()
|
||||||
|
logger.success("test_custom_browser_config completed successfully", tag="TEST")
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"test_custom_browser_config failed: {str(e)}", tag="TEST")
|
||||||
|
return False
|
||||||
|
|
||||||
|
async def test_multiple_pages():
|
||||||
|
"""Test BrowserManager with multiple pages."""
|
||||||
|
logger.info("Starting test_multiple_pages", tag="TEST")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Create browser manager
|
||||||
|
manager = BrowserManager(logger=logger)
|
||||||
|
|
||||||
|
# Start the browser
|
||||||
|
await manager.start()
|
||||||
|
logger.info("Browser started successfully", tag="TEST")
|
||||||
|
|
||||||
|
# Create multiple pages
|
||||||
|
pages = []
|
||||||
|
urls = ["https://example.com", "https://example.org", "https://mozilla.org"]
|
||||||
|
|
||||||
|
for i, url in enumerate(urls):
|
||||||
|
crawler_config = CrawlerRunConfig(url=url)
|
||||||
|
page, context = await manager.get_page(crawler_config)
|
||||||
|
await page.goto(url)
|
||||||
|
pages.append((page, url))
|
||||||
|
logger.info(f"Created page {i+1} for {url}", tag="TEST")
|
||||||
|
|
||||||
|
# Verify all pages are loaded correctly
|
||||||
|
for i, (page, url) in enumerate(pages):
|
||||||
|
title = await page.title()
|
||||||
|
logger.info(f"Page {i+1} title: {title}", tag="TEST")
|
||||||
|
|
||||||
|
# Clean up
|
||||||
|
await manager.close()
|
||||||
|
logger.success("test_multiple_pages completed successfully", tag="TEST")
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"test_multiple_pages failed: {str(e)}", tag="TEST")
|
||||||
|
return False
|
||||||
|
|
||||||
|
async def test_session_management():
|
||||||
|
"""Test session management in BrowserManager."""
|
||||||
|
logger.info("Starting test_session_management", tag="TEST")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Create browser manager
|
||||||
|
manager = BrowserManager(logger=logger)
|
||||||
|
|
||||||
|
# Start the browser
|
||||||
|
await manager.start()
|
||||||
|
logger.info("Browser started successfully", tag="TEST")
|
||||||
|
|
||||||
|
# Create a session
|
||||||
|
session_id = "test_session_1"
|
||||||
|
crawler_config = CrawlerRunConfig(url="https://example.com", session_id=session_id)
|
||||||
|
page1, context1 = await manager.get_page(crawler_config)
|
||||||
|
await page1.goto("https://example.com")
|
||||||
|
logger.info(f"Created session with ID: {session_id}", tag="TEST")
|
||||||
|
|
||||||
|
# Get the same session again
|
||||||
|
page2, context2 = await manager.get_page(crawler_config)
|
||||||
|
|
||||||
|
# Verify it's the same page/context
|
||||||
|
is_same_page = page1 == page2
|
||||||
|
is_same_context = context1 == context2
|
||||||
|
logger.info(f"Same page: {is_same_page}, Same context: {is_same_context}", tag="TEST")
|
||||||
|
|
||||||
|
# Kill the session
|
||||||
|
await manager.kill_session(session_id)
|
||||||
|
logger.info(f"Killed session with ID: {session_id}", tag="TEST")
|
||||||
|
|
||||||
|
# Clean up
|
||||||
|
await manager.close()
|
||||||
|
logger.success("test_session_management completed successfully", tag="TEST")
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"test_session_management failed: {str(e)}", tag="TEST")
|
||||||
|
return False
|
||||||
|
|
||||||
|
async def run_tests():
|
||||||
|
"""Run all tests sequentially."""
|
||||||
|
results = []
|
||||||
|
|
||||||
|
# results.append(await test_basic_browser_manager())
|
||||||
|
# results.append(await test_custom_browser_config())
|
||||||
|
# results.append(await test_multiple_pages())
|
||||||
|
results.append(await test_session_management())
|
||||||
|
|
||||||
|
# Print summary
|
||||||
|
total = len(results)
|
||||||
|
passed = sum(results)
|
||||||
|
logger.info(f"Tests complete: {passed}/{total} passed", tag="SUMMARY")
|
||||||
|
|
||||||
|
if passed == total:
|
||||||
|
logger.success("All tests passed!", tag="SUMMARY")
|
||||||
|
else:
|
||||||
|
logger.error(f"{total - passed} tests failed", tag="SUMMARY")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
asyncio.run(run_tests())
|
||||||
160
tests/browser/test_builtin_strategy.py
Normal file
160
tests/browser/test_builtin_strategy.py
Normal file
@@ -0,0 +1,160 @@
|
|||||||
|
"""Test examples for BuiltinBrowserStrategy.
|
||||||
|
|
||||||
|
These examples demonstrate the functionality of BuiltinBrowserStrategy
|
||||||
|
and serve as functional tests.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Add the project root to Python path if running directly
|
||||||
|
if __name__ == "__main__":
|
||||||
|
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
|
||||||
|
|
||||||
|
from crawl4ai.browser import BrowserManager
|
||||||
|
from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig
|
||||||
|
from crawl4ai.async_logger import AsyncLogger
|
||||||
|
|
||||||
|
# Create a logger for clear terminal output
|
||||||
|
logger = AsyncLogger(verbose=True, log_file=None)
|
||||||
|
|
||||||
|
async def test_builtin_browser():
|
||||||
|
"""Test using a builtin browser that persists between sessions."""
|
||||||
|
logger.info("Testing builtin browser", tag="TEST")
|
||||||
|
|
||||||
|
browser_config = BrowserConfig(
|
||||||
|
browser_mode="builtin",
|
||||||
|
headless=True
|
||||||
|
)
|
||||||
|
|
||||||
|
manager = BrowserManager(browser_config=browser_config, logger=logger)
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Start should connect to existing builtin browser or create one
|
||||||
|
await manager.start()
|
||||||
|
logger.info("Connected to builtin browser", tag="TEST")
|
||||||
|
|
||||||
|
# Test page creation
|
||||||
|
crawler_config = CrawlerRunConfig()
|
||||||
|
page, context = await manager.get_page(crawler_config)
|
||||||
|
|
||||||
|
# Test navigation
|
||||||
|
await page.goto("https://example.com")
|
||||||
|
title = await page.title()
|
||||||
|
logger.info(f"Page title: {title}", tag="TEST")
|
||||||
|
|
||||||
|
# Close manager (should not close the builtin browser)
|
||||||
|
await manager.close()
|
||||||
|
logger.info("First session closed", tag="TEST")
|
||||||
|
|
||||||
|
# Create a second manager to verify browser persistence
|
||||||
|
logger.info("Creating second session to verify persistence", tag="TEST")
|
||||||
|
manager2 = BrowserManager(browser_config=browser_config, logger=logger)
|
||||||
|
|
||||||
|
await manager2.start()
|
||||||
|
logger.info("Connected to existing builtin browser", tag="TEST")
|
||||||
|
|
||||||
|
page2, context2 = await manager2.get_page(crawler_config)
|
||||||
|
await page2.goto("https://example.org")
|
||||||
|
title2 = await page2.title()
|
||||||
|
logger.info(f"Second session page title: {title2}", tag="TEST")
|
||||||
|
|
||||||
|
await manager2.close()
|
||||||
|
logger.info("Second session closed successfully", tag="TEST")
|
||||||
|
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Test failed: {str(e)}", tag="TEST")
|
||||||
|
try:
|
||||||
|
await manager.close()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
return False
|
||||||
|
|
||||||
|
async def test_builtin_browser_status():
|
||||||
|
"""Test getting status of the builtin browser."""
|
||||||
|
logger.info("Testing builtin browser status", tag="TEST")
|
||||||
|
|
||||||
|
from crawl4ai.browser.strategies import BuiltinBrowserStrategy
|
||||||
|
|
||||||
|
browser_config = BrowserConfig(
|
||||||
|
browser_mode="builtin",
|
||||||
|
headless=True
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create strategy directly to access its status methods
|
||||||
|
strategy = BuiltinBrowserStrategy(browser_config, logger)
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Get status before starting (should be not running)
|
||||||
|
status_before = await strategy.get_builtin_browser_status()
|
||||||
|
logger.info(f"Initial status: {status_before}", tag="TEST")
|
||||||
|
|
||||||
|
# Start the browser
|
||||||
|
await strategy.start()
|
||||||
|
logger.info("Browser started successfully", tag="TEST")
|
||||||
|
|
||||||
|
# Get status after starting
|
||||||
|
status_after = await strategy.get_builtin_browser_status()
|
||||||
|
logger.info(f"Status after start: {status_after}", tag="TEST")
|
||||||
|
|
||||||
|
# Create a page to verify functionality
|
||||||
|
crawler_config = CrawlerRunConfig()
|
||||||
|
page, context = await strategy.get_page(crawler_config)
|
||||||
|
await page.goto("https://example.com")
|
||||||
|
title = await page.title()
|
||||||
|
logger.info(f"Page title: {title}", tag="TEST")
|
||||||
|
|
||||||
|
# Close strategy (should not kill the builtin browser)
|
||||||
|
await strategy.close()
|
||||||
|
logger.info("Strategy closed successfully", tag="TEST")
|
||||||
|
|
||||||
|
# Create a new strategy object
|
||||||
|
strategy2 = BuiltinBrowserStrategy(browser_config, logger)
|
||||||
|
|
||||||
|
# Get status again (should still be running)
|
||||||
|
status_final = await strategy2.get_builtin_browser_status()
|
||||||
|
logger.info(f"Final status: {status_final}", tag="TEST")
|
||||||
|
|
||||||
|
# Verify that the status shows the browser is running
|
||||||
|
is_running = status_final.get('running', False)
|
||||||
|
logger.info(f"Builtin browser persistence confirmed: {is_running}", tag="TEST")
|
||||||
|
|
||||||
|
# Kill the builtin browser to clean up
|
||||||
|
logger.info("Killing builtin browser", tag="TEST")
|
||||||
|
success = await strategy2.kill_builtin_browser()
|
||||||
|
logger.info(f"Killed builtin browser successfully: {success}", tag="TEST")
|
||||||
|
|
||||||
|
return is_running and success
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Test failed: {str(e)}", tag="TEST")
|
||||||
|
try:
|
||||||
|
await strategy.close()
|
||||||
|
|
||||||
|
# Try to kill the builtin browser to clean up
|
||||||
|
strategy2 = BuiltinBrowserStrategy(browser_config, logger)
|
||||||
|
await strategy2.kill_builtin_browser()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
return False
|
||||||
|
|
||||||
|
async def run_tests():
|
||||||
|
"""Run all tests sequentially."""
|
||||||
|
results = []
|
||||||
|
|
||||||
|
results.append(await test_builtin_browser())
|
||||||
|
results.append(await test_builtin_browser_status())
|
||||||
|
|
||||||
|
# Print summary
|
||||||
|
total = len(results)
|
||||||
|
passed = sum(results)
|
||||||
|
logger.info(f"Tests complete: {passed}/{total} passed", tag="SUMMARY")
|
||||||
|
|
||||||
|
if passed == total:
|
||||||
|
logger.success("All tests passed!", tag="SUMMARY")
|
||||||
|
else:
|
||||||
|
logger.error(f"{total - passed} tests failed", tag="SUMMARY")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
asyncio.run(run_tests())
|
||||||
227
tests/browser/test_cdp_strategy.py
Normal file
227
tests/browser/test_cdp_strategy.py
Normal file
@@ -0,0 +1,227 @@
|
|||||||
|
"""Test examples for CDPBrowserStrategy.
|
||||||
|
|
||||||
|
These examples demonstrate the functionality of CDPBrowserStrategy
|
||||||
|
and serve as functional tests.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Add the project root to Python path if running directly
|
||||||
|
if __name__ == "__main__":
|
||||||
|
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
|
||||||
|
|
||||||
|
from crawl4ai.browser import BrowserManager
|
||||||
|
from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig
|
||||||
|
from crawl4ai.async_logger import AsyncLogger
|
||||||
|
|
||||||
|
# Create a logger for clear terminal output
|
||||||
|
logger = AsyncLogger(verbose=True, log_file=None)
|
||||||
|
|
||||||
|
async def test_cdp_launch_connect():
|
||||||
|
"""Test launching a browser and connecting via CDP."""
|
||||||
|
logger.info("Testing launch and connect via CDP", tag="TEST")
|
||||||
|
|
||||||
|
browser_config = BrowserConfig(
|
||||||
|
use_managed_browser=True,
|
||||||
|
headless=True
|
||||||
|
)
|
||||||
|
|
||||||
|
manager = BrowserManager(browser_config=browser_config, logger=logger)
|
||||||
|
|
||||||
|
try:
|
||||||
|
await manager.start()
|
||||||
|
logger.info("Browser launched and connected via CDP", tag="TEST")
|
||||||
|
|
||||||
|
# Test with multiple pages
|
||||||
|
pages = []
|
||||||
|
for i in range(3):
|
||||||
|
crawler_config = CrawlerRunConfig()
|
||||||
|
page, context = await manager.get_page(crawler_config)
|
||||||
|
await page.goto(f"https://example.com?test={i}")
|
||||||
|
pages.append(page)
|
||||||
|
logger.info(f"Created page {i+1}", tag="TEST")
|
||||||
|
|
||||||
|
# Verify all pages are working
|
||||||
|
for i, page in enumerate(pages):
|
||||||
|
title = await page.title()
|
||||||
|
logger.info(f"Page {i+1} title: {title}", tag="TEST")
|
||||||
|
|
||||||
|
await manager.close()
|
||||||
|
logger.info("Browser closed successfully", tag="TEST")
|
||||||
|
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Test failed: {str(e)}", tag="TEST")
|
||||||
|
try:
|
||||||
|
await manager.close()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
return False
|
||||||
|
|
||||||
|
async def test_cdp_with_user_data_dir():
|
||||||
|
"""Test CDP browser with a user data directory."""
|
||||||
|
logger.info("Testing CDP browser with user data directory", tag="TEST")
|
||||||
|
|
||||||
|
# Create a temporary user data directory
|
||||||
|
import tempfile
|
||||||
|
user_data_dir = tempfile.mkdtemp(prefix="crawl4ai-test-")
|
||||||
|
logger.info(f"Created temporary user data directory: {user_data_dir}", tag="TEST")
|
||||||
|
|
||||||
|
browser_config = BrowserConfig(
|
||||||
|
use_managed_browser=True,
|
||||||
|
headless=True,
|
||||||
|
user_data_dir=user_data_dir
|
||||||
|
)
|
||||||
|
|
||||||
|
manager = BrowserManager(browser_config=browser_config, logger=logger)
|
||||||
|
|
||||||
|
try:
|
||||||
|
await manager.start()
|
||||||
|
logger.info("Browser launched with user data directory", tag="TEST")
|
||||||
|
|
||||||
|
# Navigate to a page and store some data
|
||||||
|
crawler_config = CrawlerRunConfig()
|
||||||
|
page, context = await manager.get_page(crawler_config)
|
||||||
|
|
||||||
|
# Set a cookie
|
||||||
|
await context.add_cookies([{
|
||||||
|
"name": "test_cookie",
|
||||||
|
"value": "test_value",
|
||||||
|
"url": "https://example.com"
|
||||||
|
}])
|
||||||
|
|
||||||
|
# Visit the site
|
||||||
|
await page.goto("https://example.com")
|
||||||
|
|
||||||
|
# Verify cookie was set
|
||||||
|
cookies = await context.cookies(["https://example.com"])
|
||||||
|
has_test_cookie = any(cookie["name"] == "test_cookie" for cookie in cookies)
|
||||||
|
logger.info(f"Cookie set successfully: {has_test_cookie}", tag="TEST")
|
||||||
|
|
||||||
|
# Close the browser
|
||||||
|
await manager.close()
|
||||||
|
logger.info("First browser session closed", tag="TEST")
|
||||||
|
|
||||||
|
# Start a new browser with the same user data directory
|
||||||
|
logger.info("Starting second browser session with same user data directory", tag="TEST")
|
||||||
|
manager2 = BrowserManager(browser_config=browser_config, logger=logger)
|
||||||
|
await manager2.start()
|
||||||
|
|
||||||
|
# Get a new page and check if the cookie persists
|
||||||
|
page2, context2 = await manager2.get_page(crawler_config)
|
||||||
|
await page2.goto("https://example.com")
|
||||||
|
|
||||||
|
# Verify cookie persisted
|
||||||
|
cookies2 = await context2.cookies(["https://example.com"])
|
||||||
|
has_test_cookie2 = any(cookie["name"] == "test_cookie" for cookie in cookies2)
|
||||||
|
logger.info(f"Cookie persisted across sessions: {has_test_cookie2}", tag="TEST")
|
||||||
|
|
||||||
|
# Clean up
|
||||||
|
await manager2.close()
|
||||||
|
|
||||||
|
# Remove temporary directory
|
||||||
|
import shutil
|
||||||
|
shutil.rmtree(user_data_dir, ignore_errors=True)
|
||||||
|
logger.info(f"Removed temporary user data directory", tag="TEST")
|
||||||
|
|
||||||
|
return has_test_cookie and has_test_cookie2
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Test failed: {str(e)}", tag="TEST")
|
||||||
|
try:
|
||||||
|
await manager.close()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Clean up temporary directory
|
||||||
|
try:
|
||||||
|
import shutil
|
||||||
|
shutil.rmtree(user_data_dir, ignore_errors=True)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
async def test_cdp_session_management():
|
||||||
|
"""Test session management with CDP browser."""
|
||||||
|
logger.info("Testing session management with CDP browser", tag="TEST")
|
||||||
|
|
||||||
|
browser_config = BrowserConfig(
|
||||||
|
use_managed_browser=True,
|
||||||
|
headless=True
|
||||||
|
)
|
||||||
|
|
||||||
|
manager = BrowserManager(browser_config=browser_config, logger=logger)
|
||||||
|
|
||||||
|
try:
|
||||||
|
await manager.start()
|
||||||
|
logger.info("Browser launched successfully", tag="TEST")
|
||||||
|
|
||||||
|
# Create two sessions
|
||||||
|
session1_id = "test_session_1"
|
||||||
|
session2_id = "test_session_2"
|
||||||
|
|
||||||
|
# Set up first session
|
||||||
|
crawler_config1 = CrawlerRunConfig(session_id=session1_id)
|
||||||
|
page1, context1 = await manager.get_page(crawler_config1)
|
||||||
|
await page1.goto("https://example.com")
|
||||||
|
await page1.evaluate("localStorage.setItem('session1_data', 'test_value')")
|
||||||
|
logger.info(f"Set up session 1 with ID: {session1_id}", tag="TEST")
|
||||||
|
|
||||||
|
# Set up second session
|
||||||
|
crawler_config2 = CrawlerRunConfig(session_id=session2_id)
|
||||||
|
page2, context2 = await manager.get_page(crawler_config2)
|
||||||
|
await page2.goto("https://example.org")
|
||||||
|
await page2.evaluate("localStorage.setItem('session2_data', 'test_value2')")
|
||||||
|
logger.info(f"Set up session 2 with ID: {session2_id}", tag="TEST")
|
||||||
|
|
||||||
|
# Get first session again
|
||||||
|
page1_again, _ = await manager.get_page(crawler_config1)
|
||||||
|
|
||||||
|
# Verify it's the same page and data persists
|
||||||
|
is_same_page = page1 == page1_again
|
||||||
|
data1 = await page1_again.evaluate("localStorage.getItem('session1_data')")
|
||||||
|
logger.info(f"Session 1 reuse successful: {is_same_page}, data: {data1}", tag="TEST")
|
||||||
|
|
||||||
|
# Kill first session
|
||||||
|
await manager.kill_session(session1_id)
|
||||||
|
logger.info(f"Killed session 1", tag="TEST")
|
||||||
|
|
||||||
|
# Verify second session still works
|
||||||
|
data2 = await page2.evaluate("localStorage.getItem('session2_data')")
|
||||||
|
logger.info(f"Session 2 still functional after killing session 1, data: {data2}", tag="TEST")
|
||||||
|
|
||||||
|
# Clean up
|
||||||
|
await manager.close()
|
||||||
|
logger.info("Browser closed successfully", tag="TEST")
|
||||||
|
|
||||||
|
return is_same_page and data1 == "test_value" and data2 == "test_value2"
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Test failed: {str(e)}", tag="TEST")
|
||||||
|
try:
|
||||||
|
await manager.close()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
return False
|
||||||
|
|
||||||
|
async def run_tests():
|
||||||
|
"""Run all tests sequentially."""
|
||||||
|
results = []
|
||||||
|
|
||||||
|
results.append(await test_cdp_launch_connect())
|
||||||
|
results.append(await test_cdp_with_user_data_dir())
|
||||||
|
results.append(await test_cdp_session_management())
|
||||||
|
|
||||||
|
# Print summary
|
||||||
|
total = len(results)
|
||||||
|
passed = sum(results)
|
||||||
|
logger.info(f"Tests complete: {passed}/{total} passed", tag="SUMMARY")
|
||||||
|
|
||||||
|
if passed == total:
|
||||||
|
logger.success("All tests passed!", tag="SUMMARY")
|
||||||
|
else:
|
||||||
|
logger.error(f"{total - passed} tests failed", tag="SUMMARY")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
asyncio.run(run_tests())
|
||||||
77
tests/browser/test_combined.py
Normal file
77
tests/browser/test_combined.py
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
"""Combined test runner for all browser module tests.
|
||||||
|
|
||||||
|
This script runs all the browser module tests in sequence and
|
||||||
|
provides a comprehensive summary.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
|
# Add the project root to Python path if running directly
|
||||||
|
if __name__ == "__main__":
|
||||||
|
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
|
||||||
|
|
||||||
|
from crawl4ai.async_logger import AsyncLogger
|
||||||
|
|
||||||
|
# Create a logger for clear terminal output
|
||||||
|
logger = AsyncLogger(verbose=True, log_file=None)
|
||||||
|
|
||||||
|
async def run_test_module(module_name, header):
    """Dynamically import one test module, run its suite, and return elapsed seconds.

    Args:
        module_name: Module name inside ``tests.browser`` (e.g. ``"test_profiles"``).
        header: Human-readable heading printed before the run.

    Returns:
        float: Wall-clock seconds the module's ``run_tests()`` took.
    """
    separator = '-' * 30
    logger.info(f"\n{separator}", tag="TEST")
    logger.info(f"RUNNING: {header}", tag="TEST")
    logger.info(f"{separator}", tag="TEST")

    # Resolve the module at runtime so new test files need no hard-coded import.
    test_module = __import__(f"tests.browser.{module_name}", fromlist=["run_tests"])

    # Time the whole suite for the summary report.
    started = time.time()
    await test_module.run_tests()
    elapsed = time.time() - started

    logger.info(f"Time taken: {elapsed:.2f} seconds", tag="TIMING")

    return elapsed
|
||||||
|
|
||||||
|
async def main():
    """Run all browser test modules and print a timing summary."""
    logger.info("STARTING COMPREHENSIVE BROWSER MODULE TESTS", tag="MAIN")

    # Modules to execute, paired with a human-readable heading.
    test_modules = [
        ("test_browser_manager", "Browser Manager Tests"),
        ("test_playwright_strategy", "Playwright Strategy Tests"),
        ("test_cdp_strategy", "CDP Strategy Tests"),
        ("test_builtin_strategy", "Builtin Browser Strategy Tests"),
        ("test_profiles", "Profile Management Tests")
    ]

    # Run each module, recording how long it took; a crashed module is
    # logged and left out of `timings` so the summary flags it below.
    timings = {}
    for name, title in test_modules:
        try:
            timings[name] = await run_test_module(name, title)
        except Exception as e:
            logger.error(f"Error running {name}: {str(e)}", tag="ERROR")

    # Print summary
    logger.info("\n\nTEST SUMMARY:", tag="SUMMARY")
    logger.info(f"{'-'*50}", tag="SUMMARY")
    for name, title in test_modules:
        if name in timings:
            logger.info(f"{title}: {timings[name]:.2f} seconds", tag="SUMMARY")
        else:
            logger.error(f"{title}: FAILED TO RUN", tag="SUMMARY")
    logger.info(f"{'-'*50}", tag="SUMMARY")
    logger.info(f"Total time: {sum(timings.values()):.2f} seconds", tag="SUMMARY")
|
||||||
|
|
||||||
|
# Allow running the combined test driver directly as a script.
if __name__ == "__main__":
    asyncio.run(main())
|
||||||
275
tests/browser/test_playwright_strategy.py
Normal file
275
tests/browser/test_playwright_strategy.py
Normal file
@@ -0,0 +1,275 @@
|
|||||||
|
"""Test examples for PlaywrightBrowserStrategy.
|
||||||
|
|
||||||
|
These examples demonstrate the functionality of PlaywrightBrowserStrategy
|
||||||
|
and serve as functional tests.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Add the project root to Python path if running directly
|
||||||
|
if __name__ == "__main__":
|
||||||
|
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
|
||||||
|
|
||||||
|
from crawl4ai.browser import BrowserManager
|
||||||
|
from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig
|
||||||
|
from crawl4ai.async_logger import AsyncLogger
|
||||||
|
|
||||||
|
# Create a logger for clear terminal output
|
||||||
|
logger = AsyncLogger(verbose=True, log_file=None)
|
||||||
|
|
||||||
|
async def test_playwright_basic():
    """Test basic Playwright browser functionality.

    Launches a headless browser through BrowserManager, obtains a page,
    navigates to example.com, reads the title, and closes the browser.

    Returns:
        bool: True if every step succeeded, False otherwise.
    """
    logger.info("Testing standard Playwright browser", tag="TEST")

    # Create browser config for standard Playwright
    browser_config = BrowserConfig(
        headless=True,
        viewport_width=1280,
        viewport_height=800
    )

    # Create browser manager with the config
    manager = BrowserManager(browser_config=browser_config, logger=logger)

    try:
        # Start the browser
        await manager.start()
        logger.info("Browser started successfully", tag="TEST")

        # Create crawler config
        crawler_config = CrawlerRunConfig(url="https://example.com")

        # Get a page
        page, context = await manager.get_page(crawler_config)
        logger.info("Got page successfully", tag="TEST")

        # Navigate to a website
        await page.goto("https://example.com")
        logger.info("Navigated to example.com", tag="TEST")

        # Get page title
        title = await page.title()
        logger.info(f"Page title: {title}", tag="TEST")

        # Clean up
        await manager.close()
        logger.info("Browser closed successfully", tag="TEST")

        return True
    except Exception as e:
        logger.error(f"Test failed: {str(e)}", tag="TEST")
        # Best-effort cleanup; catch Exception (not bare except) so that
        # KeyboardInterrupt/SystemExit still propagate.
        try:
            await manager.close()
        except Exception:
            pass
        return False
|
||||||
|
|
||||||
|
async def test_playwright_text_mode():
    """Test Playwright browser in text-only mode.

    Starts a browser with ``text_mode=True`` and verifies that image
    requests are blocked by waiting (with a short timeout) for any
    image-like request while loading an image-heavy page.

    Returns:
        bool: True if the test ran to completion, False on error.
    """
    logger.info("Testing Playwright text mode", tag="TEST")

    # Create browser config with text mode enabled
    browser_config = BrowserConfig(
        headless=True,
        text_mode=True  # Enable text-only mode
    )

    # Create browser manager with the config
    manager = BrowserManager(browser_config=browser_config, logger=logger)

    try:
        # Start the browser
        await manager.start()
        logger.info("Browser started successfully in text mode", tag="TEST")

        # Get a page
        crawler_config = CrawlerRunConfig(url="https://example.com")
        page, context = await manager.get_page(crawler_config)

        # Navigate to a website
        await page.goto("https://example.com")
        logger.info("Navigated to example.com", tag="TEST")

        # Get page title
        title = await page.title()
        logger.info(f"Page title: {title}", tag="TEST")

        # Check if images are blocked in text mode by watching for any
        # image request. The try wraps the whole `async with` because
        # expect_request's timeout can also be raised on context exit.
        has_images = False
        try:
            async with page.expect_request("**/*.{png,jpg,jpeg,gif,webp,svg}", timeout=1000) as request_info:
                # Try to load a page with images
                await page.goto("https://picsum.photos/", wait_until="domcontentloaded")
                await request_info.value
            has_images = True
        except Exception:
            # Timeout without image requests means text mode is working
            has_images = False

        logger.info(f"Text mode image blocking working: {not has_images}", tag="TEST")

        # Clean up
        await manager.close()
        logger.info("Browser closed successfully", tag="TEST")

        return True
    except Exception as e:
        logger.error(f"Test failed: {str(e)}", tag="TEST")
        # Best-effort cleanup; narrow except so interrupts propagate.
        try:
            await manager.close()
        except Exception:
            pass
        return False
|
||||||
|
|
||||||
|
async def test_playwright_context_reuse():
    """Test context caching and reuse with identical configurations.

    Two crawler configs with identical browser-relevant parameters should
    share one browser context; a config with a different viewport should
    get a distinct context.

    Returns:
        bool: True if both the reuse and the separation checks pass.
    """
    logger.info("Testing context reuse with identical configurations", tag="TEST")

    # Create browser config
    browser_config = BrowserConfig(headless=True)

    # Create browser manager
    manager = BrowserManager(browser_config=browser_config, logger=logger)

    try:
        # Start the browser
        await manager.start()
        logger.info("Browser started successfully", tag="TEST")

        # Create identical crawler configs
        crawler_config1 = CrawlerRunConfig(
            url="https://example.com",
            viewport_width=1280,
            viewport_height=800
        )

        crawler_config2 = CrawlerRunConfig(
            url="https://example.org",  # Different URL but same browser parameters
            viewport_width=1280,
            viewport_height=800
        )

        # Get pages with these configs
        page1, context1 = await manager.get_page(crawler_config1)
        page2, context2 = await manager.get_page(crawler_config2)

        # Check if contexts are reused
        is_same_context = context1 == context2
        logger.info(f"Contexts reused: {is_same_context}", tag="TEST")

        # Now try with a different config
        crawler_config3 = CrawlerRunConfig(
            url="https://example.net",
            viewport_width=800,  # Different viewport size
            viewport_height=600
        )

        page3, context3 = await manager.get_page(crawler_config3)

        # This should be a different context
        is_different_context = context1 != context3
        logger.info(f"Different contexts for different configs: {is_different_context}", tag="TEST")

        # Clean up
        await manager.close()
        logger.info("Browser closed successfully", tag="TEST")

        # Both tests should pass for success
        return is_same_context and is_different_context
    except Exception as e:
        logger.error(f"Test failed: {str(e)}", tag="TEST")
        # Best-effort cleanup; narrow except so interrupts propagate.
        try:
            await manager.close()
        except Exception:
            pass
        return False
|
||||||
|
|
||||||
|
async def test_playwright_session_management():
    """Test session management with Playwright browser.

    Creates two named sessions, stores data in each via localStorage,
    verifies that requesting a session again returns the same page/context
    with its data intact, then kills one session and confirms the other
    still works.

    Returns:
        bool: True if session reuse, data persistence, and isolation all hold.
    """
    logger.info("Testing session management with Playwright browser", tag="TEST")

    browser_config = BrowserConfig(
        headless=True
    )

    manager = BrowserManager(browser_config=browser_config, logger=logger)

    try:
        await manager.start()
        logger.info("Browser launched successfully", tag="TEST")

        # Create two sessions
        session1_id = "playwright_session_1"
        session2_id = "playwright_session_2"

        # Set up first session
        crawler_config1 = CrawlerRunConfig(session_id=session1_id, url="https://example.com")
        page1, context1 = await manager.get_page(crawler_config1)
        await page1.goto("https://example.com")
        await page1.evaluate("localStorage.setItem('playwright_session1_data', 'test_value1')")
        logger.info(f"Set up session 1 with ID: {session1_id}", tag="TEST")

        # Set up second session
        crawler_config2 = CrawlerRunConfig(session_id=session2_id, url="https://example.org")
        page2, context2 = await manager.get_page(crawler_config2)
        await page2.goto("https://example.org")
        await page2.evaluate("localStorage.setItem('playwright_session2_data', 'test_value2')")
        logger.info(f"Set up session 2 with ID: {session2_id}", tag="TEST")

        # Get first session again
        page1_again, context1_again = await manager.get_page(crawler_config1)

        # Verify it's the same page and data persists
        is_same_page = page1 == page1_again
        is_same_context = context1 == context1_again
        data1 = await page1_again.evaluate("localStorage.getItem('playwright_session1_data')")
        logger.info(f"Session 1 reuse successful: {is_same_page}, data: {data1}", tag="TEST")

        # Kill first session
        await manager.kill_session(session1_id)
        logger.info(f"Killed session 1", tag="TEST")

        # Verify second session still works
        data2 = await page2.evaluate("localStorage.getItem('playwright_session2_data')")
        logger.info(f"Session 2 still functional after killing session 1, data: {data2}", tag="TEST")

        # Clean up
        await manager.close()
        logger.info("Browser closed successfully", tag="TEST")

        return is_same_page and is_same_context and data1 == "test_value1" and data2 == "test_value2"
    except Exception as e:
        logger.error(f"Test failed: {str(e)}", tag="TEST")
        # Best-effort cleanup; narrow except so interrupts propagate.
        try:
            await manager.close()
        except Exception:
            pass
        return False
|
||||||
|
|
||||||
|
async def run_tests():
    """Execute every Playwright strategy test and log a pass/fail summary."""
    suite = (
        test_playwright_basic,
        test_playwright_text_mode,
        test_playwright_context_reuse,
        test_playwright_session_management,
    )
    results = []
    for test_fn in suite:
        results.append(await test_fn())

    # Summarize outcomes
    total = len(results)
    passed = sum(results)
    logger.info(f"Tests complete: {passed}/{total} passed", tag="SUMMARY")

    if passed != total:
        logger.error(f"{total - passed} tests failed", tag="SUMMARY")
    else:
        logger.success("All tests passed!", tag="SUMMARY")
|
||||||
|
|
||||||
|
# Allow running this test file directly as a script.
if __name__ == "__main__":
    asyncio.run(run_tests())
|
||||||
176
tests/browser/test_profiles.py
Normal file
176
tests/browser/test_profiles.py
Normal file
@@ -0,0 +1,176 @@
|
|||||||
|
"""Test examples for BrowserProfileManager.
|
||||||
|
|
||||||
|
These examples demonstrate the functionality of BrowserProfileManager
|
||||||
|
and serve as functional tests.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import uuid
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
# Add the project root to Python path if running directly
|
||||||
|
if __name__ == "__main__":
|
||||||
|
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
|
||||||
|
|
||||||
|
from crawl4ai.browser import BrowserManager, BrowserProfileManager
|
||||||
|
from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig
|
||||||
|
from crawl4ai.async_logger import AsyncLogger
|
||||||
|
|
||||||
|
# Create a logger for clear terminal output
|
||||||
|
logger = AsyncLogger(verbose=True, log_file=None)
|
||||||
|
|
||||||
|
async def test_profile_creation():
    """Test creating and managing browser profiles.

    Creates a dummy Chrome-style profile directory on disk, then verifies
    that BrowserProfileManager can list it, resolve its path, and delete it.

    Returns:
        bool: True if all four checks (found, path match, deleted, removed) pass.
    """
    logger.info("Testing profile creation and management", tag="TEST")

    profile_manager = BrowserProfileManager(logger=logger)
    # Defined up-front so the except-branch cleanup cannot raise NameError
    # if a failure happens before the path is built.
    profile_path = None

    try:
        # List existing profiles
        profiles = profile_manager.list_profiles()
        logger.info(f"Found {len(profiles)} existing profiles", tag="TEST")

        # Generate a unique profile name for testing
        test_profile_name = f"test-profile-{uuid.uuid4().hex[:8]}"

        # Create a test profile directory
        profile_path = os.path.join(profile_manager.profiles_dir, test_profile_name)
        os.makedirs(os.path.join(profile_path, "Default"), exist_ok=True)

        # Create a dummy Preferences file to simulate a Chrome profile
        with open(os.path.join(profile_path, "Default", "Preferences"), "w") as f:
            f.write("{\"test\": true}")

        logger.info(f"Created test profile at: {profile_path}", tag="TEST")

        # Verify the profile is now in the list
        profiles = profile_manager.list_profiles()
        profile_found = any(p["name"] == test_profile_name for p in profiles)
        logger.info(f"Profile found in list: {profile_found}", tag="TEST")

        # Try to get the profile path
        retrieved_path = profile_manager.get_profile_path(test_profile_name)
        path_match = retrieved_path == profile_path
        logger.info(f"Retrieved correct profile path: {path_match}", tag="TEST")

        # Delete the profile
        success = profile_manager.delete_profile(test_profile_name)
        logger.info(f"Profile deletion successful: {success}", tag="TEST")

        # Verify it's gone
        profiles_after = profile_manager.list_profiles()
        profile_removed = not any(p["name"] == test_profile_name for p in profiles_after)
        logger.info(f"Profile removed from list: {profile_removed}", tag="TEST")

        # Clean up just in case
        if os.path.exists(profile_path):
            shutil.rmtree(profile_path, ignore_errors=True)

        return profile_found and path_match and success and profile_removed
    except Exception as e:
        logger.error(f"Test failed: {str(e)}", tag="TEST")
        # Clean up test directory; guard handles a failure before the path existed.
        if profile_path and os.path.exists(profile_path):
            shutil.rmtree(profile_path, ignore_errors=True)
        return False
|
||||||
|
|
||||||
|
async def test_profile_with_browser():
    """Test using a profile with a browser.

    Creates a dummy profile directory, starts a browser bound to it, writes
    localStorage data, then starts a second browser on the same profile and
    verifies that the data persisted across sessions.

    Returns:
        bool: True if the data persisted and the test profile was deleted.
    """
    logger.info("Testing using a profile with a browser", tag="TEST")

    profile_manager = BrowserProfileManager(logger=logger)
    test_profile_name = f"test-browser-profile-{uuid.uuid4().hex[:8]}"
    profile_path = None
    # Initialized to None so the except branch can close only what was started,
    # preventing leaked browser processes on failure.
    manager = None
    manager2 = None

    try:
        # Create a test profile directory
        profile_path = os.path.join(profile_manager.profiles_dir, test_profile_name)
        os.makedirs(os.path.join(profile_path, "Default"), exist_ok=True)

        # Create a dummy Preferences file to simulate a Chrome profile
        with open(os.path.join(profile_path, "Default", "Preferences"), "w") as f:
            f.write("{\"test\": true}")

        logger.info(f"Created test profile at: {profile_path}", tag="TEST")

        # Now use this profile with a browser
        browser_config = BrowserConfig(
            user_data_dir=profile_path,
            headless=True
        )

        manager = BrowserManager(browser_config=browser_config, logger=logger)

        # Start the browser with the profile
        await manager.start()
        logger.info("Browser started with profile", tag="TEST")

        # Create a page
        crawler_config = CrawlerRunConfig()
        page, context = await manager.get_page(crawler_config)

        # Navigate and set some data to verify profile works
        await page.goto("https://example.com")
        await page.evaluate("localStorage.setItem('test_data', 'profile_value')")

        # Close browser
        await manager.close()
        logger.info("First browser session closed", tag="TEST")

        # Create a new browser with the same profile
        manager2 = BrowserManager(browser_config=browser_config, logger=logger)
        await manager2.start()
        logger.info("Second browser session started with same profile", tag="TEST")

        # Get a page and check if the data persists
        page2, context2 = await manager2.get_page(crawler_config)
        await page2.goto("https://example.com")
        data = await page2.evaluate("localStorage.getItem('test_data')")

        # Verify data persisted
        data_persisted = data == "profile_value"
        logger.info(f"Data persisted across sessions: {data_persisted}", tag="TEST")

        # Clean up
        await manager2.close()
        logger.info("Second browser session closed", tag="TEST")

        # Delete the test profile
        success = profile_manager.delete_profile(test_profile_name)
        logger.info(f"Test profile deleted: {success}", tag="TEST")

        return data_persisted and success
    except Exception as e:
        logger.error(f"Test failed: {str(e)}", tag="TEST")
        # Best-effort cleanup: close any browsers that were started,
        # then remove the test profile directory.
        for mgr in (manager, manager2):
            if mgr is not None:
                try:
                    await mgr.close()
                except Exception:
                    pass
        if profile_path and os.path.exists(profile_path):
            shutil.rmtree(profile_path, ignore_errors=True)
        return False
|
||||||
|
|
||||||
|
async def run_tests():
    """Execute all profile-management tests and report a summary."""
    results = [
        await test_profile_creation(),
        await test_profile_with_browser(),
    ]

    # Summarize outcomes
    total = len(results)
    passed = sum(results)
    logger.info(f"Tests complete: {passed}/{total} passed", tag="SUMMARY")

    if passed == total:
        logger.success("All tests passed!", tag="SUMMARY")
    else:
        logger.error(f"{total - passed} tests failed", tag="SUMMARY")
|
||||||
|
|
||||||
|
# Allow running this test file directly as a script.
if __name__ == "__main__":
    asyncio.run(run_tests())
|
||||||
Reference in New Issue
Block a user