refactor(browser): improve parallel crawling and browser management
Remove PagePoolConfig in favor of direct page management in browser strategies. Add get_pages() method for efficient parallel page creation. Improve storage state handling and persistence. Add comprehensive parallel crawling tests and performance analysis. BREAKING CHANGE: Removed PagePoolConfig class and related functionality.
This commit is contained in:
@@ -156,41 +156,6 @@ def is_empty_value(value: Any) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
class PagePoolConfig:
    """Settings that control the browser page pool.

    The pool keeps a number of pre-warmed browser pages ready for immediate
    use so that sequential URL processing does not pay the page-creation
    cost on every request.

    Attributes:
        mode (str): Pooling mode — "static" (fixed pool of ``static_size``
            pages) or "adaptive" (size derived from available system memory).
            Default: "static".
        static_size (int): Pool size used when ``mode`` is "static".
            Default: 10.
        memory_per_page (int): Estimated memory footprint of a single page,
            in MB; used by "adaptive" sizing. Default: 200.
        memory_threshold (float): Maximum fraction of system memory that
            "adaptive" mode may consume. Default: 0.7.
        timeout (float): Seconds to wait for a pooled page before falling
            back to creating a fresh one. Default: 5.0.
    """

    def __init__(self,
                 mode="static",
                 static_size=10,
                 memory_per_page=200,
                 memory_threshold=0.7,
                 timeout=5.0):
        # Plain value holder: store everything verbatim; any validation is
        # the responsibility of the pool that consumes this config.
        self.mode = mode
        self.static_size = static_size
        self.memory_per_page = memory_per_page
        self.memory_threshold = memory_threshold
        self.timeout = timeout
|
||||
|
||||
class BrowserConfig:
|
||||
"""
|
||||
Configuration class for setting up a browser instance and its context in AsyncPlaywrightCrawlerStrategy.
|
||||
@@ -235,7 +200,7 @@ class BrowserConfig:
|
||||
Default: False.
|
||||
downloads_path (str or None): Directory to store downloaded files. If None and accept_downloads is True,
|
||||
a default path will be created. Default: None.
|
||||
storage_state (str or dict or None): Path or object describing storage state (cookies, localStorage).
|
||||
storage_state (str or dict or None): An in-memory storage state (cookies, localStorage).
|
||||
Default: None.
|
||||
ignore_https_errors (bool): Ignore HTTPS certificate errors. Default: True.
|
||||
java_script_enabled (bool): Enable JavaScript execution in pages. Default: True.
|
||||
@@ -255,9 +220,6 @@ class BrowserConfig:
|
||||
light_mode (bool): Disables certain background features for performance gains. Default: False.
|
||||
extra_args (list): Additional command-line arguments passed to the browser.
|
||||
Default: [].
|
||||
page_pool_config (PagePoolConfig or None): Configuration for page pooling mechanism.
|
||||
If None, page pooling is disabled.
|
||||
Default: None.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -298,7 +260,6 @@ class BrowserConfig:
|
||||
extra_args: list = None,
|
||||
debugging_port: int = 9222,
|
||||
host: str = "localhost",
|
||||
page_pool_config: Optional[PagePoolConfig] = None,
|
||||
):
|
||||
self.browser_type = browser_type
|
||||
self.headless = headless
|
||||
@@ -337,7 +298,6 @@ class BrowserConfig:
|
||||
self.verbose = verbose
|
||||
self.debugging_port = debugging_port
|
||||
self.host = host
|
||||
self.page_pool_config = page_pool_config
|
||||
|
||||
fa_user_agenr_generator = ValidUAGenerator()
|
||||
if self.user_agent_mode == "random":
|
||||
@@ -368,12 +328,6 @@ class BrowserConfig:
|
||||
|
||||
@staticmethod
|
||||
def from_kwargs(kwargs: dict) -> "BrowserConfig":
|
||||
# Handle page_pool_config
|
||||
page_pool_config = kwargs.get("page_pool_config")
|
||||
if isinstance(page_pool_config, dict):
|
||||
# If it's a dict, convert to PagePoolConfig
|
||||
page_pool_config = PagePoolConfig(**page_pool_config)
|
||||
|
||||
return BrowserConfig(
|
||||
browser_type=kwargs.get("browser_type", "chromium"),
|
||||
headless=kwargs.get("headless", True),
|
||||
@@ -407,7 +361,6 @@ class BrowserConfig:
|
||||
extra_args=kwargs.get("extra_args", []),
|
||||
debugging_port=kwargs.get("debugging_port", 9222),
|
||||
host=kwargs.get("host", "localhost"),
|
||||
page_pool_config=page_pool_config,
|
||||
)
|
||||
|
||||
def to_dict(self):
|
||||
@@ -442,7 +395,6 @@ class BrowserConfig:
|
||||
"verbose": self.verbose,
|
||||
"debugging_port": self.debugging_port,
|
||||
"host": self.host,
|
||||
"page_pool_config": self.page_pool_config,
|
||||
}
|
||||
|
||||
def clone(self, **kwargs):
|
||||
|
||||
@@ -2,11 +2,14 @@
|
||||
|
||||
This module provides a central browser management class that uses the
|
||||
strategy pattern internally while maintaining the existing API.
|
||||
It also implements a page pooling mechanism for improved performance.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
from typing import Optional, Tuple, Dict, Any
|
||||
import os
|
||||
import psutil
|
||||
from typing import Optional, Tuple, Dict, Any, List, Set
|
||||
|
||||
from playwright.async_api import Page, BrowserContext
|
||||
|
||||
@@ -117,6 +120,28 @@ class BrowserManager:
|
||||
self.sessions = self._strategy.sessions
|
||||
|
||||
return page, context
|
||||
|
||||
async def get_pages(self, crawlerRunConfig: CrawlerRunConfig, count: int = 1) -> List[Tuple[Page, BrowserContext]]:
    """Create several pages that share one run configuration.

    Useful when a batch of URLs should be crawled in parallel: the actual
    page creation is delegated to the active browser strategy.

    Args:
        crawlerRunConfig: Run configuration applied to every page.
        count: How many pages to create. Default: 1.

    Returns:
        A list of ``(Page, BrowserContext)`` tuples, one per created page.
    """
    result = await self._strategy.get_pages(crawlerRunConfig, count)

    # Mirror the strategy's session table so this manager stays in sync.
    if hasattr(self._strategy, 'sessions'):
        self.sessions = self._strategy.sessions

    return result
|
||||
|
||||
async def kill_session(self, session_id: str):
|
||||
"""Kill a browser session and clean up resources.
|
||||
|
||||
@@ -23,7 +23,7 @@ from ..async_configs import BrowserConfig, CrawlerRunConfig
|
||||
from ..config import DOWNLOAD_PAGE_TIMEOUT
|
||||
from ..js_snippet import load_js_script
|
||||
from ..utils import get_home_folder
|
||||
from .utils import get_playwright, get_browser_executable, get_browser_disable_options, create_temp_directory, is_windows
|
||||
from .utils import get_playwright, get_browser_executable, get_browser_disable_options, create_temp_directory, is_windows, is_browser_running
|
||||
|
||||
from playwright_stealth import StealthConfig
|
||||
|
||||
@@ -85,6 +85,22 @@ class BaseBrowserStrategy(ABC):
|
||||
"""
|
||||
pass
|
||||
|
||||
async def get_pages(self, crawlerRunConfig: CrawlerRunConfig, count: int = 1) -> List[Tuple[Page, BrowserContext]]:
    """Create ``count`` pages one at a time via ``get_page``.

    Default implementation shared by all strategies; subclasses may override
    it with a more efficient batched approach.

    Args:
        crawlerRunConfig: Configuration applied to each page.
        count: Number of pages to create. Default: 1.

    Returns:
        List of ``(Page, BrowserContext)`` tuples.
    """
    # Sequential awaits: get_page() performs all per-page setup itself.
    return [await self.get_page(crawlerRunConfig) for _ in range(count)]
|
||||
|
||||
@abstractmethod
|
||||
async def close(self):
|
||||
"""Close the browser and clean up resources."""
|
||||
@@ -136,9 +152,6 @@ class BaseBrowserStrategy(ABC):
|
||||
if self.config.cookies:
|
||||
await context.add_cookies(self.config.cookies)
|
||||
|
||||
if self.config.storage_state:
|
||||
await context.storage_state(path=None)
|
||||
|
||||
if self.config.accept_downloads:
|
||||
context.set_default_timeout(DOWNLOAD_PAGE_TIMEOUT)
|
||||
context.set_default_navigation_timeout(DOWNLOAD_PAGE_TIMEOUT)
|
||||
@@ -161,7 +174,7 @@ class BaseBrowserStrategy(ABC):
|
||||
{
|
||||
"name": "cookiesEnabled",
|
||||
"value": "true",
|
||||
"url": crawlerRunConfig.url if crawlerRunConfig else "https://crawl4ai.com/",
|
||||
"url": crawlerRunConfig and crawlerRunConfig.url or "https://crawl4ai.com/",
|
||||
}
|
||||
]
|
||||
)
|
||||
@@ -324,12 +337,31 @@ class PlaywrightBrowserStrategy(BaseBrowserStrategy):
|
||||
"viewport": viewport_settings,
|
||||
"proxy": proxy_settings,
|
||||
"accept_downloads": self.config.accept_downloads,
|
||||
"storage_state": self.config.storage_state,
|
||||
"ignore_https_errors": self.config.ignore_https_errors,
|
||||
"device_scale_factor": 1.0,
|
||||
"java_script_enabled": self.config.java_script_enabled,
|
||||
}
|
||||
|
||||
# Handle storage state properly - this is key for persistence
|
||||
if self.config.storage_state:
|
||||
context_settings["storage_state"] = self.config.storage_state
|
||||
if self.logger:
|
||||
if isinstance(self.config.storage_state, str):
|
||||
self.logger.debug(f"Using storage state from file: {self.config.storage_state}", tag="BROWSER")
|
||||
else:
|
||||
self.logger.debug("Using storage state from config object", tag="BROWSER")
|
||||
|
||||
if self.config.user_data_dir:
|
||||
context_settings["storage_state"] = os.path.join(
|
||||
self.config.user_data_dir, "Default", "storage_state.json"
|
||||
)
|
||||
# Create the file if it doesn't exist
|
||||
if not os.path.exists(context_settings["storage_state"]):
|
||||
os.makedirs(os.path.dirname(context_settings["storage_state"]), exist_ok=True)
|
||||
with open(context_settings["storage_state"], "w") as f:
|
||||
json.dump({}, f)
|
||||
|
||||
|
||||
if crawlerRunConfig:
|
||||
# Check if there is value for crawlerRunConfig.proxy_config set add that to context
|
||||
if crawlerRunConfig.proxy_config:
|
||||
@@ -428,6 +460,21 @@ class PlaywrightBrowserStrategy(BaseBrowserStrategy):
|
||||
if self.config.sleep_on_close:
|
||||
await asyncio.sleep(0.5)
|
||||
|
||||
# If we have a user_data_dir configured, ensure persistence of storage state
|
||||
if self.config.user_data_dir and self.browser and self.default_context:
|
||||
for context in self.browser.contexts:
|
||||
try:
|
||||
await context.storage_state(path=os.path.join(self.config.user_data_dir, "Default", "storage_state.json"))
|
||||
if self.logger:
|
||||
self.logger.debug("Ensuring storage state is persisted before closing browser", tag="BROWSER")
|
||||
except Exception as e:
|
||||
if self.logger:
|
||||
self.logger.warning(
|
||||
message="Failed to ensure storage persistence: {error}",
|
||||
tag="BROWSER",
|
||||
params={"error": str(e)}
|
||||
)
|
||||
|
||||
# Close all sessions
|
||||
session_ids = list(self.sessions.keys())
|
||||
for session_id in session_ids:
|
||||
@@ -582,7 +629,7 @@ class CDPBrowserStrategy(BaseBrowserStrategy):
|
||||
Returns:
|
||||
List of command-line arguments for the browser
|
||||
"""
|
||||
browser_path = get_browser_executable(self.config.browser_type)
|
||||
browser_path = await get_browser_executable(self.config.browser_type)
|
||||
base_args = [browser_path]
|
||||
|
||||
if self.config.browser_type == "chromium":
|
||||
@@ -727,6 +774,22 @@ class CDPBrowserStrategy(BaseBrowserStrategy):
|
||||
if self.config.sleep_on_close:
|
||||
await asyncio.sleep(0.5)
|
||||
|
||||
# If we have a user_data_dir configured, ensure persistence of storage state
|
||||
if self.config.user_data_dir and self.browser:
|
||||
try:
|
||||
# Create a brief sleep to allow the browser to flush any pending operations
|
||||
# This helps ensure all storage state (localStorage, cookies, etc.) gets saved
|
||||
await asyncio.sleep(0.3)
|
||||
if self.logger:
|
||||
self.logger.debug("Ensuring storage state is persisted before closing CDP browser", tag="BROWSER")
|
||||
except Exception as e:
|
||||
if self.logger:
|
||||
self.logger.warning(
|
||||
message="Failed to ensure storage persistence: {error}",
|
||||
tag="BROWSER",
|
||||
params={"error": str(e)}
|
||||
)
|
||||
|
||||
# Close all sessions
|
||||
session_ids = list(self.sessions.keys())
|
||||
for session_id in session_ids:
|
||||
@@ -775,19 +838,46 @@ class BuiltinBrowserStrategy(CDPBrowserStrategy):
|
||||
logger: Logger for recording events and errors
|
||||
"""
|
||||
super().__init__(config, logger)
|
||||
self.builtin_browser_dir = os.path.join(get_home_folder(), "builtin-browser")
|
||||
self.builtin_browser_dir = os.path.join(get_home_folder(), "builtin-browser") if not self.config.user_data_dir else self.config.user_data_dir
|
||||
self.builtin_config_file = os.path.join(self.builtin_browser_dir, "browser_config.json")
|
||||
|
||||
# Raise error if user data dir is already engaged
|
||||
if self._check_user_dir_is_engaged(self.builtin_browser_dir):
|
||||
raise Exception(f"User data directory {self.builtin_browser_dir} is already engaged by another browser instance.")
|
||||
|
||||
os.makedirs(self.builtin_browser_dir, exist_ok=True)
|
||||
|
||||
def _check_user_dir_is_engaged(self, user_data_dir: str) -> bool:
    """Check whether ``user_data_dir`` is claimed by a registered browser.

    Scans the ``port_map`` entries of the built-in browser config file and
    reports whether any registered instance uses the given directory.

    Args:
        user_data_dir: Directory path to look for.

    Returns:
        bool: True if the directory is engaged, False otherwise (including
        when the config file is missing or unreadable).
    """
    if not os.path.exists(self.builtin_config_file):
        return False

    try:
        with open(self.builtin_config_file, 'r') as f:
            registry = json.load(f)
        # Engaged when any port_map entry points at the same directory.
        # NOTE(review): entries are not checked for liveness here, so a
        # stale record from a dead browser also counts as engaged.
        return any(
            entry.get("user_data_dir") == user_data_dir
            for entry in registry.get("port_map", {}).values()
        )
    except Exception as e:
        if self.logger:
            self.logger.error(f"Error reading built-in browser config: {str(e)}", tag="BUILTIN")
        return False
|
||||
|
||||
async def start(self):
|
||||
"""Start or connect to the built-in browser.
|
||||
|
||||
Returns:
|
||||
self: For method chaining
|
||||
"""
|
||||
# Check for existing built-in browser
|
||||
browser_info = self.get_builtin_browser_info()
|
||||
if browser_info and self._is_browser_running(browser_info.get('pid')):
|
||||
# Check for existing built-in browser (get_browser_info already checks if running)
|
||||
browser_info = self.get_browser_info()
|
||||
if browser_info:
|
||||
if self.logger:
|
||||
self.logger.info(f"Using existing built-in browser at {browser_info.get('cdp_url')}", tag="BROWSER")
|
||||
self.config.cdp_url = browser_info.get('cdp_url')
|
||||
@@ -797,7 +887,7 @@ class BuiltinBrowserStrategy(CDPBrowserStrategy):
|
||||
cdp_url = await self.launch_builtin_browser(
|
||||
browser_type=self.config.browser_type,
|
||||
debugging_port=self.config.debugging_port,
|
||||
headless=self.config.headless
|
||||
headless=self.config.headless,
|
||||
)
|
||||
if not cdp_url:
|
||||
if self.logger:
|
||||
@@ -808,55 +898,62 @@ class BuiltinBrowserStrategy(CDPBrowserStrategy):
|
||||
# Call parent class implementation with updated CDP URL
|
||||
return await super().start()
|
||||
|
||||
def get_builtin_browser_info(self) -> Optional[Dict[str, Any]]:
|
||||
"""Get information about the built-in browser.
|
||||
|
||||
Returns:
|
||||
dict: Browser information or None if no built-in browser is configured
|
||||
"""
|
||||
if not os.path.exists(self.builtin_config_file):
|
||||
return None
|
||||
|
||||
try:
|
||||
with open(self.builtin_config_file, 'r') as f:
|
||||
browser_info = json.load(f)
|
||||
|
||||
# Check if the browser is still running
|
||||
if not self._is_browser_running(browser_info.get('pid')):
|
||||
if self.logger:
|
||||
self.logger.warning("Built-in browser is not running", tag="BUILTIN")
|
||||
return None
|
||||
|
||||
return browser_info
|
||||
except Exception as e:
|
||||
if self.logger:
|
||||
self.logger.error(f"Error reading built-in browser config: {str(e)}", tag="BUILTIN")
|
||||
return None
|
||||
|
||||
def _is_browser_running(self, pid: Optional[int]) -> bool:
|
||||
"""Check if a process with the given PID is running.
|
||||
@classmethod
|
||||
def get_builtin_browser_info(cls, debugging_port: int, config_file: str, logger: Optional[AsyncLogger] = None) -> Optional[Dict[str, Any]]:
|
||||
"""Get information about the built-in browser for a specific debugging port.
|
||||
|
||||
Args:
|
||||
pid: Process ID to check
|
||||
debugging_port: The debugging port to look for
|
||||
config_file: Path to the config file
|
||||
logger: Optional logger for recording events
|
||||
|
||||
Returns:
|
||||
bool: True if the process is running, False otherwise
|
||||
dict: Browser information or None if no running browser is configured for this port
|
||||
"""
|
||||
if not pid:
|
||||
return False
|
||||
if not os.path.exists(config_file):
|
||||
return None
|
||||
|
||||
try:
|
||||
# Check if the process exists
|
||||
if is_windows():
|
||||
process = subprocess.run(["tasklist", "/FI", f"PID eq {pid}"],
|
||||
capture_output=True, text=True)
|
||||
return str(pid) in process.stdout
|
||||
else:
|
||||
# Unix-like systems
|
||||
os.kill(pid, 0) # This doesn't actually kill the process, just checks if it exists
|
||||
return True
|
||||
except (ProcessLookupError, PermissionError, OSError):
|
||||
return False
|
||||
with open(config_file, 'r') as f:
|
||||
browser_info_dict = json.load(f)
|
||||
|
||||
# Get browser info from port map
|
||||
if isinstance(browser_info_dict, dict) and "port_map" in browser_info_dict:
|
||||
port_str = str(debugging_port)
|
||||
if port_str in browser_info_dict["port_map"]:
|
||||
browser_info = browser_info_dict["port_map"][port_str]
|
||||
|
||||
# Check if the browser is still running
|
||||
if not is_browser_running(browser_info.get('pid')):
|
||||
if logger:
|
||||
logger.warning(f"Built-in browser on port {debugging_port} is not running", tag="BUILTIN")
|
||||
# Remove this port from the dictionary
|
||||
del browser_info_dict["port_map"][port_str]
|
||||
with open(config_file, 'w') as f:
|
||||
json.dump(browser_info_dict, f, indent=2)
|
||||
return None
|
||||
|
||||
return browser_info
|
||||
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
if logger:
|
||||
logger.error(f"Error reading built-in browser config: {str(e)}", tag="BUILTIN")
|
||||
return None
|
||||
|
||||
def get_browser_info(self) -> Optional[Dict[str, Any]]:
    """Return info for this instance's built-in browser, if one is running.

    Thin wrapper that forwards this instance's debugging port, config-file
    path and logger to ``get_builtin_browser_info``.

    Returns:
        dict: Browser information, or None when no running browser is
        registered for this port.
    """
    return self.get_builtin_browser_info(
        logger=self.logger,
        config_file=self.builtin_config_file,
        debugging_port=self.config.debugging_port,
    )
|
||||
|
||||
|
||||
async def launch_builtin_browser(self,
|
||||
browser_type: str = "chromium",
|
||||
@@ -873,18 +970,27 @@ class BuiltinBrowserStrategy(CDPBrowserStrategy):
|
||||
str: CDP URL for the browser, or None if launch failed
|
||||
"""
|
||||
# Check if there's an existing browser still running
|
||||
browser_info = self.get_builtin_browser_info()
|
||||
if browser_info and self._is_browser_running(browser_info.get('pid')):
|
||||
browser_info = self.get_builtin_browser_info(
|
||||
debugging_port=debugging_port,
|
||||
config_file=self.builtin_config_file,
|
||||
logger=self.logger
|
||||
)
|
||||
if browser_info:
|
||||
if self.logger:
|
||||
self.logger.info("Built-in browser is already running", tag="BUILTIN")
|
||||
self.logger.info(f"Built-in browser is already running on port {debugging_port}", tag="BUILTIN")
|
||||
return browser_info.get('cdp_url')
|
||||
|
||||
# Create a user data directory for the built-in browser
|
||||
user_data_dir = os.path.join(self.builtin_browser_dir, "user_data")
|
||||
# Raise error if user data dir is already engaged
|
||||
if self._check_user_dir_is_engaged(user_data_dir):
|
||||
raise Exception(f"User data directory {user_data_dir} is already engaged by another browser instance.")
|
||||
|
||||
# Create the user data directory if it doesn't exist
|
||||
os.makedirs(user_data_dir, exist_ok=True)
|
||||
|
||||
# Prepare browser launch arguments
|
||||
browser_path = get_browser_executable(browser_type)
|
||||
browser_path = await get_browser_executable(browser_type)
|
||||
if browser_type == "chromium":
|
||||
args = [
|
||||
browser_path,
|
||||
@@ -957,7 +1063,7 @@ class BuiltinBrowserStrategy(CDPBrowserStrategy):
|
||||
if self.logger:
|
||||
self.logger.warning(f"Could not verify browser: {str(e)}", tag="BUILTIN")
|
||||
|
||||
# Save browser info
|
||||
# Create browser info
|
||||
browser_info = {
|
||||
'pid': process.pid,
|
||||
'cdp_url': cdp_url,
|
||||
@@ -968,8 +1074,31 @@ class BuiltinBrowserStrategy(CDPBrowserStrategy):
|
||||
'config': config_json
|
||||
}
|
||||
|
||||
# Read existing config file if it exists
|
||||
port_map = {}
|
||||
if os.path.exists(self.builtin_config_file):
|
||||
try:
|
||||
with open(self.builtin_config_file, 'r') as f:
|
||||
existing_data = json.load(f)
|
||||
|
||||
# Check if it already uses port mapping
|
||||
if isinstance(existing_data, dict) and "port_map" in existing_data:
|
||||
port_map = existing_data["port_map"]
|
||||
# Convert legacy format to port mapping
|
||||
elif isinstance(existing_data, dict) and "debugging_port" in existing_data:
|
||||
old_port = str(existing_data.get("debugging_port"))
|
||||
if self._is_browser_running(existing_data.get("pid")):
|
||||
port_map[old_port] = existing_data
|
||||
except Exception as e:
|
||||
if self.logger:
|
||||
self.logger.warning(f"Could not read existing config: {str(e)}", tag="BUILTIN")
|
||||
|
||||
# Add/update this browser in the port map
|
||||
port_map[str(debugging_port)] = browser_info
|
||||
|
||||
# Write updated config
|
||||
with open(self.builtin_config_file, 'w') as f:
|
||||
json.dump(browser_info, f, indent=2)
|
||||
json.dump({"port_map": port_map}, f, indent=2)
|
||||
|
||||
# Detach from the browser process - don't keep any references
|
||||
# This is important to allow the Python script to exit while the browser continues running
|
||||
@@ -990,10 +1119,10 @@ class BuiltinBrowserStrategy(CDPBrowserStrategy):
|
||||
Returns:
|
||||
bool: True if the browser was killed, False otherwise
|
||||
"""
|
||||
browser_info = self.get_builtin_browser_info()
|
||||
browser_info = self.get_browser_info()
|
||||
if not browser_info:
|
||||
if self.logger:
|
||||
self.logger.warning("No built-in browser found", tag="BUILTIN")
|
||||
self.logger.warning(f"No built-in browser found on port {self.config.debugging_port}", tag="BUILTIN")
|
||||
return False
|
||||
|
||||
pid = browser_info.get('pid')
|
||||
@@ -1007,16 +1136,29 @@ class BuiltinBrowserStrategy(CDPBrowserStrategy):
|
||||
os.kill(pid, signal.SIGTERM)
|
||||
# Wait for termination
|
||||
for _ in range(5):
|
||||
if not self._is_browser_running(pid):
|
||||
if not is_browser_running(pid):
|
||||
break
|
||||
await asyncio.sleep(0.5)
|
||||
else:
|
||||
# Force kill if still running
|
||||
os.kill(pid, signal.SIGKILL)
|
||||
|
||||
# Remove config file
|
||||
if os.path.exists(self.builtin_config_file):
|
||||
os.unlink(self.builtin_config_file)
|
||||
# Update config file to remove this browser
|
||||
with open(self.builtin_config_file, 'r') as f:
|
||||
browser_info_dict = json.load(f)
|
||||
# Remove this port from the dictionary
|
||||
port_str = str(self.config.debugging_port)
|
||||
if port_str in browser_info_dict.get("port_map", {}):
|
||||
del browser_info_dict["port_map"][port_str]
|
||||
with open(self.builtin_config_file, 'w') as f:
|
||||
json.dump(browser_info_dict, f, indent=2)
|
||||
# Remove user data directory if it exists
|
||||
if os.path.exists(self.builtin_browser_dir):
|
||||
shutil.rmtree(self.builtin_browser_dir)
|
||||
# Clear the browser info cache
|
||||
self.browser = None
|
||||
self.temp_dir = None
|
||||
self.shutting_down = True
|
||||
|
||||
if self.logger:
|
||||
self.logger.success("Built-in browser terminated", tag="BUILTIN")
|
||||
@@ -1032,17 +1174,29 @@ class BuiltinBrowserStrategy(CDPBrowserStrategy):
|
||||
Returns:
|
||||
dict: Status information with running, cdp_url, and info fields
|
||||
"""
|
||||
browser_info = self.get_builtin_browser_info()
|
||||
browser_info = self.get_browser_info()
|
||||
|
||||
if not browser_info:
|
||||
return {
|
||||
'running': False,
|
||||
'cdp_url': None,
|
||||
'info': None
|
||||
'info': None,
|
||||
'port': self.config.debugging_port
|
||||
}
|
||||
|
||||
return {
|
||||
'running': True,
|
||||
'cdp_url': browser_info.get('cdp_url'),
|
||||
'info': browser_info
|
||||
'info': browser_info,
|
||||
'port': self.config.debugging_port
|
||||
}
|
||||
|
||||
# Override the close method to handle built-in browser cleanup
|
||||
async def close(self):
|
||||
"""Close the built-in browser and clean up resources."""
|
||||
# Call parent class close method
|
||||
await super().close()
|
||||
|
||||
# Clean up built-in browser if we created it
|
||||
if self.shutting_down:
|
||||
await self.kill_builtin_browser()
|
||||
|
||||
@@ -8,14 +8,18 @@ and Playwright instance management.
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
import platform
|
||||
import time
|
||||
import tempfile
|
||||
from typing import Optional, Any
|
||||
import subprocess
|
||||
from typing import Optional
|
||||
|
||||
from playwright.async_api import async_playwright
|
||||
|
||||
from ..async_logger import AsyncLogger
|
||||
from ..utils import get_chromium_path
|
||||
from ..async_configs import BrowserConfig, CrawlerRunConfig
|
||||
|
||||
from ..async_logger import AsyncLogger
|
||||
|
||||
|
||||
_playwright_instance = None
|
||||
|
||||
@@ -30,7 +34,7 @@ async def get_playwright():
|
||||
_playwright_instance = await async_playwright().start()
|
||||
return _playwright_instance
|
||||
|
||||
def get_browser_executable(browser_type: str) -> str:
|
||||
async def get_browser_executable(browser_type: str) -> str:
|
||||
"""Get the path to browser executable, with platform-specific handling.
|
||||
|
||||
Args:
|
||||
@@ -39,7 +43,7 @@ def get_browser_executable(browser_type: str) -> str:
|
||||
Returns:
|
||||
Path to browser executable
|
||||
"""
|
||||
return get_chromium_path(browser_type)
|
||||
return await get_chromium_path(browser_type)
|
||||
|
||||
def create_temp_directory(prefix="browser-profile-") -> str:
|
||||
"""Create a temporary directory for browser data.
|
||||
@@ -75,6 +79,31 @@ def is_linux() -> bool:
|
||||
True if Linux, False otherwise
|
||||
"""
|
||||
return not (is_windows() or is_macos())
|
||||
|
||||
def is_browser_running(pid: Optional[int]) -> bool:
    """Tell whether a process with the given PID is alive.

    Args:
        pid: Process ID to check; falsy values (None, 0) are treated as
            "not running".

    Returns:
        bool: True when the process exists, False otherwise.
    """
    if not pid:
        return False

    try:
        if is_windows():
            # tasklist prints a row containing the PID when the process exists.
            listing = subprocess.run(["tasklist", "/FI", f"PID eq {pid}"],
                                     capture_output=True, text=True)
            return str(pid) in listing.stdout
        # POSIX: signal 0 performs only the existence/permission check —
        # it never delivers a signal to the target process.
        os.kill(pid, 0)
        return True
    except (ProcessLookupError, PermissionError, OSError):
        # NOTE(review): PermissionError from os.kill usually means the
        # process exists but belongs to another user; it is deliberately
        # treated as "not running" here to match the original behavior.
        return False
||||
|
||||
def get_browser_disable_options() -> list:
|
||||
"""Get standard list of browser disable options for performance.
|
||||
@@ -103,3 +132,197 @@ def get_browser_disable_options() -> list:
|
||||
"--password-store=basic",
|
||||
"--use-mock-keychain",
|
||||
]
|
||||
|
||||
|
||||
async def find_optimal_browser_config(total_urls=50, verbose=True, rate_limit_delay=0.2):
|
||||
"""Find optimal browser configuration for crawling a specific number of URLs.
|
||||
|
||||
Args:
|
||||
total_urls: Number of URLs to crawl
|
||||
verbose: Whether to print progress
|
||||
rate_limit_delay: Delay between page loads to avoid rate limiting
|
||||
|
||||
Returns:
|
||||
dict: Contains fastest, lowest_memory, and optimal configurations
|
||||
"""
|
||||
from .manager import BrowserManager
|
||||
if verbose:
|
||||
print(f"\n=== Finding optimal configuration for crawling {total_urls} URLs ===\n")
|
||||
|
||||
# Generate test URLs with timestamp to avoid caching
|
||||
timestamp = int(time.time())
|
||||
urls = [f"https://example.com/page_{i}?t={timestamp}" for i in range(total_urls)]
|
||||
|
||||
# Limit browser configurations to test (1 browser to max 10)
|
||||
max_browsers = min(10, total_urls)
|
||||
configs_to_test = []
|
||||
|
||||
# Generate configurations (browser count, pages distribution)
|
||||
for num_browsers in range(1, max_browsers + 1):
|
||||
base_pages = total_urls // num_browsers
|
||||
remainder = total_urls % num_browsers
|
||||
|
||||
# Create distribution array like [3, 3, 2, 2] (some browsers get one more page)
|
||||
if remainder > 0:
|
||||
distribution = [base_pages + 1] * remainder + [base_pages] * (num_browsers - remainder)
|
||||
else:
|
||||
distribution = [base_pages] * num_browsers
|
||||
|
||||
configs_to_test.append((num_browsers, distribution))
|
||||
|
||||
results = []
|
||||
|
||||
# Test each configuration
|
||||
for browser_count, page_distribution in configs_to_test:
|
||||
if verbose:
|
||||
print(f"Testing {browser_count} browsers with distribution {tuple(page_distribution)}")
|
||||
|
||||
try:
|
||||
# Track memory if possible
|
||||
try:
|
||||
import psutil
|
||||
process = psutil.Process()
|
||||
start_memory = process.memory_info().rss / (1024 * 1024) # MB
|
||||
except ImportError:
|
||||
if verbose:
|
||||
print("Memory tracking not available (psutil not installed)")
|
||||
start_memory = 0
|
||||
|
||||
# Start browsers in parallel
|
||||
managers = []
|
||||
start_tasks = []
|
||||
start_time = time.time()
|
||||
|
||||
logger = AsyncLogger(verbose=True, log_file=None)
|
||||
|
||||
for i in range(browser_count):
|
||||
config = BrowserConfig(headless=True)
|
||||
manager = BrowserManager(browser_config=config, logger=logger)
|
||||
start_tasks.append(manager.start())
|
||||
managers.append(manager)
|
||||
|
||||
await asyncio.gather(*start_tasks)
|
||||
|
||||
# Distribute URLs among browsers
|
||||
urls_per_manager = {}
|
||||
url_index = 0
|
||||
|
||||
for i, manager in enumerate(managers):
|
||||
pages_for_this_browser = page_distribution[i]
|
||||
end_index = url_index + pages_for_this_browser
|
||||
urls_per_manager[manager] = urls[url_index:end_index]
|
||||
url_index = end_index
|
||||
|
||||
# Create pages for each browser
|
||||
all_pages = []
|
||||
for manager, manager_urls in urls_per_manager.items():
|
||||
if not manager_urls:
|
||||
continue
|
||||
pages = await manager.get_pages(CrawlerRunConfig(), count=len(manager_urls))
|
||||
all_pages.extend(zip(pages, manager_urls))
|
||||
|
||||
# Crawl pages with delay to avoid rate limiting
|
||||
async def crawl_page(page_ctx, url):
|
||||
page, _ = page_ctx
|
||||
try:
|
||||
await page.goto(url)
|
||||
if rate_limit_delay > 0:
|
||||
await asyncio.sleep(rate_limit_delay)
|
||||
title = await page.title()
|
||||
return title
|
||||
finally:
|
||||
await page.close()
|
||||
|
||||
crawl_start = time.time()
|
||||
crawl_tasks = [crawl_page(page_ctx, url) for page_ctx, url in all_pages]
|
||||
await asyncio.gather(*crawl_tasks)
|
||||
crawl_time = time.time() - crawl_start
|
||||
total_time = time.time() - start_time
|
||||
|
||||
# Measure final memory usage
|
||||
if start_memory > 0:
|
||||
end_memory = process.memory_info().rss / (1024 * 1024)
|
||||
memory_used = end_memory - start_memory
|
||||
else:
|
||||
memory_used = 0
|
||||
|
||||
# Close all browsers
|
||||
for manager in managers:
|
||||
await manager.close()
|
||||
|
||||
# Calculate metrics
|
||||
pages_per_second = total_urls / crawl_time
|
||||
|
||||
# Calculate efficiency score (higher is better)
|
||||
# This balances speed vs memory
|
||||
if memory_used > 0:
|
||||
efficiency = pages_per_second / (memory_used + 1)
|
||||
else:
|
||||
efficiency = pages_per_second
|
||||
|
||||
# Store result
|
||||
result = {
|
||||
"browser_count": browser_count,
|
||||
"distribution": tuple(page_distribution),
|
||||
"crawl_time": crawl_time,
|
||||
"total_time": total_time,
|
||||
"memory_used": memory_used,
|
||||
"pages_per_second": pages_per_second,
|
||||
"efficiency": efficiency
|
||||
}
|
||||
|
||||
results.append(result)
|
||||
|
||||
if verbose:
|
||||
print(f" ✓ Crawled {total_urls} pages in {crawl_time:.2f}s ({pages_per_second:.1f} pages/sec)")
|
||||
if memory_used > 0:
|
||||
print(f" ✓ Memory used: {memory_used:.1f}MB ({memory_used/total_urls:.1f}MB per page)")
|
||||
print(f" ✓ Efficiency score: {efficiency:.4f}")
|
||||
|
||||
except Exception as e:
|
||||
if verbose:
|
||||
print(f" ✗ Error: {str(e)}")
|
||||
|
||||
# Clean up
|
||||
for manager in managers:
|
||||
try:
|
||||
await manager.close()
|
||||
except:
|
||||
pass
|
||||
|
||||
# If no successful results, return None
|
||||
if not results:
|
||||
return None
|
||||
|
||||
# Find best configurations
|
||||
fastest = sorted(results, key=lambda x: x["crawl_time"])[0]
|
||||
|
||||
# Only consider memory if available
|
||||
memory_results = [r for r in results if r["memory_used"] > 0]
|
||||
if memory_results:
|
||||
lowest_memory = sorted(memory_results, key=lambda x: x["memory_used"])[0]
|
||||
else:
|
||||
lowest_memory = fastest
|
||||
|
||||
# Find most efficient (balanced speed vs memory)
|
||||
optimal = sorted(results, key=lambda x: x["efficiency"], reverse=True)[0]
|
||||
|
||||
# Print summary
|
||||
if verbose:
|
||||
print("\n=== OPTIMAL CONFIGURATIONS ===")
|
||||
print(f"⚡ Fastest: {fastest['browser_count']} browsers {fastest['distribution']}")
|
||||
print(f" {fastest['crawl_time']:.2f}s, {fastest['pages_per_second']:.1f} pages/sec")
|
||||
|
||||
print(f"💾 Memory-efficient: {lowest_memory['browser_count']} browsers {lowest_memory['distribution']}")
|
||||
if lowest_memory["memory_used"] > 0:
|
||||
print(f" {lowest_memory['memory_used']:.1f}MB, {lowest_memory['memory_used']/total_urls:.2f}MB per page")
|
||||
|
||||
print(f"🌟 Balanced optimal: {optimal['browser_count']} browsers {optimal['distribution']}")
|
||||
print(f" {optimal['crawl_time']:.2f}s, {optimal['pages_per_second']:.1f} pages/sec, score: {optimal['efficiency']:.4f}")
|
||||
|
||||
return {
|
||||
"fastest": fastest,
|
||||
"lowest_memory": lowest_memory,
|
||||
"optimal": optimal,
|
||||
"all_configs": results
|
||||
}
|
||||
|
||||
@@ -171,9 +171,9 @@ async def run_tests():
|
||||
"""Run all tests sequentially."""
|
||||
results = []
|
||||
|
||||
# results.append(await test_basic_browser_manager())
|
||||
# results.append(await test_custom_browser_config())
|
||||
# results.append(await test_multiple_pages())
|
||||
results.append(await test_basic_browser_manager())
|
||||
results.append(await test_custom_browser_config())
|
||||
results.append(await test_multiple_pages())
|
||||
results.append(await test_session_management())
|
||||
|
||||
# Print summary
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
902
tests/browser/test_parallel_crawling.py
Normal file
902
tests/browser/test_parallel_crawling.py
Normal file
@@ -0,0 +1,902 @@
|
||||
"""
|
||||
Test examples for parallel crawling with the browser module.
|
||||
|
||||
These examples demonstrate the functionality of parallel page creation
|
||||
and serve as functional tests for multi-page crawling performance.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from typing import List
|
||||
|
||||
# Add the project root to Python path if running directly
|
||||
if __name__ == "__main__":
|
||||
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
|
||||
|
||||
from crawl4ai.browser import BrowserManager
|
||||
from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig
|
||||
from crawl4ai.async_logger import AsyncLogger
|
||||
|
||||
# Create a logger for clear terminal output
|
||||
logger = AsyncLogger(verbose=True, log_file=None)
|
||||
|
||||
async def test_get_pages_basic():
    """Test basic functionality of the ``get_pages`` batch-creation API.

    Starts one browser, requests 3 pages up front, verifies each page can
    navigate and report a title, then shuts the browser down.

    Returns:
        bool: True when all assertions pass, False on any failure
        (errors are logged rather than propagated, matching the other
        tests in this module).
    """
    logger.info("Testing basic get_pages functionality", tag="TEST")

    browser_config = BrowserConfig(headless=True)
    manager = BrowserManager(browser_config=browser_config, logger=logger)

    try:
        await manager.start()

        # Request 3 pages in one call; get_pages returns (page, context) pairs.
        crawler_config = CrawlerRunConfig()
        pages = await manager.get_pages(crawler_config, count=3)

        # Verify we got the correct number of pages
        assert len(pages) == 3, f"Expected 3 pages, got {len(pages)}"

        # Verify each page is live by navigating it and reading its title.
        for i, (page, context) in enumerate(pages):
            await page.goto("https://example.com")
            title = await page.title()
            logger.info(f"Page {i+1} title: {title}", tag="TEST")
            assert title, f"Page {i+1} has no title"

        await manager.close()
        logger.success("Basic get_pages test completed successfully", tag="TEST")
        return True
    except Exception as e:
        logger.error(f"Test failed: {str(e)}", tag="TEST")
        # Best-effort cleanup: the browser may already be dead, so only
        # suppress ordinary errors — never KeyboardInterrupt/SystemExit.
        try:
            await manager.close()
        except Exception:
            pass
        return False
|
||||
|
||||
async def test_parallel_approaches_comparison():
    """Compare two parallel crawling approaches:
    1. Create a page for each URL on-demand (get_page + gather)
    2. Get all pages upfront with get_pages, then use them (get_pages + gather)

    Both approaches crawl the same URL list; the test asserts that they
    return the same set of titles and logs which approach was faster.

    Returns:
        bool: True on success, False on failure (errors are logged).
    """
    logger.info("Comparing different parallel crawling approaches", tag="TEST")

    urls = [
        "https://example.com/page1",
        "https://crawl4ai.com",
        "https://kidocode.com",
        "https://bbc.com",
        # "https://example.com/page1",
        # "https://example.com/page2",
        # "https://example.com/page3",
        # "https://example.com/page4",
    ]

    # NOTE(review): headless=False opens visible browser windows — presumably
    # intentional for this demo; confirm before running in CI.
    browser_config = BrowserConfig(headless=False)
    manager = BrowserManager(browser_config=browser_config, logger=logger)

    try:
        await manager.start()

        # Approach 1: Create a page for each URL on-demand and run in parallel
        logger.info("Testing approach 1: get_page for each URL + gather", tag="TEST")
        start_time = time.time()

        async def fetch_title_approach1(url):
            """Create a new page for each URL, go to the URL, and get title"""
            crawler_config = CrawlerRunConfig(url=url)
            page, context = await manager.get_page(crawler_config)
            try:
                await page.goto(url)
                title = await page.title()
                return title
            finally:
                # Always release the page, even if navigation fails.
                await page.close()

        # Run fetch_title_approach1 for each URL in parallel
        tasks = [fetch_title_approach1(url) for url in urls]
        approach1_results = await asyncio.gather(*tasks)

        approach1_time = time.time() - start_time
        logger.info(f"Approach 1 time (get_page + gather): {approach1_time:.2f}s", tag="TEST")

        # Approach 2: Get all pages upfront with get_pages, then use them in parallel
        logger.info("Testing approach 2: get_pages upfront + gather", tag="TEST")
        start_time = time.time()

        # Get all pages upfront
        crawler_config = CrawlerRunConfig()
        pages = await manager.get_pages(crawler_config, count=len(urls))

        async def fetch_title_approach2(page_ctx, url):
            """Use a pre-created page to go to URL and get title"""
            page, _ = page_ctx
            try:
                await page.goto(url)
                title = await page.title()
                return title
            finally:
                await page.close()

        # Use the pre-created pages to fetch titles in parallel
        tasks = [fetch_title_approach2(page_ctx, url) for page_ctx, url in zip(pages, urls)]
        approach2_results = await asyncio.gather(*tasks)

        approach2_time = time.time() - start_time
        logger.info(f"Approach 2 time (get_pages + gather): {approach2_time:.2f}s", tag="TEST")

        # Compare performance. speedup == 0 means approach2_time was not
        # measurable; previously that fell into the else branch and raised
        # ZeroDivisionError on 1/speedup — guard it explicitly.
        speedup = approach1_time / approach2_time if approach2_time > 0 else 0
        if speedup > 1:
            logger.success(f"Approach 2 (get_pages upfront) was {speedup:.2f}x faster", tag="TEST")
        elif speedup > 0:
            logger.info(f"Approach 1 (get_page + gather) was {1/speedup:.2f}x faster", tag="TEST")
        else:
            logger.info("Approach 2 completed too quickly to compare timings", tag="TEST")

        # Verify same content was retrieved in both approaches
        assert len(approach1_results) == len(approach2_results), "Result count mismatch"

        # Sort results for comparison since parallel execution might complete in different order
        assert sorted(approach1_results) == sorted(approach2_results), "Results content mismatch"

        await manager.close()
        return True

    except Exception as e:
        logger.error(f"Test failed: {str(e)}", tag="TEST")
        # Best-effort cleanup; suppress only ordinary errors.
        try:
            await manager.close()
        except Exception:
            pass
        return False
|
||||
|
||||
async def test_multi_browser_scaling(num_browsers=3, pages_per_browser=5):
    """Test performance with multiple browsers and pages per browser.

    Compares two approaches:
    1. On-demand page creation (get_page + gather)
    2. Pre-created pages (get_pages + gather)

    Args:
        num_browsers: Number of independent browser instances to start.
        pages_per_browser: URLs assigned to each browser.

    Returns:
        bool: True on success, False on failure (errors are logged).
    """
    logger.info(f"Testing multi-browser scaling with {num_browsers} browsers × {pages_per_browser} pages", tag="TEST")

    # Generate test URLs
    total_pages = num_browsers * pages_per_browser
    urls = [f"https://example.com/page_{i}" for i in range(total_pages)]

    # Created before the try so the except-block cleanup always has a list.
    managers = []

    try:
        # Start all browsers in parallel
        start_tasks = []
        for i in range(num_browsers):
            browser_config = BrowserConfig(
                headless=True  # Using default browser mode like in test_parallel_approaches_comparison
            )
            manager = BrowserManager(browser_config=browser_config, logger=logger)
            start_tasks.append(manager.start())
            managers.append(manager)

        await asyncio.gather(*start_tasks)

        # Distribute URLs among managers (contiguous slices, one per browser)
        urls_per_manager = {}
        for i, manager in enumerate(managers):
            start_idx = i * pages_per_browser
            end_idx = min(start_idx + pages_per_browser, len(urls))
            urls_per_manager[manager] = urls[start_idx:end_idx]

        # Approach 1: Create a page for each URL on-demand and run in parallel
        logger.info("Testing approach 1: get_page for each URL + gather", tag="TEST")
        start_time = time.time()

        async def fetch_title_approach1(manager, url):
            """Create a new page for the URL, go to the URL, and get title"""
            crawler_config = CrawlerRunConfig(url=url)
            page, context = await manager.get_page(crawler_config)
            try:
                await page.goto(url)
                title = await page.title()
                return title
            finally:
                await page.close()

        # Run fetch_title_approach1 for each URL in parallel
        tasks = []
        for manager, manager_urls in urls_per_manager.items():
            for url in manager_urls:
                tasks.append(fetch_title_approach1(manager, url))

        approach1_results = await asyncio.gather(*tasks)

        approach1_time = time.time() - start_time
        logger.info(f"Approach 1 time (get_page + gather): {approach1_time:.2f}s", tag="TEST")

        # Approach 2: Get all pages upfront with get_pages, then use them in parallel
        logger.info("Testing approach 2: get_pages upfront + gather", tag="TEST")
        start_time = time.time()

        # Get all pages upfront for each manager
        all_pages = []
        for manager, manager_urls in urls_per_manager.items():
            crawler_config = CrawlerRunConfig()
            pages = await manager.get_pages(crawler_config, count=len(manager_urls))
            all_pages.extend(zip(pages, manager_urls))

        async def fetch_title_approach2(page_ctx, url):
            """Use a pre-created page to go to URL and get title"""
            page, _ = page_ctx
            try:
                await page.goto(url)
                title = await page.title()
                return title
            finally:
                await page.close()

        # Use the pre-created pages to fetch titles in parallel
        tasks = [fetch_title_approach2(page_ctx, url) for page_ctx, url in all_pages]
        approach2_results = await asyncio.gather(*tasks)

        approach2_time = time.time() - start_time
        logger.info(f"Approach 2 time (get_pages + gather): {approach2_time:.2f}s", tag="TEST")

        # Compare results and performance; guard both divisions against a
        # zero approach2_time (previously 1/speedup could raise
        # ZeroDivisionError when speedup was forced to 0).
        speedup = approach1_time / approach2_time if approach2_time > 0 else 0
        pages_per_second = total_pages / approach2_time if approach2_time > 0 else 0.0

        # Show a simple summary
        logger.info(f"📊 Summary: {num_browsers} browsers × {pages_per_browser} pages = {total_pages} total crawls", tag="TEST")
        logger.info(f"⚡ Performance: {pages_per_second:.1f} pages/second ({pages_per_second*60:.0f} pages/minute)", tag="TEST")
        logger.info(f"🚀 Total crawl time: {approach2_time:.2f} seconds", tag="TEST")

        if speedup > 1:
            logger.success(f"✅ Approach 2 (get_pages upfront) was {speedup:.2f}x faster", tag="TEST")
        elif speedup > 0:
            logger.info(f"✅ Approach 1 (get_page + gather) was {1/speedup:.2f}x faster", tag="TEST")
        else:
            logger.info("✅ Approach 2 completed too quickly to compare timings", tag="TEST")

        # Close all managers
        for manager in managers:
            await manager.close()

        return True

    except Exception as e:
        logger.error(f"Test failed: {str(e)}", tag="TEST")
        # Clean up; suppress only ordinary errors during teardown.
        for manager in managers:
            try:
                await manager.close()
            except Exception:
                pass
        return False
|
||||
|
||||
async def grid_search_optimal_configuration(total_urls=50):
    """Perform a grid search to find the optimal balance between number of browsers and pages per browser.

    This function tests different combinations of browser count and pages per browser,
    while keeping the total number of URLs constant. It measures performance metrics
    for each configuration to find the "sweet spot" that provides the best speed
    with reasonable memory usage. If matplotlib is installed, a summary chart is
    saved next to this file.

    Args:
        total_urls: Total number of URLs to crawl (default: 50)

    Returns:
        tuple[int, int] | None: (num_browsers, pages_per_browser) of the most
        efficient configuration, or None when every configuration failed.
    """
    logger.info(f"=== GRID SEARCH FOR OPTIMAL CRAWLING CONFIGURATION ({total_urls} URLs) ===", tag="TEST")

    # Generate test URLs once
    urls = [f"https://example.com/page_{i}" for i in range(total_urls)]

    # Define grid search configurations
    # We'll use more flexible approach: test all browser counts from 1 to min(20, total_urls)
    # and distribute pages evenly (some browsers may have 1 more page than others)
    configurations = []

    # Maximum number of browsers to test
    max_browsers_to_test = min(20, total_urls)

    # Try configurations with 1 to max_browsers_to_test browsers
    for num_browsers in range(1, max_browsers_to_test + 1):
        base_pages_per_browser = total_urls // num_browsers
        remainder = total_urls % num_browsers

        # Generate exact page distribution array
        if remainder > 0:
            # First 'remainder' browsers get one more page
            page_distribution = [base_pages_per_browser + 1] * remainder + [base_pages_per_browser] * (num_browsers - remainder)
            pages_distribution = f"{base_pages_per_browser+1} pages × {remainder} browsers, {base_pages_per_browser} pages × {num_browsers - remainder} browsers"
        else:
            # All browsers get the same number of pages
            page_distribution = [base_pages_per_browser] * num_browsers
            pages_distribution = f"{base_pages_per_browser} pages × {num_browsers} browsers"

        # Format the distribution as a tuple string like (4, 4, 3, 3)
        distribution_str = str(tuple(page_distribution))

        configurations.append((num_browsers, base_pages_per_browser, pages_distribution, page_distribution, distribution_str))

    # Track results
    results = []

    # Test each configuration
    for num_browsers, pages_per_browser, pages_distribution, page_distribution, distribution_str in configurations:
        logger.info("-" * 80, tag="TEST")
        logger.info(f"Testing configuration: {num_browsers} browsers with distribution: {distribution_str}", tag="TEST")
        logger.info(f"Details: {pages_distribution}", tag="TEST")
        # Sleep a bit for randomness
        await asyncio.sleep(0.5)

        # Bound before the try so the except-block cleanup never sees an
        # undefined name (or a stale list from the previous iteration).
        managers = []

        try:
            # Import psutil for memory tracking
            try:
                import psutil
                process = psutil.Process()
                initial_memory = process.memory_info().rss / (1024 * 1024)  # MB
            except ImportError:
                logger.warning("psutil not available, memory metrics will not be tracked", tag="TEST")
                initial_memory = 0

            # Create and start browser managers
            start_time = time.time()

            # Start all browsers in parallel
            start_tasks = []
            for i in range(num_browsers):
                browser_config = BrowserConfig(
                    headless=True
                )
                manager = BrowserManager(browser_config=browser_config, logger=logger)
                start_tasks.append(manager.start())
                managers.append(manager)

            await asyncio.gather(*start_tasks)
            browser_startup_time = time.time() - start_time

            # Measure memory after browser startup
            if initial_memory > 0:
                browser_memory = process.memory_info().rss / (1024 * 1024) - initial_memory
            else:
                browser_memory = 0

            # Distribute URLs among managers using the exact page distribution
            urls_per_manager = {}
            total_assigned = 0

            for i, manager in enumerate(managers):
                if i < len(page_distribution):
                    # Get the exact number of pages for this browser from our distribution
                    manager_pages = page_distribution[i]

                    # Get the URL slice for this manager
                    start_idx = total_assigned
                    end_idx = start_idx + manager_pages
                    urls_per_manager[manager] = urls[start_idx:end_idx]
                    total_assigned += manager_pages
                else:
                    # If we have more managers than our distribution (should never happen)
                    urls_per_manager[manager] = []

            # Use the more efficient approach (pre-created pages)
            logger.info("Running page crawling test...", tag="TEST")
            crawl_start_time = time.time()

            # Get all pages upfront for each manager
            all_pages = []
            for manager, manager_urls in urls_per_manager.items():
                if not manager_urls:  # Skip managers with no URLs
                    continue
                crawler_config = CrawlerRunConfig()
                pages = await manager.get_pages(crawler_config, count=len(manager_urls))
                all_pages.extend(zip(pages, manager_urls))

            # Measure memory after page creation
            if initial_memory > 0:
                pages_memory = process.memory_info().rss / (1024 * 1024) - browser_memory - initial_memory
            else:
                pages_memory = 0

            # Function to crawl a URL with a pre-created page
            async def fetch_title(page_ctx, url):
                page, _ = page_ctx
                try:
                    await page.goto(url)
                    title = await page.title()
                    return title
                finally:
                    await page.close()

            # Use the pre-created pages to fetch titles in parallel.
            # Titles are discarded; only the timing matters here.
            tasks = [fetch_title(page_ctx, url) for page_ctx, url in all_pages]
            crawl_results = await asyncio.gather(*tasks)

            crawl_time = time.time() - crawl_start_time
            total_time = time.time() - start_time

            # Final memory measurement
            if initial_memory > 0:
                peak_memory = max(browser_memory + pages_memory, process.memory_info().rss / (1024 * 1024) - initial_memory)
            else:
                peak_memory = 0

            # Close all managers
            for manager in managers:
                await manager.close()

            # Calculate metrics
            pages_per_second = total_urls / crawl_time

            # Store result metrics
            result = {
                "num_browsers": num_browsers,
                "pages_per_browser": pages_per_browser,
                "page_distribution": page_distribution,
                "distribution_str": distribution_str,
                "total_urls": total_urls,
                "browser_startup_time": browser_startup_time,
                "crawl_time": crawl_time,
                "total_time": total_time,
                "browser_memory": browser_memory,
                "pages_memory": pages_memory,
                "peak_memory": peak_memory,
                "pages_per_second": pages_per_second,
                # Calculate efficiency score (higher is better)
                # This balances speed vs memory usage
                "efficiency_score": pages_per_second / (peak_memory + 1) if peak_memory > 0 else pages_per_second,
            }

            results.append(result)

            # Log the results
            logger.info(f"Browser startup: {browser_startup_time:.2f}s", tag="TEST")
            logger.info(f"Crawl time: {crawl_time:.2f}s", tag="TEST")
            logger.info(f"Total time: {total_time:.2f}s", tag="TEST")
            logger.info(f"Performance: {pages_per_second:.1f} pages/second", tag="TEST")

            if peak_memory > 0:
                logger.info(f"Browser memory: {browser_memory:.1f}MB", tag="TEST")
                logger.info(f"Pages memory: {pages_memory:.1f}MB", tag="TEST")
                logger.info(f"Peak memory: {peak_memory:.1f}MB", tag="TEST")
                logger.info(f"Efficiency score: {result['efficiency_score']:.6f}", tag="TEST")

        except Exception as e:
            logger.error(f"Error testing configuration: {str(e)}", tag="TEST")
            import traceback
            traceback.print_exc()

            # Clean up; suppress only ordinary errors during teardown.
            for manager in managers:
                try:
                    await manager.close()
                except Exception:
                    pass

    # If every configuration failed there is nothing to rank or plot
    # (previously this fell through to sorted(results)[0] and raised IndexError).
    if not results:
        logger.error("No configuration completed successfully; nothing to rank", tag="TEST")
        return None

    # Print summary of all configurations
    logger.info("=" * 100, tag="TEST")
    logger.info("GRID SEARCH RESULTS SUMMARY", tag="TEST")
    logger.info("=" * 100, tag="TEST")

    # Rank configurations by efficiency score
    ranked_results = sorted(results, key=lambda x: x["efficiency_score"], reverse=True)

    # Also determine rankings by different metrics
    fastest = sorted(results, key=lambda x: x["crawl_time"])[0]
    lowest_memory = sorted(results, key=lambda x: x["peak_memory"] if x["peak_memory"] > 0 else float('inf'))[0]
    most_efficient = ranked_results[0]

    # Print top performers by category
    logger.info("🏆 TOP PERFORMERS BY CATEGORY:", tag="TEST")
    logger.info(f"⚡ Fastest: {fastest['num_browsers']} browsers × ~{fastest['pages_per_browser']} pages " +
                f"({fastest['crawl_time']:.2f}s, {fastest['pages_per_second']:.1f} pages/s)", tag="TEST")

    if lowest_memory["peak_memory"] > 0:
        logger.info(f"💾 Lowest memory: {lowest_memory['num_browsers']} browsers × ~{lowest_memory['pages_per_browser']} pages " +
                    f"({lowest_memory['peak_memory']:.1f}MB)", tag="TEST")

    logger.info(f"🌟 Most efficient: {most_efficient['num_browsers']} browsers × ~{most_efficient['pages_per_browser']} pages " +
                f"(score: {most_efficient['efficiency_score']:.6f})", tag="TEST")

    # Print result table header
    logger.info("\n📊 COMPLETE RANKING TABLE (SORTED BY EFFICIENCY SCORE):", tag="TEST")
    logger.info("-" * 120, tag="TEST")

    # Define table header
    header = f"{'Rank':<5} | {'Browsers':<8} | {'Distribution':<55} | {'Total Time(s)':<12} | {'Speed(p/s)':<12} | {'Memory(MB)':<12} | {'Efficiency':<10} | {'Notes'}"
    logger.info(header, tag="TEST")
    logger.info("-" * 120, tag="TEST")

    # Print each configuration in ranked order
    for rank, result in enumerate(ranked_results, 1):
        # Add special notes for top performers
        notes = []
        if result == fastest:
            notes.append("⚡ Fastest")
        if result == lowest_memory:
            notes.append("💾 Lowest Memory")
        if result == most_efficient:
            notes.append("🌟 Most Efficient")

        notes_str = " | ".join(notes) if notes else ""

        # Format memory if available
        memory_str = f"{result['peak_memory']:.1f}" if result['peak_memory'] > 0 else "N/A"

        # Get the distribution string
        dist_str = result.get('distribution_str', str(tuple([result['pages_per_browser']] * result['num_browsers'])))

        # Build the row
        row = f"{rank:<5} | {result['num_browsers']:<8} | {dist_str:<55} | {result['total_time']:.2f}s{' ':<7} | "
        row += f"{result['pages_per_second']:.2f}{' ':<6} | {memory_str}{' ':<6} | {result['efficiency_score']:.4f}{' ':<4} | {notes_str}"

        logger.info(row, tag="TEST")

    logger.info("-" * 120, tag="TEST")

    # Generate visualization if matplotlib is available
    try:
        import matplotlib.pyplot as plt
        import numpy as np

        # Extract data for plotting from ranked results
        browser_counts = [r["num_browsers"] for r in ranked_results]
        efficiency_scores = [r["efficiency_score"] for r in ranked_results]
        crawl_times = [r["crawl_time"] for r in ranked_results]
        total_times = [r["total_time"] for r in ranked_results]

        # Filter results with memory data
        memory_results = [r for r in ranked_results if r["peak_memory"] > 0]
        memory_browser_counts = [r["num_browsers"] for r in memory_results]
        peak_memories = [r["peak_memory"] for r in memory_results]

        # Create figure with clean design
        plt.figure(figsize=(14, 12), facecolor='white')
        plt.style.use('ggplot')

        # Create grid for subplots
        gs = plt.GridSpec(3, 1, height_ratios=[1, 1, 1], hspace=0.3)

        # Plot 1: Efficiency Score (higher is better)
        ax1 = plt.subplot(gs[0])
        bar_colors = ['#3498db'] * len(browser_counts)

        # Highlight the most efficient
        most_efficient_idx = browser_counts.index(most_efficient["num_browsers"])
        bar_colors[most_efficient_idx] = '#e74c3c'  # Red for most efficient

        bars = ax1.bar(range(len(browser_counts)), efficiency_scores, color=bar_colors)
        ax1.set_xticks(range(len(browser_counts)))
        ax1.set_xticklabels([f"{bc}" for bc in browser_counts], rotation=45)
        ax1.set_xlabel('Number of Browsers')
        ax1.set_ylabel('Efficiency Score (higher is better)')
        ax1.set_title('Browser Configuration Efficiency (higher is better)')

        # Add value labels on top of bars
        for bar, score in zip(bars, efficiency_scores):
            height = bar.get_height()
            ax1.text(bar.get_x() + bar.get_width()/2., height + 0.02*max(efficiency_scores),
                     f'{score:.3f}', ha='center', va='bottom', rotation=90, fontsize=8)

        # Highlight best configuration
        ax1.text(0.02, 0.90, f"🌟 Most Efficient: {most_efficient['num_browsers']} browsers with ~{most_efficient['pages_per_browser']} pages",
                 transform=ax1.transAxes, fontsize=12, verticalalignment='top',
                 bbox=dict(boxstyle='round,pad=0.5', facecolor='yellow', alpha=0.3))

        # Plot 2: Time Performance
        ax2 = plt.subplot(gs[1])

        # Plot both total time and crawl time
        ax2.plot(browser_counts, crawl_times, 'bo-', label='Crawl Time (s)', linewidth=2)
        ax2.plot(browser_counts, total_times, 'go--', label='Total Time (s)', linewidth=2, alpha=0.6)

        # Mark the fastest configuration
        fastest_idx = browser_counts.index(fastest["num_browsers"])
        ax2.plot(browser_counts[fastest_idx], crawl_times[fastest_idx], 'ro', ms=10,
                 label=f'Fastest: {fastest["num_browsers"]} browsers')

        ax2.set_xlabel('Number of Browsers')
        ax2.set_ylabel('Time (seconds)')
        ax2.set_title(f'Time Performance for {total_urls} URLs by Browser Count')
        ax2.grid(True, linestyle='--', alpha=0.7)
        ax2.legend(loc='upper right')

        # Plot pages per second on second y-axis
        pages_per_second = [total_urls/t for t in crawl_times]
        ax2_twin = ax2.twinx()
        ax2_twin.plot(browser_counts, pages_per_second, 'r^--', label='Pages/second', alpha=0.5)
        ax2_twin.set_ylabel('Pages per second')

        # Add note about the fastest configuration
        ax2.text(0.02, 0.90, f"⚡ Fastest: {fastest['num_browsers']} browsers with ~{fastest['pages_per_browser']} pages" +
                 f"\n   {fastest['crawl_time']:.2f}s ({fastest['pages_per_second']:.1f} pages/s)",
                 transform=ax2.transAxes, fontsize=12, verticalalignment='top',
                 bbox=dict(boxstyle='round,pad=0.5', facecolor='lightblue', alpha=0.3))

        # Plot 3: Memory Usage (if available)
        if memory_results:
            ax3 = plt.subplot(gs[2])

            # Prepare data for grouped bar chart
            memory_per_browser = [m/n for m, n in zip(peak_memories, memory_browser_counts)]
            memory_per_page = [m/(n*p) for m, n, p in zip(
                [r["peak_memory"] for r in memory_results],
                [r["num_browsers"] for r in memory_results],
                [r["pages_per_browser"] for r in memory_results])]

            x = np.arange(len(memory_browser_counts))
            width = 0.35

            # Create grouped bars
            ax3.bar(x - width/2, peak_memories, width, label='Total Memory (MB)', color='#9b59b6')
            ax3.bar(x + width/2, memory_per_browser, width, label='Memory per Browser (MB)', color='#3498db')

            # Configure axis
            ax3.set_xticks(x)
            ax3.set_xticklabels([f"{bc}" for bc in memory_browser_counts], rotation=45)
            ax3.set_xlabel('Number of Browsers')
            ax3.set_ylabel('Memory (MB)')
            ax3.set_title('Memory Usage by Browser Configuration')
            ax3.legend(loc='upper left')
            ax3.grid(True, linestyle='--', alpha=0.7)

            # Add second y-axis for memory per page
            ax3_twin = ax3.twinx()
            ax3_twin.plot(x, memory_per_page, 'ro-', label='Memory per Page (MB)')
            ax3_twin.set_ylabel('Memory per Page (MB)')

            # Get lowest memory configuration
            lowest_memory_idx = memory_browser_counts.index(lowest_memory["num_browsers"])

            # Add note about lowest memory configuration
            ax3.text(0.02, 0.90, f"💾 Lowest Memory: {lowest_memory['num_browsers']} browsers with ~{lowest_memory['pages_per_browser']} pages" +
                     f"\n   {lowest_memory['peak_memory']:.1f}MB ({lowest_memory['peak_memory']/total_urls:.2f}MB per page)",
                     transform=ax3.transAxes, fontsize=12, verticalalignment='top',
                     bbox=dict(boxstyle='round,pad=0.5', facecolor='lightgreen', alpha=0.3))

        # Add overall title
        plt.suptitle(f'Browser Scaling Grid Search Results for {total_urls} URLs', fontsize=16, y=0.98)

        # Add timestamp and info at the bottom
        plt.figtext(0.5, 0.01, f"Generated by Crawl4AI at {time.strftime('%Y-%m-%d %H:%M:%S')}",
                    ha="center", fontsize=10, style='italic')

        # Get current directory and save the figure there
        import os
        __current_file = os.path.abspath(__file__)
        current_dir = os.path.dirname(__current_file)
        output_file = os.path.join(current_dir, 'browser_scaling_grid_search.png')

        # Adjust layout and save figure with high DPI
        plt.tight_layout(rect=[0, 0.03, 1, 0.97])
        plt.savefig(output_file, dpi=200, bbox_inches='tight')
        logger.success(f"Visualization saved to {output_file}", tag="TEST")

    except ImportError:
        logger.warning("matplotlib not available, skipping visualization", tag="TEST")

    return most_efficient["num_browsers"], most_efficient["pages_per_browser"]
|
||||
|
||||
async def find_optimal_browser_config(total_urls=50, verbose=True, rate_limit_delay=0.2):
    """Find optimal browser configuration for crawling a specific number of URLs.

    Tries every browser count from 1 up to min(10, total_urls), distributing the
    URLs as evenly as possible across browsers, then measures crawl time, memory
    usage (when psutil is available) and an efficiency score for each layout.

    Args:
        total_urls: Number of URLs to crawl.
        verbose: Whether to print progress.
        rate_limit_delay: Delay (seconds) between page loads to avoid rate limiting.

    Returns:
        dict: Contains "fastest", "lowest_memory", "optimal" and "all_configs"
            entries, or None if every tested configuration failed.
    """
    if verbose:
        print(f"\n=== Finding optimal configuration for crawling {total_urls} URLs ===\n")

    # Generate test URLs with a timestamp query param to avoid caching
    timestamp = int(time.time())
    urls = [f"https://example.com/page_{i}?t={timestamp}" for i in range(total_urls)]

    # Limit browser configurations to test (1 browser to max 10)
    max_browsers = min(10, total_urls)
    configs_to_test = []

    # Generate configurations (browser count, pages distribution)
    for num_browsers in range(1, max_browsers + 1):
        base_pages = total_urls // num_browsers
        remainder = total_urls % num_browsers

        # Create distribution array like [3, 3, 2, 2] (first `remainder` browsers get one more page)
        if remainder > 0:
            distribution = [base_pages + 1] * remainder + [base_pages] * (num_browsers - remainder)
        else:
            distribution = [base_pages] * num_browsers

        configs_to_test.append((num_browsers, distribution))

    results = []

    # Test each configuration
    for browser_count, page_distribution in configs_to_test:
        if verbose:
            print(f"Testing {browser_count} browsers with distribution {tuple(page_distribution)}")

        # Initialize BEFORE the try block so the except-cleanup loop can never
        # raise a NameError (which would mask the original failure).
        managers = []
        try:
            # Track memory if possible
            try:
                import psutil
                process = psutil.Process()
                start_memory = process.memory_info().rss / (1024 * 1024)  # MB
            except ImportError:
                if verbose:
                    print("Memory tracking not available (psutil not installed)")
                start_memory = 0

            # Start browsers in parallel
            start_tasks = []
            start_time = time.time()

            for _ in range(browser_count):
                config = BrowserConfig(headless=True)
                manager = BrowserManager(browser_config=config, logger=logger)
                start_tasks.append(manager.start())
                managers.append(manager)

            await asyncio.gather(*start_tasks)

            # Distribute URLs among browsers according to the page distribution
            urls_per_manager = {}
            url_index = 0

            for i, manager in enumerate(managers):
                pages_for_this_browser = page_distribution[i]
                end_index = url_index + pages_for_this_browser
                urls_per_manager[manager] = urls[url_index:end_index]
                url_index = end_index

            # Create pages for each browser
            all_pages = []
            for manager, manager_urls in urls_per_manager.items():
                if not manager_urls:
                    continue
                pages = await manager.get_pages(CrawlerRunConfig(), count=len(manager_urls))
                all_pages.extend(zip(pages, manager_urls))

            # Crawl pages with delay to avoid rate limiting; each page is always
            # closed (finally) even if navigation fails.
            async def crawl_page(page_ctx, url):
                page, _ = page_ctx
                try:
                    await page.goto(url)
                    if rate_limit_delay > 0:
                        await asyncio.sleep(rate_limit_delay)
                    title = await page.title()
                    return title
                finally:
                    await page.close()

            crawl_start = time.time()
            crawl_tasks = [crawl_page(page_ctx, url) for page_ctx, url in all_pages]
            await asyncio.gather(*crawl_tasks)
            crawl_time = time.time() - crawl_start
            total_time = time.time() - start_time

            # Measure final memory usage (only meaningful when psutil was available)
            if start_memory > 0:
                end_memory = process.memory_info().rss / (1024 * 1024)
                memory_used = end_memory - start_memory
            else:
                memory_used = 0

            # Close all browsers
            for manager in managers:
                await manager.close()

            # Calculate metrics; guard against a degenerate zero-duration crawl
            pages_per_second = total_urls / crawl_time if crawl_time > 0 else float(total_urls)

            # Efficiency score (higher is better) — balances speed vs memory
            if memory_used > 0:
                efficiency = pages_per_second / (memory_used + 1)
            else:
                efficiency = pages_per_second

            # Store result
            result = {
                "browser_count": browser_count,
                "distribution": tuple(page_distribution),
                "crawl_time": crawl_time,
                "total_time": total_time,
                "memory_used": memory_used,
                "pages_per_second": pages_per_second,
                "efficiency": efficiency,
            }

            results.append(result)

            if verbose:
                print(f" ✓ Crawled {total_urls} pages in {crawl_time:.2f}s ({pages_per_second:.1f} pages/sec)")
                if memory_used > 0:
                    print(f" ✓ Memory used: {memory_used:.1f}MB ({memory_used/total_urls:.1f}MB per page)")
                print(f" ✓ Efficiency score: {efficiency:.4f}")

        except Exception as e:
            if verbose:
                print(f" ✗ Error: {str(e)}")

            # Best-effort cleanup: keep closing remaining browsers even if one
            # refuses to shut down. (Narrowed from a bare `except:` so that
            # KeyboardInterrupt/SystemExit still propagate.)
            for manager in managers:
                try:
                    await manager.close()
                except Exception:
                    pass

    # If no successful results, return None
    if not results:
        return None

    # Find best configurations
    fastest = min(results, key=lambda r: r["crawl_time"])

    # Only consider memory if it was actually measured
    memory_results = [r for r in results if r["memory_used"] > 0]
    if memory_results:
        lowest_memory = min(memory_results, key=lambda r: r["memory_used"])
    else:
        lowest_memory = fastest

    # Find most efficient (balanced speed vs memory)
    optimal = max(results, key=lambda r: r["efficiency"])

    # Print summary
    if verbose:
        print("\n=== OPTIMAL CONFIGURATIONS ===")
        print(f"⚡ Fastest: {fastest['browser_count']} browsers {fastest['distribution']}")
        print(f" {fastest['crawl_time']:.2f}s, {fastest['pages_per_second']:.1f} pages/sec")

        print(f"💾 Memory-efficient: {lowest_memory['browser_count']} browsers {lowest_memory['distribution']}")
        if lowest_memory["memory_used"] > 0:
            print(f" {lowest_memory['memory_used']:.1f}MB, {lowest_memory['memory_used']/total_urls:.2f}MB per page")

        print(f"🌟 Balanced optimal: {optimal['browser_count']} browsers {optimal['distribution']}")
        print(f" {optimal['crawl_time']:.2f}s, {optimal['pages_per_second']:.1f} pages/sec, score: {optimal['efficiency']:.4f}")

    return {
        "fastest": fastest,
        "lowest_memory": lowest_memory,
        "optimal": optimal,
        "all_configs": results
    }
|
||||
|
||||
async def run_tests():
    """Run all tests sequentially."""
    outcomes = []

    # Search for the best browser/page layout using the utility function
    configs = await find_optimal_browser_config(
        total_urls=20,  # Use a small number for faster testing
        verbose=True,
        rate_limit_delay=0.2  # 200ms delay between page loads to avoid rate limiting
    )

    if configs is None:
        print("\n❌ Failed to find optimal configuration")
        outcomes.append(False)
    else:
        # Show the optimal configuration
        optimal = configs["optimal"]
        print(f"\n🎯 Recommended configuration for production use:")
        print(f" {optimal['browser_count']} browsers with distribution {optimal['distribution']}")
        print(f" Estimated performance: {optimal['pages_per_second']:.1f} pages/second")
        outcomes.append(True)

    # Print summary
    passed = sum(outcomes)
    total = len(outcomes)
    print(f"\nTests complete: {passed}/{total} passed")

    if passed == total:
        print("All tests passed!")
    else:
        print(f"{total - passed} tests failed")
|
||||
|
||||
# Script entry point: run the browser-configuration optimization tests.
if __name__ == "__main__":
    asyncio.run(run_tests())
|
||||
Reference in New Issue
Block a user