Merge branch 'main' into fix-raw-url-parsing

This commit is contained in:
João Martins
2025-05-30 13:03:25 +01:00
255 changed files with 67002 additions and 6739 deletions

View File

@@ -7,6 +7,7 @@ import json
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(parent_dir)
from crawl4ai import LLMConfig
from crawl4ai.async_webcrawler import AsyncWebCrawler
from crawl4ai.chunking_strategy import RegexChunking
from crawl4ai.extraction_strategy import LLMExtractionStrategy
@@ -48,8 +49,7 @@ async def test_llm_extraction_strategy():
async with AsyncWebCrawler(verbose=True) as crawler:
url = "https://www.nbcnews.com/business"
extraction_strategy = LLMExtractionStrategy(
provider="openai/gpt-4o-mini",
api_token=os.getenv("OPENAI_API_KEY"),
llm_config=LLMConfig(provider="openai/gpt-4o-mini",api_token=os.getenv("OPENAI_API_KEY")),
instruction="Extract only content related to technology",
)
result = await crawler.arun(
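This hunk migrates LLMExtractionStrategy from separate provider/api_token arguments to a single llm_config parameter built from the newly imported LLMConfig. A minimal sketch of the updated call pattern, assuming the same imports shown in the hunks above:

import os
from crawl4ai import LLMConfig
from crawl4ai.extraction_strategy import LLMExtractionStrategy

extraction_strategy = LLMExtractionStrategy(
    llm_config=LLMConfig(
        provider="openai/gpt-4o-mini",
        api_token=os.getenv("OPENAI_API_KEY"),
    ),
    instruction="Extract only content related to technology",
)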

View File

@@ -0,0 +1,4 @@
"""Docker browser strategy tests.
This package contains tests for the Docker browser strategy implementation.
"""

View File

@@ -0,0 +1,651 @@
"""Test examples for Docker Browser Strategy.
These examples demonstrate the functionality of Docker Browser Strategy
and serve as functional tests.
"""
import asyncio
import os
import sys
import shutil
import uuid
# Add the project root to Python path if running directly
if __name__ == "__main__":
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../..')))
from crawl4ai.browser import BrowserManager
from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig
from crawl4ai.async_logger import AsyncLogger
from crawl4ai.browser import DockerConfig
from crawl4ai.browser import DockerRegistry
from crawl4ai.browser import DockerUtils
# Create a logger for clear terminal output
logger = AsyncLogger(verbose=True, log_file=None)
# Global Docker utils instance
docker_utils = DockerUtils(logger)
async def test_docker_components():
"""Test Docker utilities, registry, and image building.
This function tests the core Docker components before running the browser tests.
It validates DockerRegistry, DockerUtils, and builds test images to ensure
everything is functioning correctly.
"""
logger.info("Testing Docker components", tag="SETUP")
# Create a test registry directory
registry_dir = os.path.join(os.path.dirname(__file__), "test_registry")
registry_file = os.path.join(registry_dir, "test_registry.json")
os.makedirs(registry_dir, exist_ok=True)
try:
# 1. Test DockerRegistry
logger.info("Testing DockerRegistry...", tag="SETUP")
registry = DockerRegistry(registry_file)
# Test saving and loading registry
test_container_id = "test-container-123"
registry.register_container(test_container_id, 9876, "test-hash-123")
registry.save()
# Create a new registry instance that loads from the file
registry2 = DockerRegistry(registry_file)
port = registry2.get_container_host_port(test_container_id)
hash_value = registry2.get_container_config_hash(test_container_id)
if port != 9876 or hash_value != "test-hash-123":
logger.error("DockerRegistry persistence failed", tag="SETUP")
return False
# Clean up test container from registry
registry2.unregister_container(test_container_id)
logger.success("DockerRegistry works correctly", tag="SETUP")
# 2. Test DockerUtils
logger.info("Testing DockerUtils...", tag="SETUP")
# Test port detection
in_use = docker_utils.is_port_in_use(22) # SSH port is usually in use
logger.info(f"Port 22 in use: {in_use}", tag="SETUP")
# Get next available port
available_port = docker_utils.get_next_available_port(9000)
logger.info(f"Next available port: {available_port}", tag="SETUP")
# Test config hash generation
config_dict = {"mode": "connect", "headless": True}
config_hash = docker_utils.generate_config_hash(config_dict)
logger.info(f"Generated config hash: {config_hash[:8]}...", tag="SETUP")
# 3. Test Docker is available
logger.info("Checking Docker availability...", tag="SETUP")
if not await check_docker_available():
logger.error("Docker is not available - cannot continue tests", tag="SETUP")
return False
# 4. Test building connect image
logger.info("Building connect mode Docker image...", tag="SETUP")
connect_image = await docker_utils.ensure_docker_image_exists(None, "connect")
if not connect_image:
logger.error("Failed to build connect mode image", tag="SETUP")
return False
logger.success(f"Successfully built connect image: {connect_image}", tag="SETUP")
# 5. Test building launch image
logger.info("Building launch mode Docker image...", tag="SETUP")
launch_image = await docker_utils.ensure_docker_image_exists(None, "launch")
if not launch_image:
logger.error("Failed to build launch mode image", tag="SETUP")
return False
logger.success(f"Successfully built launch image: {launch_image}", tag="SETUP")
# 6. Test creating and removing container
logger.info("Testing container creation and removal...", tag="SETUP")
container_id = await docker_utils.create_container(
image_name=launch_image,
host_port=available_port,
container_name="crawl4ai-test-container"
)
if not container_id:
logger.error("Failed to create test container", tag="SETUP")
return False
logger.info(f"Created test container: {container_id[:12]}", tag="SETUP")
# Verify container is running
running = await docker_utils.is_container_running(container_id)
if not running:
logger.error("Test container is not running", tag="SETUP")
await docker_utils.remove_container(container_id)
return False
# Test commands in container
logger.info("Testing command execution in container...", tag="SETUP")
returncode, stdout, stderr = await docker_utils.exec_in_container(
container_id, ["ls", "-la", "/"]
)
if returncode != 0:
logger.error(f"Command execution failed: {stderr}", tag="SETUP")
await docker_utils.remove_container(container_id)
return False
# Verify Chromium is installed in the container
returncode, stdout, stderr = await docker_utils.exec_in_container(
container_id, ["which", "chromium"]
)
if returncode != 0:
logger.error("Chromium not found in container", tag="SETUP")
await docker_utils.remove_container(container_id)
return False
chromium_path = stdout.strip()
logger.info(f"Chromium found at: {chromium_path}", tag="SETUP")
# Check the Chromium version
returncode, stdout, stderr = await docker_utils.exec_in_container(
container_id, ["chromium", "--version"]
)
if returncode != 0:
logger.error(f"Failed to get Chromium version: {stderr}", tag="SETUP")
await docker_utils.remove_container(container_id)
return False
logger.info(f"Chromium version: {stdout.strip()}", tag="SETUP")
# Remove test container
removed = await docker_utils.remove_container(container_id)
if not removed:
logger.error("Failed to remove test container", tag="SETUP")
return False
logger.success("Test container removed successfully", tag="SETUP")
# All components tested successfully
logger.success("All Docker components tested successfully", tag="SETUP")
return True
except Exception as e:
logger.error(f"Docker component tests failed: {str(e)}", tag="SETUP")
return False
finally:
# Clean up registry test directory
if os.path.exists(registry_dir):
shutil.rmtree(registry_dir)
async def test_docker_connect_mode():
"""Test Docker browser in connect mode.
This tests the basic functionality of creating a browser in Docker
connect mode and using it for navigation.
"""
logger.info("Testing Docker browser in connect mode", tag="TEST")
# Create temp directory for user data
temp_dir = os.path.join(os.path.dirname(__file__), "tmp_user_data")
os.makedirs(temp_dir, exist_ok=True)
try:
# Create Docker configuration
docker_config = DockerConfig(
mode="connect",
persistent=False,
remove_on_exit=True,
user_data_dir=temp_dir
)
# Create browser configuration
browser_config = BrowserConfig(
browser_mode="docker",
headless=True,
docker_config=docker_config
)
# Create browser manager
manager = BrowserManager(browser_config=browser_config, logger=logger)
# Start the browser
await manager.start()
logger.info("Browser started successfully", tag="TEST")
# Create crawler config
crawler_config = CrawlerRunConfig(url="https://example.com")
# Get a page
page, context = await manager.get_page(crawler_config)
logger.info("Got page successfully", tag="TEST")
# Navigate to a website
await page.goto("https://example.com")
logger.info("Navigated to example.com", tag="TEST")
# Get page title
title = await page.title()
logger.info(f"Page title: {title}", tag="TEST")
# Clean up
await manager.close()
logger.info("Browser closed successfully", tag="TEST")
return True
except Exception as e:
logger.error(f"Test failed: {str(e)}", tag="TEST")
# Ensure cleanup
try:
await manager.close()
except:
pass
return False
finally:
# Clean up the temp directory
if os.path.exists(temp_dir):
shutil.rmtree(temp_dir)
async def test_docker_launch_mode():
"""Test Docker browser in launch mode.
This tests launching a Chrome browser within a Docker container
on demand with custom settings.
"""
logger.info("Testing Docker browser in launch mode", tag="TEST")
# Create temp directory for user data
temp_dir = os.path.join(os.path.dirname(__file__), "tmp_user_data_launch")
os.makedirs(temp_dir, exist_ok=True)
try:
# Create Docker configuration
docker_config = DockerConfig(
mode="launch",
persistent=False,
remove_on_exit=True,
user_data_dir=temp_dir
)
# Create browser configuration
browser_config = BrowserConfig(
browser_mode="docker",
headless=True,
text_mode=True, # Enable text mode for faster operation
docker_config=docker_config
)
# Create browser manager
manager = BrowserManager(browser_config=browser_config, logger=logger)
# Start the browser
await manager.start()
logger.info("Browser started successfully", tag="TEST")
# Create crawler config
crawler_config = CrawlerRunConfig(url="https://example.com")
# Get a page
page, context = await manager.get_page(crawler_config)
logger.info("Got page successfully", tag="TEST")
# Navigate to a website
await page.goto("https://example.com")
logger.info("Navigated to example.com", tag="TEST")
# Get page title
title = await page.title()
logger.info(f"Page title: {title}", tag="TEST")
# Clean up
await manager.close()
logger.info("Browser closed successfully", tag="TEST")
return True
except Exception as e:
logger.error(f"Test failed: {str(e)}", tag="TEST")
# Ensure cleanup
try:
await manager.close()
except:
pass
return False
finally:
# Clean up the temp directory
if os.path.exists(temp_dir):
shutil.rmtree(temp_dir)
async def test_docker_persistent_storage():
"""Test Docker browser with persistent storage.
This tests creating localStorage data in one session and verifying
it persists to another session when using persistent storage.
"""
logger.info("Testing Docker browser with persistent storage", tag="TEST")
# Create a unique temp directory
test_id = uuid.uuid4().hex[:8]
temp_dir = os.path.join(os.path.dirname(__file__), f"tmp_user_data_persist_{test_id}")
os.makedirs(temp_dir, exist_ok=True)
manager1 = None
manager2 = None
try:
# Create Docker configuration with persistence
docker_config = DockerConfig(
mode="connect",
persistent=True, # Keep container running between sessions
user_data_dir=temp_dir,
container_user_data_dir="/data"
)
# Create browser configuration
browser_config = BrowserConfig(
browser_mode="docker",
headless=True,
docker_config=docker_config
)
# Create first browser manager
manager1 = BrowserManager(browser_config=browser_config, logger=logger)
# Start the browser
await manager1.start()
logger.info("First browser started successfully", tag="TEST")
# Create crawler config
crawler_config = CrawlerRunConfig()
# Get a page
page1, context1 = await manager1.get_page(crawler_config)
# Navigate to example.com
await page1.goto("https://example.com")
# Set localStorage item
test_value = f"test_value_{test_id}"
await page1.evaluate(f"localStorage.setItem('test_key', '{test_value}')")
logger.info(f"Set localStorage test_key = {test_value}", tag="TEST")
# Close the first browser manager
await manager1.close()
logger.info("First browser closed", tag="TEST")
# Create second browser manager with same config
manager2 = BrowserManager(browser_config=browser_config, logger=logger)
# Start the browser
await manager2.start()
logger.info("Second browser started successfully", tag="TEST")
# Get a page
page2, context2 = await manager2.get_page(crawler_config)
# Navigate to same site
await page2.goto("https://example.com")
# Get localStorage item
value = await page2.evaluate("localStorage.getItem('test_key')")
logger.info(f"Retrieved localStorage test_key = {value}", tag="TEST")
# Check if persistence worked
if value == test_value:
logger.success("Storage persistence verified!", tag="TEST")
else:
logger.error(f"Storage persistence failed! Expected {test_value}, got {value}", tag="TEST")
# Clean up
await manager2.close()
logger.info("Second browser closed successfully", tag="TEST")
return value == test_value
except Exception as e:
logger.error(f"Test failed: {str(e)}", tag="TEST")
# Ensure cleanup
try:
if manager1:
await manager1.close()
if manager2:
await manager2.close()
except:
pass
return False
finally:
# Clean up the temp directory
if os.path.exists(temp_dir):
shutil.rmtree(temp_dir)
async def test_docker_parallel_pages():
"""Test Docker browser with parallel page creation.
This tests the ability to create and use multiple pages in parallel
from a single Docker browser instance.
"""
logger.info("Testing Docker browser with parallel pages", tag="TEST")
try:
# Create Docker configuration
docker_config = DockerConfig(
mode="connect",
persistent=False,
remove_on_exit=True
)
# Create browser configuration
browser_config = BrowserConfig(
browser_mode="docker",
headless=True,
docker_config=docker_config
)
# Create browser manager
manager = BrowserManager(browser_config=browser_config, logger=logger)
# Start the browser
await manager.start()
logger.info("Browser started successfully", tag="TEST")
# Create crawler config
crawler_config = CrawlerRunConfig()
# Get multiple pages
page_count = 3
pages = await manager.get_pages(crawler_config, count=page_count)
logger.info(f"Got {len(pages)} pages successfully", tag="TEST")
if len(pages) != page_count:
logger.error(f"Expected {page_count} pages, got {len(pages)}", tag="TEST")
await manager.close()
return False
# Navigate to different sites with each page
tasks = []
for i, (page, _) in enumerate(pages):
tasks.append(page.goto(f"https://example.com?page={i}"))
# Wait for all navigations to complete
await asyncio.gather(*tasks)
logger.info("All pages navigated successfully", tag="TEST")
# Get titles from all pages
titles = []
for i, (page, _) in enumerate(pages):
title = await page.title()
titles.append(title)
logger.info(f"Page {i+1} title: {title}", tag="TEST")
# Clean up
await manager.close()
logger.info("Browser closed successfully", tag="TEST")
return True
except Exception as e:
logger.error(f"Test failed: {str(e)}", tag="TEST")
# Ensure cleanup
try:
await manager.close()
except:
pass
return False
async def test_docker_registry_reuse():
"""Test Docker container reuse via registry.
This tests that containers with matching configurations
are reused rather than new ones being created.
"""
logger.info("Testing Docker container reuse via registry", tag="TEST")
# Create registry for this test
registry_dir = os.path.join(os.path.dirname(__file__), "registry_reuse_test")
registry_file = os.path.join(registry_dir, "registry.json")
os.makedirs(registry_dir, exist_ok=True)
manager1 = None
manager2 = None
container_id1 = None
try:
# Create identical Docker configurations with custom registry
docker_config1 = DockerConfig(
mode="connect",
persistent=True, # Keep container running after closing
registry_file=registry_file
)
# Create first browser configuration
browser_config1 = BrowserConfig(
browser_mode="docker",
headless=True,
docker_config=docker_config1
)
# Create first browser manager
manager1 = BrowserManager(browser_config=browser_config1, logger=logger)
# Start the first browser
await manager1.start()
logger.info("First browser started successfully", tag="TEST")
# Get container ID from the strategy
docker_strategy1 = manager1.strategy
container_id1 = docker_strategy1.container_id
logger.info(f"First browser container ID: {container_id1[:12]}", tag="TEST")
# Close the first manager but keep container running
await manager1.close()
logger.info("First browser closed", tag="TEST")
# Create second Docker configuration identical to first
docker_config2 = DockerConfig(
mode="connect",
persistent=True,
registry_file=registry_file
)
# Create second browser configuration
browser_config2 = BrowserConfig(
browser_mode="docker",
headless=True,
docker_config=docker_config2
)
# Create second browser manager
manager2 = BrowserManager(browser_config=browser_config2, logger=logger)
# Start the second browser - should reuse existing container
await manager2.start()
logger.info("Second browser started successfully", tag="TEST")
# Get container ID from the second strategy
docker_strategy2 = manager2.strategy
container_id2 = docker_strategy2.container_id
logger.info(f"Second browser container ID: {container_id2[:12]}", tag="TEST")
# Verify container reuse
if container_id1 == container_id2:
logger.success("Container reuse successful - using same container!", tag="TEST")
else:
logger.error("Container reuse failed - new container created!", tag="TEST")
# Clean up
docker_strategy2.docker_config.persistent = False
docker_strategy2.docker_config.remove_on_exit = True
await manager2.close()
logger.info("Second browser closed and container removed", tag="TEST")
return container_id1 == container_id2
except Exception as e:
logger.error(f"Test failed: {str(e)}", tag="TEST")
# Ensure cleanup
try:
if manager1:
await manager1.close()
if manager2:
await manager2.close()
# Make sure container is removed
if container_id1:
await docker_utils.remove_container(container_id1, force=True)
except:
pass
return False
finally:
# Clean up registry directory
if os.path.exists(registry_dir):
shutil.rmtree(registry_dir)
async def run_tests():
"""Run all tests sequentially."""
results = []
logger.info("Starting Docker Browser Strategy tests", tag="TEST")
# Check if Docker is available
if not await check_docker_available():
logger.error("Docker is not available - skipping tests", tag="TEST")
return
# First test Docker components
# setup_result = await test_docker_components()
# if not setup_result:
# logger.error("Docker component tests failed - skipping browser tests", tag="TEST")
# return
# Run browser tests
results.append(await test_docker_connect_mode())
results.append(await test_docker_launch_mode())
results.append(await test_docker_persistent_storage())
results.append(await test_docker_parallel_pages())
results.append(await test_docker_registry_reuse())
# Print summary
total = len(results)
passed = sum(1 for r in results if r)
logger.info(f"Tests complete: {passed}/{total} passed", tag="SUMMARY")
if passed == total:
logger.success("All tests passed!", tag="SUMMARY")
else:
logger.error(f"{total - passed} tests failed", tag="SUMMARY")
async def check_docker_available() -> bool:
"""Check if Docker is available on the system.
Returns:
bool: True if Docker is available, False otherwise
"""
try:
proc = await asyncio.create_subprocess_exec(
"docker", "--version",
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
)
stdout, _ = await proc.communicate()
return proc.returncode == 0 and bool(stdout)
except:
return False
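# A stricter variant could also confirm the Docker daemon is reachable, not just
# that the CLI exists ("docker info" exits non-zero when the daemon is down).
# This is only a sketch and is not used by the tests above:
async def check_docker_daemon_reachable() -> bool:
    proc = await asyncio.create_subprocess_exec(
        "docker", "info",
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    await proc.communicate()
    return proc.returncode == 0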
if __name__ == "__main__":
asyncio.run(run_tests())
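For ad-hoc debugging, a single test coroutine defined above can be run on its own instead of the full suite, e.g. by temporarily swapping the entry point (a minimal sketch):

if __name__ == "__main__":
    asyncio.run(test_docker_connect_mode())  # run only the connect-mode test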

View File

@@ -0,0 +1,525 @@
"""Demo script for testing the enhanced BrowserManager.
This script demonstrates the browser pooling capabilities of the enhanced
BrowserManager with various configurations and usage patterns.
"""
import asyncio
import time
import random
from crawl4ai.browser.manager import BrowserManager, UnavailableBehavior
from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig
from crawl4ai.async_logger import AsyncLogger
from playwright.async_api import TimeoutError as PlaywrightTimeoutError
SAFE_URLS = [
"https://example.com",
"https://example.com/page1",
"https://httpbin.org/get",
"https://httpbin.org/html",
"https://httpbin.org/ip",
"https://httpbin.org/user-agent",
"https://httpbin.org/headers",
"https://httpbin.org/cookies",
"https://httpstat.us/200",
"https://httpstat.us/301",
"https://httpstat.us/404",
"https://httpstat.us/500",
"https://jsonplaceholder.typicode.com/posts/1",
"https://jsonplaceholder.typicode.com/posts/2",
"https://jsonplaceholder.typicode.com/posts/3",
"https://jsonplaceholder.typicode.com/posts/4",
"https://jsonplaceholder.typicode.com/posts/5",
"https://jsonplaceholder.typicode.com/comments/1",
"https://jsonplaceholder.typicode.com/comments/2",
"https://jsonplaceholder.typicode.com/users/1",
"https://jsonplaceholder.typicode.com/users/2",
"https://jsonplaceholder.typicode.com/albums/1",
"https://jsonplaceholder.typicode.com/albums/2",
"https://jsonplaceholder.typicode.com/photos/1",
"https://jsonplaceholder.typicode.com/photos/2",
"https://jsonplaceholder.typicode.com/todos/1",
"https://jsonplaceholder.typicode.com/todos/2",
"https://www.iana.org",
"https://www.iana.org/domains",
"https://www.iana.org/numbers",
"https://www.iana.org/protocols",
"https://www.iana.org/about",
"https://www.iana.org/time-zones",
"https://www.data.gov",
"https://catalog.data.gov/dataset",
"https://www.archives.gov",
"https://www.usa.gov",
"https://www.loc.gov",
"https://www.irs.gov",
"https://www.census.gov",
"https://www.bls.gov",
"https://www.gpo.gov",
"https://www.w3.org",
"https://www.w3.org/standards",
"https://www.w3.org/WAI",
"https://www.rfc-editor.org",
"https://www.ietf.org",
"https://www.icann.org",
"https://www.internetsociety.org",
"https://www.python.org"
]
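high_volume_demo() below draws from this list with modulo indexing, so any number of crawl tasks simply wraps around it:

url = SAFE_URLS[index % len(SAFE_URLS)]  # index may exceed len(SAFE_URLS)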
async def basic_pooling_demo():
"""Demonstrate basic browser pooling functionality."""
print("\n=== Basic Browser Pooling Demo ===")
# Create logger
logger = AsyncLogger(verbose=True)
# Create browser configurations
config1 = BrowserConfig(
browser_type="chromium",
headless=True,
browser_mode="playwright"
)
config2 = BrowserConfig(
browser_type="chromium",
headless=True,
browser_mode="cdp"
)
# Create browser manager with on-demand behavior
manager = BrowserManager(
browser_config=config1,
logger=logger,
unavailable_behavior=UnavailableBehavior.ON_DEMAND,
max_browsers_per_config=3
)
try:
# Initialize pool with both configurations
print("Initializing browser pool...")
await manager.initialize_pool(
browser_configs=[config1, config2],
browsers_per_config=2
)
# Display initial pool status
status = await manager.get_pool_status()
print(f"Initial pool status: {status}")
# Create crawler run configurations
run_config1 = CrawlerRunConfig()
run_config2 = CrawlerRunConfig()
# Simulate concurrent page requests
print("\nGetting pages for parallel crawling...")
# Function to simulate crawling
async def simulate_crawl(index: int, config: BrowserConfig, run_config: CrawlerRunConfig):
print(f"Crawler {index}: Requesting page...")
page, context, strategy = await manager.get_page(run_config, config)
print(f"Crawler {index}: Got page, navigating to example.com...")
try:
await page.goto("https://example.com")
title = await page.title()
print(f"Crawler {index}: Page title: {title}")
# Simulate work
await asyncio.sleep(random.uniform(1, 3))
print(f"Crawler {index}: Work completed, releasing page...")
# Check dynamic page content
content = await page.content()
content_length = len(content)
print(f"Crawler {index}: Page content length: {content_length}")
except Exception as e:
print(f"Crawler {index}: Error: {str(e)}")
finally:
# Release the page
await manager.release_page(page, strategy, config)
print(f"Crawler {index}: Page released")
# Create 5 parallel crawls
crawl_tasks = []
for i in range(5):
# Alternate between configurations
config = config1 if i % 2 == 0 else config2
run_config = run_config1 if i % 2 == 0 else run_config2
task = asyncio.create_task(simulate_crawl(i+1, config, run_config))
crawl_tasks.append(task)
# Wait for all crawls to complete
await asyncio.gather(*crawl_tasks)
# Display final pool status
status = await manager.get_pool_status()
print(f"\nFinal pool status: {status}")
finally:
# Clean up
print("\nClosing browser manager...")
await manager.close()
print("Browser manager closed")
async def prewarm_pages_demo():
"""Demonstrate page pre-warming functionality."""
print("\n=== Page Pre-warming Demo ===")
# Create logger
logger = AsyncLogger(verbose=True)
# Create browser configuration
config = BrowserConfig(
browser_type="chromium",
headless=True,
browser_mode="playwright"
)
# Create crawler run configurations for pre-warming
run_config1 = CrawlerRunConfig(
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
)
run_config2 = CrawlerRunConfig(
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Safari/605.1.15"
)
# Create page pre-warm configurations
page_configs = [
(config, run_config1, 2), # 2 pages with run_config1
(config, run_config2, 3) # 3 pages with run_config2
]
# Create browser manager
manager = BrowserManager(
browser_config=config,
logger=logger,
unavailable_behavior=UnavailableBehavior.EXCEPTION
)
try:
# Initialize pool with pre-warmed pages
print("Initializing browser pool with pre-warmed pages...")
await manager.initialize_pool(
browser_configs=[config],
browsers_per_config=2,
page_configs=page_configs
)
# Display pool status
status = await manager.get_pool_status()
print(f"Pool status after pre-warming: {status}")
# Simulate using pre-warmed pages
print("\nUsing pre-warmed pages...")
async def use_prewarm_page(index: int, run_config: CrawlerRunConfig):
print(f"Task {index}: Requesting pre-warmed page...")
page, context, strategy = await manager.get_page(run_config, config)
try:
print(f"Task {index}: Got page, navigating to example.com...")
await page.goto("https://example.com")
# Verify user agent was applied correctly
user_agent = await page.evaluate("() => navigator.userAgent")
print(f"Task {index}: User agent: {user_agent}")
# Get page title
title = await page.title()
print(f"Task {index}: Page title: {title}")
# Simulate work
await asyncio.sleep(1)
finally:
# Release the page
print(f"Task {index}: Releasing page...")
await manager.release_page(page, strategy, config)
# Create tasks to use pre-warmed pages
tasks = []
# Use run_config1 pages
for i in range(2):
tasks.append(asyncio.create_task(use_prewarm_page(i+1, run_config1)))
# Use run_config2 pages
for i in range(3):
tasks.append(asyncio.create_task(use_prewarm_page(i+3, run_config2)))
# Wait for all tasks to complete
await asyncio.gather(*tasks)
# Try to use more pages than we pre-warmed (should raise exception)
print("\nTrying to use more pages than pre-warmed...")
try:
page, context, strategy = await manager.get_page(run_config1, config)
try:
print("Got extra page (unexpected)")
await page.goto("https://example.com")
finally:
await manager.release_page(page, strategy, config)
except Exception as e:
print(f"Expected exception when requesting more pages: {str(e)}")
finally:
# Clean up
print("\nClosing browser manager...")
await manager.close()
print("Browser manager closed")
async def prewarm_on_demand_demo():
"""Demonstrate pre-warming with on-demand browser creation."""
print("\n=== Pre-warming with On-Demand Browser Creation Demo ===")
# Create logger
logger = AsyncLogger(verbose=True)
# Create browser configuration
config = BrowserConfig(
browser_type="chromium",
headless=True,
browser_mode="playwright"
)
# Create crawler run configurations
run_config = CrawlerRunConfig(
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
)
# Create page pre-warm configurations - just pre-warm 2 pages
page_configs = [
(config, run_config, 2)
]
# Create browser manager with ON_DEMAND behavior
manager = BrowserManager(
browser_config=config,
logger=logger,
unavailable_behavior=UnavailableBehavior.ON_DEMAND,
max_browsers_per_config=5 # Allow up to 5 browsers
)
try:
# Initialize pool with pre-warmed pages
print("Initializing browser pool with pre-warmed pages...")
await manager.initialize_pool(
browser_configs=[config],
browsers_per_config=1, # Start with just 1 browser
page_configs=page_configs
)
# Display initial pool status
status = await manager.get_pool_status()
print(f"Initial pool status: {status}")
# Simulate using more pages than pre-warmed - should create browsers on demand
print("\nUsing more pages than pre-warmed (should create on demand)...")
async def use_page(index: int):
print(f"Task {index}: Requesting page...")
page, context, strategy = await manager.get_page(run_config, config)
try:
print(f"Task {index}: Got page, navigating to example.com...")
await page.goto("https://example.com")
# Get page title
title = await page.title()
print(f"Task {index}: Page title: {title}")
# Simulate work for a varying amount of time
work_time = 1 + (index * 0.5) # Stagger completion times
print(f"Task {index}: Working for {work_time} seconds...")
await asyncio.sleep(work_time)
print(f"Task {index}: Work completed")
finally:
# Release the page
print(f"Task {index}: Releasing page...")
await manager.release_page(page, strategy, config)
# Create more tasks than pre-warmed pages
tasks = []
for i in range(5): # Try to use 5 pages when only 2 are pre-warmed
tasks.append(asyncio.create_task(use_page(i+1)))
# Wait for all tasks to complete
await asyncio.gather(*tasks)
# Display final pool status - should show on-demand created browsers
status = await manager.get_pool_status()
print(f"\nFinal pool status: {status}")
finally:
# Clean up
print("\nClosing browser manager...")
await manager.close()
print("Browser manager closed")
async def high_volume_demo():
"""Demonstrate high-volume access to pre-warmed pages."""
print("\n=== High Volume Pre-warmed Pages Demo ===")
# Create logger
logger = AsyncLogger(verbose=True)
# Create browser configuration
config = BrowserConfig(
browser_type="chromium",
headless=True,
browser_mode="playwright"
)
# Create crawler run configuration
run_config = CrawlerRunConfig(
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
)
# Set up dimensions
browser_count = 10
pages_per_browser = 5
total_pages = browser_count * pages_per_browser
# Create page pre-warm configuration
page_configs = [
(config, run_config, total_pages)
]
print(f"Preparing {browser_count} browsers with {pages_per_browser} pages each ({total_pages} total pages)")
# Create browser manager with ON_DEMAND behavior as fallback
# No need to specify max_browsers_per_config as it will be calculated automatically
manager = BrowserManager(
browser_config=config,
logger=logger,
unavailable_behavior=UnavailableBehavior.ON_DEMAND
)
try:
# Initialize pool with browsers and pre-warmed pages
print(f"Pre-warming {total_pages} pages...")
start_time = time.time()
await manager.initialize_pool(
browser_configs=[config],
browsers_per_config=browser_count,
page_configs=page_configs
)
warmup_time = time.time() - start_time
print(f"Pre-warming completed in {warmup_time:.2f} seconds")
# Display pool status
status = await manager.get_pool_status()
print(f"Pool status after pre-warming: {status}")
# Simulate using all pre-warmed pages simultaneously
print(f"\nSending {total_pages} crawl requests simultaneously...")
async def crawl_page(index: int):
# url = f"https://example.com/page{index}"
url = SAFE_URLS[index % len(SAFE_URLS)]
print(f"Page {index}: Requesting page...")
# Measure time to acquire page
page_start = time.time()
page, context, strategy = await manager.get_page(run_config, config)
page_acquisition_time = time.time() - page_start
try:
# Navigate to the URL
nav_start = time.time()
await page.goto(url, timeout=5000)
navigation_time = time.time() - nav_start
# Get the page title
title = await page.title()
return {
"index": index,
"url": url,
"title": title,
"page_acquisition_time": page_acquisition_time,
"navigation_time": navigation_time
}
except PlaywrightTimeoutError as e:
# print(f"Page {index}: Navigation timed out - {e}")
return {
"index": index,
"url": url,
"title": "Navigation timed out",
"page_acquisition_time": page_acquisition_time,
"navigation_time": 0
}
finally:
# Release the page
await manager.release_page(page, strategy, config)
# Create and execute all tasks simultaneously
start_time = time.time()
# Non-parallel way
# for i in range(total_pages):
# await crawl_page(i+1)
tasks = [crawl_page(i+1) for i in range(total_pages)]
results = await asyncio.gather(*tasks)
total_time = time.time() - start_time
# # Print all titles
# for result in results:
# print(f"Page {result['index']} ({result['url']}): Title: {result['title']}")
# print(f" Page acquisition time: {result['page_acquisition_time']:.4f}s")
# print(f" Navigation time: {result['navigation_time']:.4f}s")
# print(f" Total time: {result['page_acquisition_time'] + result['navigation_time']:.4f}s")
# print("-" * 40)
# Report results
print(f"\nAll {total_pages} crawls completed in {total_time:.2f} seconds")
# Calculate statistics
acquisition_times = [r["page_acquisition_time"] for r in results]
navigation_times = [r["navigation_time"] for r in results]
avg_acquisition = sum(acquisition_times) / len(acquisition_times)
max_acquisition = max(acquisition_times)
min_acquisition = min(acquisition_times)
avg_navigation = sum(navigation_times) / len(navigation_times)
max_navigation = max(navigation_times)
min_navigation = min(navigation_times)
print("\nPage acquisition times:")
print(f" Average: {avg_acquisition:.4f}s")
print(f" Min: {min_acquisition:.4f}s")
print(f" Max: {max_acquisition:.4f}s")
print("\nPage navigation times:")
print(f" Average: {avg_navigation:.4f}s")
print(f" Min: {min_navigation:.4f}s")
print(f" Max: {max_navigation:.4f}s")
# Display final pool status
status = await manager.get_pool_status()
print(f"\nFinal pool status: {status}")
finally:
# Clean up
print("\nClosing browser manager...")
await manager.close()
print("Browser manager closed")
async def main():
"""Run all demos."""
# await basic_pooling_demo()
# await prewarm_pages_demo()
# await prewarm_on_demand_demo()
await high_volume_demo()
# Additional demo functions can be added here
if __name__ == "__main__":
asyncio.run(main())
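Only high_volume_demo() is enabled in main() above; the other demos defined in this file can be re-enabled by editing main(), for example (a sketch):

async def main():
    await basic_pooling_demo()
    await prewarm_pages_demo()
    await prewarm_on_demand_demo()
    await high_volume_demo()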

View File

@@ -0,0 +1,190 @@
"""Test examples for BrowserManager.
These examples demonstrate the functionality of BrowserManager
and serve as functional tests.
"""
import asyncio
import os
import sys
from typing import List
# Add the project root to Python path if running directly
if __name__ == "__main__":
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
from crawl4ai.browser import BrowserManager
from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig
from crawl4ai.async_logger import AsyncLogger
# Create a logger for clear terminal output
logger = AsyncLogger(verbose=True, log_file=None)
async def test_basic_browser_manager():
"""Test basic BrowserManager functionality with default configuration."""
logger.info("Starting test_basic_browser_manager", tag="TEST")
try:
# Create a browser manager with default config
manager = BrowserManager(logger=logger)
# Start the browser
await manager.start()
logger.info("Browser started successfully", tag="TEST")
# Get a page
crawler_config = CrawlerRunConfig(url="https://example.com")
page, context = await manager.get_page(crawler_config)
logger.info("Page created successfully", tag="TEST")
# Navigate to a website
await page.goto("https://example.com")
title = await page.title()
logger.info(f"Page title: {title}", tag="TEST")
# Clean up
await manager.close()
logger.success("test_basic_browser_manager completed successfully", tag="TEST")
return True
except Exception as e:
logger.error(f"test_basic_browser_manager failed: {str(e)}", tag="TEST")
return False
async def test_custom_browser_config():
"""Test BrowserManager with custom browser configuration."""
logger.info("Starting test_custom_browser_config", tag="TEST")
try:
# Create a custom browser config
browser_config = BrowserConfig(
browser_type="chromium",
headless=True,
viewport_width=1280,
viewport_height=800,
light_mode=True
)
# Create browser manager with the config
manager = BrowserManager(browser_config=browser_config, logger=logger)
# Start the browser
await manager.start()
logger.info("Browser started successfully with custom config", tag="TEST")
# Get a page
crawler_config = CrawlerRunConfig(url="https://example.com")
page, context = await manager.get_page(crawler_config)
# Navigate to a website
await page.goto("https://example.com")
title = await page.title()
logger.info(f"Page title: {title}", tag="TEST")
# Verify viewport size
viewport_size = await page.evaluate("() => ({ width: window.innerWidth, height: window.innerHeight })")
logger.info(f"Viewport size: {viewport_size}", tag="TEST")
# Clean up
await manager.close()
logger.success("test_custom_browser_config completed successfully", tag="TEST")
return True
except Exception as e:
logger.error(f"test_custom_browser_config failed: {str(e)}", tag="TEST")
return False
async def test_multiple_pages():
"""Test BrowserManager with multiple pages."""
logger.info("Starting test_multiple_pages", tag="TEST")
try:
# Create browser manager
manager = BrowserManager(logger=logger)
# Start the browser
await manager.start()
logger.info("Browser started successfully", tag="TEST")
# Create multiple pages
pages = []
urls = ["https://example.com", "https://example.org", "https://mozilla.org"]
for i, url in enumerate(urls):
crawler_config = CrawlerRunConfig(url=url)
page, context = await manager.get_page(crawler_config)
await page.goto(url)
pages.append((page, url))
logger.info(f"Created page {i+1} for {url}", tag="TEST")
# Verify all pages are loaded correctly
for i, (page, url) in enumerate(pages):
title = await page.title()
logger.info(f"Page {i+1} title: {title}", tag="TEST")
# Clean up
await manager.close()
logger.success("test_multiple_pages completed successfully", tag="TEST")
return True
except Exception as e:
logger.error(f"test_multiple_pages failed: {str(e)}", tag="TEST")
return False
async def test_session_management():
"""Test session management in BrowserManager."""
logger.info("Starting test_session_management", tag="TEST")
try:
# Create browser manager
manager = BrowserManager(logger=logger)
# Start the browser
await manager.start()
logger.info("Browser started successfully", tag="TEST")
# Create a session
session_id = "test_session_1"
crawler_config = CrawlerRunConfig(url="https://example.com", session_id=session_id)
page1, context1 = await manager.get_page(crawler_config)
await page1.goto("https://example.com")
logger.info(f"Created session with ID: {session_id}", tag="TEST")
# Get the same session again
page2, context2 = await manager.get_page(crawler_config)
# Verify it's the same page/context
is_same_page = page1 == page2
is_same_context = context1 == context2
logger.info(f"Same page: {is_same_page}, Same context: {is_same_context}", tag="TEST")
# Kill the session
await manager.kill_session(session_id)
logger.info(f"Killed session with ID: {session_id}", tag="TEST")
# Clean up
await manager.close()
logger.success("test_session_management completed successfully", tag="TEST")
return True
except Exception as e:
logger.error(f"test_session_management failed: {str(e)}", tag="TEST")
return False
async def run_tests():
"""Run all tests sequentially."""
results = []
results.append(await test_basic_browser_manager())
results.append(await test_custom_browser_config())
results.append(await test_multiple_pages())
results.append(await test_session_management())
# Print summary
total = len(results)
passed = sum(results)
logger.info(f"Tests complete: {passed}/{total} passed", tag="SUMMARY")
if passed == total:
logger.success("All tests passed!", tag="SUMMARY")
else:
logger.error(f"{total - passed} tests failed", tag="SUMMARY")
if __name__ == "__main__":
asyncio.run(run_tests())

View File

@@ -0,0 +1,809 @@
"""
Test script for builtin browser functionality in the browser module.
This script tests:
1. Creating a builtin browser
2. Getting browser information
3. Killing the browser
4. Restarting the browser
5. Testing operations with different browser strategies
6. Testing edge cases
"""
import asyncio
import os
import sys
import time
from typing import List, Dict, Any
from colorama import Fore, Style, init
# Add the project root to the path for imports
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")))
from rich.console import Console
from rich.table import Table
from rich.panel import Panel
from rich.text import Text
from rich.box import Box, SIMPLE
from crawl4ai.browser import BrowserManager
from crawl4ai.browser.strategies import BuiltinBrowserStrategy
from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig
from crawl4ai.async_logger import AsyncLogger
# Initialize colorama for cross-platform colored terminal output
init()
# Define colors for pretty output
SUCCESS = Fore.GREEN
WARNING = Fore.YELLOW
ERROR = Fore.RED
INFO = Fore.CYAN
RESET = Fore.RESET
# Create logger
logger = AsyncLogger(verbose=True)
async def test_builtin_browser_creation():
"""Test creating a builtin browser using the BrowserManager with BuiltinBrowserStrategy"""
print(f"\n{INFO}========== Testing Builtin Browser Creation =========={RESET}")
# Step 1: Create a BrowserManager with builtin mode
print(f"\n{INFO}1. Creating BrowserManager with builtin mode{RESET}")
browser_config = BrowserConfig(browser_mode="builtin", headless=True, verbose=True)
manager = BrowserManager(browser_config=browser_config, logger=logger)
# Step 2: Check if we have a BuiltinBrowserStrategy
print(f"\n{INFO}2. Checking if we have a BuiltinBrowserStrategy{RESET}")
if isinstance(manager.strategy, BuiltinBrowserStrategy):
print(
f"{SUCCESS}Correct strategy type: {manager.strategy.__class__.__name__}{RESET}"
)
else:
print(
f"{ERROR}Wrong strategy type: {manager.strategy.__class__.__name__}{RESET}"
)
return None
# Step 3: Start the manager to launch or connect to builtin browser
print(f"\n{INFO}3. Starting the browser manager{RESET}")
try:
await manager.start()
print(f"{SUCCESS}Browser manager started successfully{RESET}")
except Exception as e:
print(f"{ERROR}Failed to start browser manager: {str(e)}{RESET}")
return None
# Step 4: Get browser info from the strategy
print(f"\n{INFO}4. Getting browser information{RESET}")
browser_info = manager.strategy.get_browser_info()
if browser_info:
print(f"{SUCCESS}Browser info retrieved:{RESET}")
for key, value in browser_info.items():
if key != "config": # Skip the verbose config section
print(f" {key}: {value}")
cdp_url = browser_info.get("cdp_url")
print(f"{SUCCESS}CDP URL: {cdp_url}{RESET}")
else:
print(f"{ERROR}Failed to get browser information{RESET}")
cdp_url = None
# Save manager for later tests
return manager, cdp_url
async def test_page_operations(manager: BrowserManager):
"""Test page operations with the builtin browser"""
print(
f"\n{INFO}========== Testing Page Operations with Builtin Browser =========={RESET}"
)
# Step 1: Get a single page
print(f"\n{INFO}1. Getting a single page{RESET}")
try:
crawler_config = CrawlerRunConfig()
page, context = await manager.get_page(crawler_config)
print(f"{SUCCESS}Got page successfully{RESET}")
# Navigate to a test URL
await page.goto("https://example.com")
title = await page.title()
print(f"{SUCCESS}Page title: {title}{RESET}")
# Close the page
await page.close()
print(f"{SUCCESS}Page closed successfully{RESET}")
except Exception as e:
print(f"{ERROR}Page operation failed: {str(e)}{RESET}")
return False
# Step 2: Get multiple pages
print(f"\n{INFO}2. Getting multiple pages with get_pages(){RESET}")
try:
# Request 3 pages
crawler_config = CrawlerRunConfig()
pages = await manager.get_pages(crawler_config, count=3)
print(f"{SUCCESS}Got {len(pages)} pages{RESET}")
# Test each page
for i, (page, context) in enumerate(pages):
await page.goto(f"https://example.com?test={i}")
title = await page.title()
print(f"{SUCCESS}Page {i + 1} title: {title}{RESET}")
await page.close()
print(f"{SUCCESS}All pages tested and closed successfully{RESET}")
except Exception as e:
print(f"{ERROR}Multiple page operation failed: {str(e)}{RESET}")
return False
return True
async def test_browser_status_management(manager: BrowserManager):
"""Test browser status and management operations"""
print(f"\n{INFO}========== Testing Browser Status and Management =========={RESET}")
# Step 1: Get browser status
print(f"\n{INFO}1. Getting browser status{RESET}")
try:
status = await manager.strategy.get_builtin_browser_status()
print(f"{SUCCESS}Browser status:{RESET}")
print(f" Running: {status['running']}")
print(f" CDP URL: {status['cdp_url']}")
except Exception as e:
print(f"{ERROR}Failed to get browser status: {str(e)}{RESET}")
return False
# Step 2: Test killing the browser
print(f"\n{INFO}2. Testing killing the browser{RESET}")
try:
result = await manager.strategy.kill_builtin_browser()
if result:
print(f"{SUCCESS}Browser killed successfully{RESET}")
else:
print(f"{ERROR}Failed to kill browser{RESET}")
except Exception as e:
print(f"{ERROR}Browser kill operation failed: {str(e)}{RESET}")
return False
# Step 3: Check status after kill
print(f"\n{INFO}3. Checking status after kill{RESET}")
try:
status = await manager.strategy.get_builtin_browser_status()
if not status["running"]:
print(f"{SUCCESS}Browser is correctly reported as not running{RESET}")
else:
print(f"{ERROR}Browser is incorrectly reported as still running{RESET}")
except Exception as e:
print(f"{ERROR}Failed to get browser status: {str(e)}{RESET}")
return False
# Step 4: Launch a new browser
print(f"\n{INFO}4. Launching a new browser{RESET}")
try:
cdp_url = await manager.strategy.launch_builtin_browser(
browser_type="chromium", headless=True
)
if cdp_url:
print(f"{SUCCESS}New browser launched at: {cdp_url}{RESET}")
else:
print(f"{ERROR}Failed to launch new browser{RESET}")
return False
except Exception as e:
print(f"{ERROR}Browser launch failed: {str(e)}{RESET}")
return False
return True
async def test_multiple_managers():
"""Test creating multiple BrowserManagers that use the same builtin browser"""
print(f"\n{INFO}========== Testing Multiple Browser Managers =========={RESET}")
# Step 1: Create first manager
print(f"\n{INFO}1. Creating first browser manager{RESET}")
browser_config1 = BrowserConfig(browser_mode="builtin", headless=True)
manager1 = BrowserManager(browser_config=browser_config1, logger=logger)
# Step 2: Create second manager
print(f"\n{INFO}2. Creating second browser manager{RESET}")
browser_config2 = BrowserConfig(browser_mode="builtin", headless=True)
manager2 = BrowserManager(browser_config=browser_config2, logger=logger)
# Step 3: Start both managers (should connect to the same builtin browser)
print(f"\n{INFO}3. Starting both managers{RESET}")
try:
await manager1.start()
print(f"{SUCCESS}First manager started{RESET}")
await manager2.start()
print(f"{SUCCESS}Second manager started{RESET}")
# Check if they got the same CDP URL
cdp_url1 = manager1.strategy.config.cdp_url
cdp_url2 = manager2.strategy.config.cdp_url
if cdp_url1 == cdp_url2:
print(
f"{SUCCESS}Both managers connected to the same browser: {cdp_url1}{RESET}"
)
else:
print(
f"{WARNING}Managers connected to different browsers: {cdp_url1} and {cdp_url2}{RESET}"
)
except Exception as e:
print(f"{ERROR}Failed to start managers: {str(e)}{RESET}")
return False
# Step 4: Test using both managers
print(f"\n{INFO}4. Testing operations with both managers{RESET}")
try:
# First manager creates a page
page1, ctx1 = await manager1.get_page(CrawlerRunConfig())
await page1.goto("https://example.com")
title1 = await page1.title()
print(f"{SUCCESS}Manager 1 page title: {title1}{RESET}")
# Second manager creates a page
page2, ctx2 = await manager2.get_page(CrawlerRunConfig())
await page2.goto("https://example.org")
title2 = await page2.title()
print(f"{SUCCESS}Manager 2 page title: {title2}{RESET}")
# Clean up
await page1.close()
await page2.close()
except Exception as e:
print(f"{ERROR}Failed to use both managers: {str(e)}{RESET}")
return False
# Step 5: Close both managers
print(f"\n{INFO}5. Closing both managers{RESET}")
try:
await manager1.close()
print(f"{SUCCESS}First manager closed{RESET}")
await manager2.close()
print(f"{SUCCESS}Second manager closed{RESET}")
except Exception as e:
print(f"{ERROR}Failed to close managers: {str(e)}{RESET}")
return False
return True
async def test_edge_cases():
"""Test edge cases like multiple starts, killing browser during operations, etc."""
print(f"\n{INFO}========== Testing Edge Cases =========={RESET}")
# Step 1: Test multiple starts with the same manager
print(f"\n{INFO}1. Testing multiple starts with the same manager{RESET}")
browser_config = BrowserConfig(browser_mode="builtin", headless=True)
manager = BrowserManager(browser_config=browser_config, logger=logger)
try:
await manager.start()
print(f"{SUCCESS}First start successful{RESET}")
# Try to start again
await manager.start()
print(f"{SUCCESS}Second start completed without errors{RESET}")
# Test if it's still functional
page, context = await manager.get_page(CrawlerRunConfig())
await page.goto("https://example.com")
title = await page.title()
print(
f"{SUCCESS}Page operations work after multiple starts. Title: {title}{RESET}"
)
await page.close()
except Exception as e:
print(f"{ERROR}Multiple starts test failed: {str(e)}{RESET}")
return False
finally:
await manager.close()
# Step 2: Test killing the browser while manager is active
print(f"\n{INFO}2. Testing killing the browser while manager is active{RESET}")
manager = BrowserManager(browser_config=browser_config, logger=logger)
try:
await manager.start()
print(f"{SUCCESS}Manager started{RESET}")
# Kill the browser directly
print(f"{INFO}Killing the browser...{RESET}")
await manager.strategy.kill_builtin_browser()
print(f"{SUCCESS}Browser killed{RESET}")
# Try to get a page (should fail or launch a new browser)
try:
page, context = await manager.get_page(CrawlerRunConfig())
print(
f"{WARNING}Page request succeeded despite killed browser (might have auto-restarted){RESET}"
)
title = await page.title()
print(f"{SUCCESS}Got page title: {title}{RESET}")
await page.close()
except Exception as e:
print(
f"{SUCCESS}Page request failed as expected after browser was killed: {str(e)}{RESET}"
)
except Exception as e:
print(f"{ERROR}Kill during operation test failed: {str(e)}{RESET}")
return False
finally:
await manager.close()
return True
async def cleanup_browsers():
"""Clean up any remaining builtin browsers"""
print(f"\n{INFO}========== Cleaning Up Builtin Browsers =========={RESET}")
browser_config = BrowserConfig(browser_mode="builtin", headless=True)
manager = BrowserManager(browser_config=browser_config, logger=logger)
try:
# No need to start, just access the strategy directly
strategy = manager.strategy
if isinstance(strategy, BuiltinBrowserStrategy):
result = await strategy.kill_builtin_browser()
if result:
print(f"{SUCCESS}Successfully killed all builtin browsers{RESET}")
else:
print(f"{WARNING}No builtin browsers found to kill{RESET}")
else:
print(f"{ERROR}Wrong strategy type: {strategy.__class__.__name__}{RESET}")
except Exception as e:
print(f"{ERROR}Cleanup failed: {str(e)}{RESET}")
finally:
# Just to be safe
try:
await manager.close()
except:
pass
async def test_performance_scaling():
"""Test performance with multiple browsers and pages.
This test creates multiple browsers on different ports,
spawns multiple pages per browser, and measures performance metrics.
"""
print(f"\n{INFO}========== Testing Performance Scaling =========={RESET}")
# Configuration parameters
num_browsers = 10
pages_per_browser = 10
total_pages = num_browsers * pages_per_browser
base_port = 9222
# Set up a measuring mechanism for memory
import psutil
import gc
# Force garbage collection before starting
gc.collect()
process = psutil.Process()
initial_memory = process.memory_info().rss / 1024 / 1024 # in MB
peak_memory = initial_memory
# Report initial configuration
print(
f"{INFO}Test configuration: {num_browsers} browsers × {pages_per_browser} pages = {total_pages} total crawls{RESET}"
)
# List to track managers
managers: List[BrowserManager] = []
all_pages = []
# Get crawl4ai home directory
crawl4ai_home = os.path.expanduser("~/.crawl4ai")
temp_dir = os.path.join(crawl4ai_home, "temp")
os.makedirs(temp_dir, exist_ok=True)
# Create all managers but don't start them yet
manager_configs = []
for i in range(num_browsers):
port = base_port + i
browser_config = BrowserConfig(
browser_mode="builtin",
headless=True,
debugging_port=port,
user_data_dir=os.path.join(temp_dir, f"browser_profile_{i}"),
)
manager = BrowserManager(browser_config=browser_config, logger=logger)
manager.strategy.shutting_down = True
manager_configs.append((manager, i, port))
# Define async function to start a single manager
async def start_manager(manager, index, port):
try:
await manager.start()
return manager
except Exception as e:
print(
f"{ERROR}Failed to start browser {index + 1} on port {port}: {str(e)}{RESET}"
)
return None
# Start all managers in parallel
start_tasks = [
start_manager(manager, i, port) for manager, i, port in manager_configs
]
started_managers = await asyncio.gather(*start_tasks)
# Filter out None values (failed starts) and add to managers list
managers = [m for m in started_managers if m is not None]
if len(managers) == 0:
print(f"{ERROR}All browser managers failed to start. Aborting test.{RESET}")
return False
if len(managers) < num_browsers:
print(
f"{WARNING}Only {len(managers)} out of {num_browsers} browser managers started successfully{RESET}"
)
# Create pages for each browser
for i, manager in enumerate(managers):
try:
pages = await manager.get_pages(CrawlerRunConfig(), count=pages_per_browser)
all_pages.extend(pages)
except Exception as e:
print(f"{ERROR}Failed to create pages for browser {i + 1}: {str(e)}{RESET}")
# Check memory after page creation
gc.collect()
current_memory = process.memory_info().rss / 1024 / 1024
peak_memory = max(peak_memory, current_memory)
# Ask for confirmation before loading
confirmation = input(
f"{WARNING}Do you want to proceed with loading pages? (y/n): {RESET}"
)
# Start timing the crawl phase
start_time = time.time()
if confirmation.lower() == "y":
load_start_time = time.time()
# Function to load a single page
async def load_page(page_ctx, index):
page, _ = page_ctx
try:
await page.goto(f"https://example.com/page{index}", timeout=30000)
title = await page.title()
return title
except Exception as e:
return f"Error: {str(e)}"
# Load all pages concurrently
load_tasks = [load_page(page_ctx, i) for i, page_ctx in enumerate(all_pages)]
load_results = await asyncio.gather(*load_tasks, return_exceptions=True)
# Count successes and failures
successes = sum(
1 for r in load_results if isinstance(r, str) and not r.startswith("Error")
)
failures = len(load_results) - successes
load_time = time.time() - load_start_time
total_test_time = time.time() - start_time
# Check memory after loading (peak memory)
gc.collect()
current_memory = process.memory_info().rss / 1024 / 1024
peak_memory = max(peak_memory, current_memory)
# Calculate key metrics
memory_per_page = peak_memory / successes if successes > 0 else 0
time_per_crawl = total_test_time / successes if successes > 0 else 0
crawls_per_second = successes / total_test_time if total_test_time > 0 else 0
crawls_per_minute = crawls_per_second * 60
crawls_per_hour = crawls_per_minute * 60
# Print simplified performance summary
from rich.console import Console
from rich.table import Table
console = Console()
# Create a simple summary table
table = Table(title="CRAWL4AI PERFORMANCE SUMMARY")
table.add_column("Metric", style="cyan")
table.add_column("Value", style="green")
table.add_row("Total Crawls Completed", f"{successes}")
table.add_row("Total Time", f"{total_test_time:.2f} seconds")
table.add_row("Time Per Crawl", f"{time_per_crawl:.2f} seconds")
table.add_row("Crawling Speed", f"{crawls_per_second:.2f} crawls/second")
table.add_row("Projected Rate (1 minute)", f"{crawls_per_minute:.0f} crawls")
table.add_row("Projected Rate (1 hour)", f"{crawls_per_hour:.0f} crawls")
table.add_row("Peak Memory Usage", f"{peak_memory:.2f} MB")
table.add_row("Memory Per Crawl", f"{memory_per_page:.2f} MB")
# Display the table
console.print(table)
# Ask confirmation before cleanup
confirmation = input(
f"{WARNING}Do you want to proceed with cleanup? (y/n): {RESET}"
)
if confirmation.lower() != "y":
print(f"{WARNING}Cleanup aborted by user{RESET}")
return False
# Close all pages
for page, _ in all_pages:
try:
await page.close()
except:
pass
# Close all managers
for manager in managers:
try:
await manager.close()
except:
pass
# Remove the temp directory
import shutil
if os.path.exists(temp_dir):
shutil.rmtree(temp_dir)
return True
async def test_performance_scaling_lab(num_browsers: int = 10, pages_per_browser: int = 10):
"""Test performance with multiple browsers and pages.
This test creates multiple browsers on different ports,
spawns multiple pages per browser, and measures performance metrics.
"""
print(f"\n{INFO}========== Testing Performance Scaling =========={RESET}")
# Configuration parameters
total_pages = num_browsers * pages_per_browser
base_port = 9222
# Set up a measuring mechanism for memory
import psutil
import gc
# Force garbage collection before starting
gc.collect()
process = psutil.Process()
initial_memory = process.memory_info().rss / 1024 / 1024 # in MB
peak_memory = initial_memory
# Report initial configuration
print(
f"{INFO}Test configuration: {num_browsers} browsers × {pages_per_browser} pages = {total_pages} total crawls{RESET}"
)
# List to track managers
managers: List[BrowserManager] = []
all_pages = []
# Get crawl4ai home directory
crawl4ai_home = os.path.expanduser("~/.crawl4ai")
temp_dir = os.path.join(crawl4ai_home, "temp")
os.makedirs(temp_dir, exist_ok=True)
# Create all managers but don't start them yet
manager_configs = []
for i in range(num_browsers):
port = base_port + i
browser_config = BrowserConfig(
browser_mode="builtin",
headless=True,
debugging_port=port,
user_data_dir=os.path.join(temp_dir, f"browser_profile_{i}"),
)
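# Note (assumption): each browser gets its own user_data_dir so that concurrently
# launched instances do not contend for the same on-disk profile/lock.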
manager = BrowserManager(browser_config=browser_config, logger=logger)
manager.strategy.shutting_down = True
manager_configs.append((manager, i, port))
# Define async function to start a single manager
async def start_manager(manager, index, port):
try:
await manager.start()
return manager
except Exception as e:
print(
f"{ERROR}Failed to start browser {index + 1} on port {port}: {str(e)}{RESET}"
)
return None
# Start all managers in parallel
start_tasks = [
start_manager(manager, i, port) for manager, i, port in manager_configs
]
started_managers = await asyncio.gather(*start_tasks)
# Filter out None values (failed starts) and add to managers list
managers = [m for m in started_managers if m is not None]
if len(managers) == 0:
print(f"{ERROR}All browser managers failed to start. Aborting test.{RESET}")
return False
if len(managers) < num_browsers:
print(
f"{WARNING}Only {len(managers)} out of {num_browsers} browser managers started successfully{RESET}"
)
# Create pages for each browser
for i, manager in enumerate(managers):
try:
pages = await manager.get_pages(CrawlerRunConfig(), count=pages_per_browser)
all_pages.extend(pages)
except Exception as e:
print(f"{ERROR}Failed to create pages for browser {i + 1}: {str(e)}{RESET}")
# Check memory after page creation
gc.collect()
current_memory = process.memory_info().rss / 1024 / 1024
peak_memory = max(peak_memory, current_memory)
# Ask for confirmation before loading
confirmation = input(
f"{WARNING}Do you want to proceed with loading pages? (y/n): {RESET}"
)
# Start the overall timer for the load/measurement phase
start_time = time.time()
if confirmation.lower() == "y":
load_start_time = time.time()
# Function to load a single page
async def load_page(page_ctx, index):
page, _ = page_ctx
try:
await page.goto(f"https://example.com/page{index}", timeout=30000)
title = await page.title()
return title
except Exception as e:
return f"Error: {str(e)}"
# Load all pages concurrently
load_tasks = [load_page(page_ctx, i) for i, page_ctx in enumerate(all_pages)]
load_results = await asyncio.gather(*load_tasks, return_exceptions=True)
# Count successes and failures
successes = sum(
1 for r in load_results if isinstance(r, str) and not r.startswith("Error")
)
failures = len(load_results) - successes
load_time = time.time() - load_start_time
total_test_time = time.time() - start_time
# Check memory after loading (peak memory)
gc.collect()
current_memory = process.memory_info().rss / 1024 / 1024
peak_memory = max(peak_memory, current_memory)
# Calculate key metrics
memory_per_page = peak_memory / successes if successes > 0 else 0
time_per_crawl = total_test_time / successes if successes > 0 else 0
crawls_per_second = successes / total_test_time if total_test_time > 0 else 0
crawls_per_minute = crawls_per_second * 60
crawls_per_hour = crawls_per_minute * 60
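# Worked example (illustrative): 2.5 crawls/second -> 150 crawls/minute -> 9000 crawls/hour.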
# Print simplified performance summary
from rich.console import Console
from rich.table import Table
console = Console()
# Create a simple summary table
table = Table(title="CRAWL4AI PERFORMANCE SUMMARY")
table.add_column("Metric", style="cyan")
table.add_column("Value", style="green")
table.add_row("Total Crawls Completed", f"{successes}")
table.add_row("Total Time", f"{total_test_time:.2f} seconds")
table.add_row("Time Per Crawl", f"{time_per_crawl:.2f} seconds")
table.add_row("Crawling Speed", f"{crawls_per_second:.2f} crawls/second")
table.add_row("Projected Rate (1 minute)", f"{crawls_per_minute:.0f} crawls")
table.add_row("Projected Rate (1 hour)", f"{crawls_per_hour:.0f} crawls")
table.add_row("Peak Memory Usage", f"{peak_memory:.2f} MB")
table.add_row("Memory Per Crawl", f"{memory_per_page:.2f} MB")
# Display the table
console.print(table)
# Ask confirmation before cleanup
confirmation = input(
f"{WARNING}Do you want to proceed with cleanup? (y/n): {RESET}"
)
if confirmation.lower() != "y":
print(f"{WARNING}Cleanup aborted by user{RESET}")
return False
# Close all pages
for page, _ in all_pages:
try:
await page.close()
except:
pass
# Close all managers
for manager in managers:
try:
await manager.close()
except:
pass
# Remove the temp directory
import shutil
if os.path.exists(temp_dir):
shutil.rmtree(temp_dir)
return True
async def main():
"""Run all tests"""
try:
print(f"{INFO}Starting builtin browser tests with browser module{RESET}")
# # Run browser creation test
# manager, cdp_url = await test_builtin_browser_creation()
# if not manager:
# print(f"{ERROR}Browser creation failed, cannot continue tests{RESET}")
# return
# # Run page operations test
# await test_page_operations(manager)
# # Run browser status and management test
# await test_browser_status_management(manager)
# # Close manager before multiple manager test
# await manager.close()
# Run multiple managers test
await test_multiple_managers()
# Run performance scaling test
await test_performance_scaling()
# Run cleanup test
await cleanup_browsers()
# Run edge cases test
await test_edge_cases()
print(f"\n{SUCCESS}All tests completed!{RESET}")
except Exception as e:
print(f"\n{ERROR}Test failed with error: {str(e)}{RESET}")
import traceback
traceback.print_exc()
finally:
# Clean up: kill any remaining builtin browsers
await cleanup_browsers()
print(f"{SUCCESS}Test cleanup complete{RESET}")
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,160 @@
"""Test examples for BuiltinBrowserStrategy.
These examples demonstrate the functionality of BuiltinBrowserStrategy
and serve as functional tests.
"""
import asyncio
import os
import sys
# Add the project root to Python path if running directly
if __name__ == "__main__":
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
from crawl4ai.browser import BrowserManager
from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig
from crawl4ai.async_logger import AsyncLogger
# Create a logger for clear terminal output
logger = AsyncLogger(verbose=True, log_file=None)
async def test_builtin_browser():
"""Test using a builtin browser that persists between sessions."""
logger.info("Testing builtin browser", tag="TEST")
browser_config = BrowserConfig(
browser_mode="builtin",
headless=True
)
manager = BrowserManager(browser_config=browser_config, logger=logger)
try:
# Start should connect to existing builtin browser or create one
await manager.start()
logger.info("Connected to builtin browser", tag="TEST")
# Test page creation
crawler_config = CrawlerRunConfig()
page, context = await manager.get_page(crawler_config)
# Test navigation
await page.goto("https://example.com")
title = await page.title()
logger.info(f"Page title: {title}", tag="TEST")
# Close manager (should not close the builtin browser)
await manager.close()
logger.info("First session closed", tag="TEST")
# Create a second manager to verify browser persistence
logger.info("Creating second session to verify persistence", tag="TEST")
manager2 = BrowserManager(browser_config=browser_config, logger=logger)
await manager2.start()
logger.info("Connected to existing builtin browser", tag="TEST")
page2, context2 = await manager2.get_page(crawler_config)
await page2.goto("https://example.org")
title2 = await page2.title()
logger.info(f"Second session page title: {title2}", tag="TEST")
await manager2.close()
logger.info("Second session closed successfully", tag="TEST")
return True
except Exception as e:
logger.error(f"Test failed: {str(e)}", tag="TEST")
try:
await manager.close()
except:
pass
return False
async def test_builtin_browser_status():
"""Test getting status of the builtin browser."""
logger.info("Testing builtin browser status", tag="TEST")
from crawl4ai.browser.strategies import BuiltinBrowserStrategy
browser_config = BrowserConfig(
browser_mode="builtin",
headless=True
)
# Create strategy directly to access its status methods
strategy = BuiltinBrowserStrategy(browser_config, logger)
try:
# Get status before starting (should be not running)
status_before = await strategy.get_builtin_browser_status()
logger.info(f"Initial status: {status_before}", tag="TEST")
# Start the browser
await strategy.start()
logger.info("Browser started successfully", tag="TEST")
# Get status after starting
status_after = await strategy.get_builtin_browser_status()
logger.info(f"Status after start: {status_after}", tag="TEST")
# Create a page to verify functionality
crawler_config = CrawlerRunConfig()
page, context = await strategy.get_page(crawler_config)
await page.goto("https://example.com")
title = await page.title()
logger.info(f"Page title: {title}", tag="TEST")
# Close strategy (should not kill the builtin browser)
await strategy.close()
logger.info("Strategy closed successfully", tag="TEST")
# Create a new strategy object
strategy2 = BuiltinBrowserStrategy(browser_config, logger)
# Get status again (should still be running)
status_final = await strategy2.get_builtin_browser_status()
logger.info(f"Final status: {status_final}", tag="TEST")
# Verify that the status shows the browser is running
is_running = status_final.get('running', False)
logger.info(f"Builtin browser persistence confirmed: {is_running}", tag="TEST")
# Kill the builtin browser to clean up
logger.info("Killing builtin browser", tag="TEST")
success = await strategy2.kill_builtin_browser()
logger.info(f"Killed builtin browser successfully: {success}", tag="TEST")
return is_running and success
except Exception as e:
logger.error(f"Test failed: {str(e)}", tag="TEST")
try:
await strategy.close()
# Try to kill the builtin browser to clean up
strategy2 = BuiltinBrowserStrategy(browser_config, logger)
await strategy2.kill_builtin_browser()
except:
pass
return False
async def run_tests():
"""Run all tests sequentially."""
results = []
results.append(await test_builtin_browser())
results.append(await test_builtin_browser_status())
# Print summary
total = len(results)
passed = sum(results)
logger.info(f"Tests complete: {passed}/{total} passed", tag="SUMMARY")
if passed == total:
logger.success("All tests passed!", tag="SUMMARY")
else:
logger.error(f"{total - passed} tests failed", tag="SUMMARY")
if __name__ == "__main__":
asyncio.run(run_tests())

View File

@@ -0,0 +1,228 @@
"""Test examples for CDPBrowserStrategy.
These examples demonstrate the functionality of CDPBrowserStrategy
and serve as functional tests.
"""
import asyncio
import os
import sys
# Add the project root to Python path if running directly
if __name__ == "__main__":
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
from crawl4ai.browser import BrowserManager
from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig
from crawl4ai.async_logger import AsyncLogger
# Create a logger for clear terminal output
logger = AsyncLogger(verbose=True, log_file=None)
async def test_cdp_launch_connect():
"""Test launching a browser and connecting via CDP."""
logger.info("Testing launch and connect via CDP", tag="TEST")
browser_config = BrowserConfig(
use_managed_browser=True,
browser_mode="cdp",
headless=True
)
manager = BrowserManager(browser_config=browser_config, logger=logger)
try:
await manager.start()
logger.info("Browser launched and connected via CDP", tag="TEST")
# Test with multiple pages
pages = []
for i in range(3):
crawler_config = CrawlerRunConfig()
page, context = await manager.get_page(crawler_config)
await page.goto(f"https://example.com?test={i}")
pages.append(page)
logger.info(f"Created page {i+1}", tag="TEST")
# Verify all pages are working
for i, page in enumerate(pages):
title = await page.title()
logger.info(f"Page {i+1} title: {title}", tag="TEST")
await manager.close()
logger.info("Browser closed successfully", tag="TEST")
return True
except Exception as e:
logger.error(f"Test failed: {str(e)}", tag="TEST")
try:
await manager.close()
except:
pass
return False
async def test_cdp_with_user_data_dir():
"""Test CDP browser with a user data directory."""
logger.info("Testing CDP browser with user data directory", tag="TEST")
# Create a temporary user data directory
import tempfile
user_data_dir = tempfile.mkdtemp(prefix="crawl4ai-test-")
logger.info(f"Created temporary user data directory: {user_data_dir}", tag="TEST")
browser_config = BrowserConfig(
headless=True,
browser_mode="cdp",
user_data_dir=user_data_dir
)
manager = BrowserManager(browser_config=browser_config, logger=logger)
try:
await manager.start()
logger.info("Browser launched with user data directory", tag="TEST")
# Navigate to a page and store some data
crawler_config = CrawlerRunConfig()
page, context = await manager.get_page(crawler_config)
# Set a cookie
await context.add_cookies([{
"name": "test_cookie",
"value": "test_value",
"url": "https://example.com"
}])
# Visit the site
await page.goto("https://example.com")
# Verify cookie was set
cookies = await context.cookies(["https://example.com"])
has_test_cookie = any(cookie["name"] == "test_cookie" for cookie in cookies)
logger.info(f"Cookie set successfully: {has_test_cookie}", tag="TEST")
# Close the browser
await manager.close()
logger.info("First browser session closed", tag="TEST")
# Start a new browser with the same user data directory
logger.info("Starting second browser session with same user data directory", tag="TEST")
manager2 = BrowserManager(browser_config=browser_config, logger=logger)
await manager2.start()
# Get a new page and check if the cookie persists
page2, context2 = await manager2.get_page(crawler_config)
await page2.goto("https://example.com")
# Verify cookie persisted
cookies2 = await context2.cookies(["https://example.com"])
has_test_cookie2 = any(cookie["name"] == "test_cookie" for cookie in cookies2)
logger.info(f"Cookie persisted across sessions: {has_test_cookie2}", tag="TEST")
# Clean up
await manager2.close()
# Remove temporary directory
import shutil
shutil.rmtree(user_data_dir, ignore_errors=True)
logger.info(f"Removed temporary user data directory", tag="TEST")
return has_test_cookie and has_test_cookie2
except Exception as e:
logger.error(f"Test failed: {str(e)}", tag="TEST")
try:
await manager.close()
except:
pass
# Clean up temporary directory
try:
import shutil
shutil.rmtree(user_data_dir, ignore_errors=True)
except:
pass
return False
async def test_cdp_session_management():
"""Test session management with CDP browser."""
logger.info("Testing session management with CDP browser", tag="TEST")
browser_config = BrowserConfig(
use_managed_browser=True,
headless=True
)
manager = BrowserManager(browser_config=browser_config, logger=logger)
try:
await manager.start()
logger.info("Browser launched successfully", tag="TEST")
# Create two sessions
session1_id = "test_session_1"
session2_id = "test_session_2"
# Set up first session
crawler_config1 = CrawlerRunConfig(session_id=session1_id)
page1, context1 = await manager.get_page(crawler_config1)
await page1.goto("https://example.com")
await page1.evaluate("localStorage.setItem('session1_data', 'test_value')")
logger.info(f"Set up session 1 with ID: {session1_id}", tag="TEST")
# Set up second session
crawler_config2 = CrawlerRunConfig(session_id=session2_id)
page2, context2 = await manager.get_page(crawler_config2)
await page2.goto("https://example.org")
await page2.evaluate("localStorage.setItem('session2_data', 'test_value2')")
logger.info(f"Set up session 2 with ID: {session2_id}", tag="TEST")
# Get first session again
page1_again, _ = await manager.get_page(crawler_config1)
# Verify it's the same page and data persists
is_same_page = page1 == page1_again
data1 = await page1_again.evaluate("localStorage.getItem('session1_data')")
logger.info(f"Session 1 reuse successful: {is_same_page}, data: {data1}", tag="TEST")
# Kill first session
await manager.kill_session(session1_id)
logger.info(f"Killed session 1", tag="TEST")
# Verify second session still works
data2 = await page2.evaluate("localStorage.getItem('session2_data')")
logger.info(f"Session 2 still functional after killing session 1, data: {data2}", tag="TEST")
# Clean up
await manager.close()
logger.info("Browser closed successfully", tag="TEST")
return is_same_page and data1 == "test_value" and data2 == "test_value2"
except Exception as e:
logger.error(f"Test failed: {str(e)}", tag="TEST")
try:
await manager.close()
except:
pass
return False
async def run_tests():
"""Run all tests sequentially."""
results = []
# results.append(await test_cdp_launch_connect())
results.append(await test_cdp_with_user_data_dir())
results.append(await test_cdp_session_management())
# Print summary
total = len(results)
passed = sum(results)
logger.info(f"Tests complete: {passed}/{total} passed", tag="SUMMARY")
if passed == total:
logger.success("All tests passed!", tag="SUMMARY")
else:
logger.error(f"{total - passed} tests failed", tag="SUMMARY")
if __name__ == "__main__":
asyncio.run(run_tests())

View File

@@ -0,0 +1,77 @@
"""Combined test runner for all browser module tests.
This script runs all the browser module tests in sequence and
provides a comprehensive summary.
"""
import asyncio
import os
import sys
import time
# Add the project root to Python path if running directly
if __name__ == "__main__":
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
from crawl4ai.async_logger import AsyncLogger
# Create a logger for clear terminal output
logger = AsyncLogger(verbose=True, log_file=None)
async def run_test_module(module_name, header):
"""Run all tests in a module and return results."""
logger.info(f"\n{'-'*30}", tag="TEST")
logger.info(f"RUNNING: {header}", tag="TEST")
logger.info(f"{'-'*30}", tag="TEST")
# Import the module dynamically
module = __import__(f"tests.browser.{module_name}", fromlist=["run_tests"])
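# Example (illustrative): module_name="test_cdp_strategy" imports
# tests.browser.test_cdp_strategy, and its run_tests() coroutine is awaited below.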
# Track time for performance measurement
start_time = time.time()
# Run the tests
await module.run_tests()
# Calculate time taken
time_taken = time.time() - start_time
logger.info(f"Time taken: {time_taken:.2f} seconds", tag="TIMING")
return time_taken
async def main():
"""Run all test modules."""
logger.info("STARTING COMPREHENSIVE BROWSER MODULE TESTS", tag="MAIN")
# List of test modules to run
test_modules = [
("test_browser_manager", "Browser Manager Tests"),
("test_playwright_strategy", "Playwright Strategy Tests"),
("test_cdp_strategy", "CDP Strategy Tests"),
("test_builtin_strategy", "Builtin Browser Strategy Tests"),
("test_profiles", "Profile Management Tests")
]
# Run each test module
timings = {}
for module_name, header in test_modules:
try:
time_taken = await run_test_module(module_name, header)
timings[module_name] = time_taken
except Exception as e:
logger.error(f"Error running {module_name}: {str(e)}", tag="ERROR")
# Print summary
logger.info("\n\nTEST SUMMARY:", tag="SUMMARY")
logger.info(f"{'-'*50}", tag="SUMMARY")
for module_name, header in test_modules:
if module_name in timings:
logger.info(f"{header}: {timings[module_name]:.2f} seconds", tag="SUMMARY")
else:
logger.error(f"{header}: FAILED TO RUN", tag="SUMMARY")
logger.info(f"{'-'*50}", tag="SUMMARY")
total_time = sum(timings.values())
logger.info(f"Total time: {total_time:.2f} seconds", tag="SUMMARY")
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,17 @@
from crawl4ai.browser_profiler import BrowserProfiler
import asyncio
if __name__ == "__main__":
# Test launching a standalone browser
async def test_standalone_browser():
profiler = BrowserProfiler()
cdp_url = await profiler.launch_standalone_browser(
browser_type="chromium",
user_data_dir="~/.crawl4ai/browser_profile/test-browser-data",
debugging_port=9222,
headless=False
)
print(f"CDP URL: {cdp_url}")
asyncio.run(test_standalone_browser())

View File

@@ -0,0 +1,902 @@
"""
Test examples for parallel crawling with the browser module.
These examples demonstrate the functionality of parallel page creation
and serve as functional tests for multi-page crawling performance.
"""
import asyncio
import os
import sys
import time
from typing import List
# Add the project root to Python path if running directly
if __name__ == "__main__":
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
from crawl4ai.browser import BrowserManager
from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig
from crawl4ai.async_logger import AsyncLogger
# Create a logger for clear terminal output
logger = AsyncLogger(verbose=True, log_file=None)
async def test_get_pages_basic():
"""Test basic functionality of get_pages method."""
logger.info("Testing basic get_pages functionality", tag="TEST")
browser_config = BrowserConfig(headless=True)
manager = BrowserManager(browser_config=browser_config, logger=logger)
try:
await manager.start()
# Request 3 pages
crawler_config = CrawlerRunConfig()
pages = await manager.get_pages(crawler_config, count=3)
# Verify we got the correct number of pages
assert len(pages) == 3, f"Expected 3 pages, got {len(pages)}"
# Verify each page is valid
for i, (page, context) in enumerate(pages):
await page.goto("https://example.com")
title = await page.title()
logger.info(f"Page {i+1} title: {title}", tag="TEST")
assert title, f"Page {i+1} has no title"
await manager.close()
logger.success("Basic get_pages test completed successfully", tag="TEST")
return True
except Exception as e:
logger.error(f"Test failed: {str(e)}", tag="TEST")
try:
await manager.close()
except:
pass
return False
async def test_parallel_approaches_comparison():
"""Compare two parallel crawling approaches:
1. Create a page for each URL on-demand (get_page + gather)
2. Get all pages upfront with get_pages, then use them (get_pages + gather)
"""
logger.info("Comparing different parallel crawling approaches", tag="TEST")
urls = [
"https://example.com/page1",
"https://crawl4ai.com",
"https://kidocode.com",
"https://bbc.com",
# "https://example.com/page1",
# "https://example.com/page2",
# "https://example.com/page3",
# "https://example.com/page4",
]
browser_config = BrowserConfig(headless=False)
manager = BrowserManager(browser_config=browser_config, logger=logger)
try:
await manager.start()
# Approach 1: Create a page for each URL on-demand and run in parallel
logger.info("Testing approach 1: get_page for each URL + gather", tag="TEST")
start_time = time.time()
async def fetch_title_approach1(url):
"""Create a new page for each URL, go to the URL, and get title"""
crawler_config = CrawlerRunConfig(url=url)
page, context = await manager.get_page(crawler_config)
try:
await page.goto(url)
title = await page.title()
return title
finally:
await page.close()
# Run fetch_title_approach1 for each URL in parallel
tasks = [fetch_title_approach1(url) for url in urls]
approach1_results = await asyncio.gather(*tasks)
approach1_time = time.time() - start_time
logger.info(f"Approach 1 time (get_page + gather): {approach1_time:.2f}s", tag="TEST")
# Approach 2: Get all pages upfront with get_pages, then use them in parallel
logger.info("Testing approach 2: get_pages upfront + gather", tag="TEST")
start_time = time.time()
# Get all pages upfront
crawler_config = CrawlerRunConfig()
pages = await manager.get_pages(crawler_config, count=len(urls))
async def fetch_title_approach2(page_ctx, url):
"""Use a pre-created page to go to URL and get title"""
page, _ = page_ctx
try:
await page.goto(url)
title = await page.title()
return title
finally:
await page.close()
# Use the pre-created pages to fetch titles in parallel
tasks = [fetch_title_approach2(page_ctx, url) for page_ctx, url in zip(pages, urls)]
approach2_results = await asyncio.gather(*tasks)
approach2_time = time.time() - start_time
logger.info(f"Approach 2 time (get_pages + gather): {approach2_time:.2f}s", tag="TEST")
# Compare results and performance
speedup = approach1_time / approach2_time if approach2_time > 0 else 0
if speedup > 1:
logger.success(f"Approach 2 (get_pages upfront) was {speedup:.2f}x faster", tag="TEST")
else:
logger.info(f"Approach 1 (get_page + gather) was {1/speedup:.2f}x faster", tag="TEST")
# Verify same content was retrieved in both approaches
assert len(approach1_results) == len(approach2_results), "Result count mismatch"
# Sort results for comparison since parallel execution might complete in different order
assert sorted(approach1_results) == sorted(approach2_results), "Results content mismatch"
await manager.close()
return True
except Exception as e:
logger.error(f"Test failed: {str(e)}", tag="TEST")
try:
await manager.close()
except:
pass
return False
async def test_multi_browser_scaling(num_browsers=3, pages_per_browser=5):
"""Test performance with multiple browsers and pages per browser.
Compares two approaches:
1. On-demand page creation (get_page + gather)
2. Pre-created pages (get_pages + gather)
"""
logger.info(f"Testing multi-browser scaling with {num_browsers} browsers × {pages_per_browser} pages", tag="TEST")
# Generate test URLs
total_pages = num_browsers * pages_per_browser
urls = [f"https://example.com/page_{i}" for i in range(total_pages)]
# Create browser managers
managers = []
base_port = 9222
try:
# Start all browsers in parallel
start_tasks = []
for i in range(num_browsers):
browser_config = BrowserConfig(
headless=True # Using default browser mode like in test_parallel_approaches_comparison
)
manager = BrowserManager(browser_config=browser_config, logger=logger)
start_tasks.append(manager.start())
managers.append(manager)
await asyncio.gather(*start_tasks)
# Distribute URLs among managers
urls_per_manager = {}
for i, manager in enumerate(managers):
start_idx = i * pages_per_browser
end_idx = min(start_idx + pages_per_browser, len(urls))
urls_per_manager[manager] = urls[start_idx:end_idx]
# Approach 1: Create a page for each URL on-demand and run in parallel
logger.info("Testing approach 1: get_page for each URL + gather", tag="TEST")
start_time = time.time()
async def fetch_title_approach1(manager, url):
"""Create a new page for the URL, go to the URL, and get title"""
crawler_config = CrawlerRunConfig(url=url)
page, context = await manager.get_page(crawler_config)
try:
await page.goto(url)
title = await page.title()
return title
finally:
await page.close()
# Run fetch_title_approach1 for each URL in parallel
tasks = []
for manager, manager_urls in urls_per_manager.items():
for url in manager_urls:
tasks.append(fetch_title_approach1(manager, url))
approach1_results = await asyncio.gather(*tasks)
approach1_time = time.time() - start_time
logger.info(f"Approach 1 time (get_page + gather): {approach1_time:.2f}s", tag="TEST")
# Approach 2: Get all pages upfront with get_pages, then use them in parallel
logger.info("Testing approach 2: get_pages upfront + gather", tag="TEST")
start_time = time.time()
# Get all pages upfront for each manager
all_pages = []
for manager, manager_urls in urls_per_manager.items():
crawler_config = CrawlerRunConfig()
pages = await manager.get_pages(crawler_config, count=len(manager_urls))
all_pages.extend(zip(pages, manager_urls))
async def fetch_title_approach2(page_ctx, url):
"""Use a pre-created page to go to URL and get title"""
page, _ = page_ctx
try:
await page.goto(url)
title = await page.title()
return title
finally:
await page.close()
# Use the pre-created pages to fetch titles in parallel
tasks = [fetch_title_approach2(page_ctx, url) for page_ctx, url in all_pages]
approach2_results = await asyncio.gather(*tasks)
approach2_time = time.time() - start_time
logger.info(f"Approach 2 time (get_pages + gather): {approach2_time:.2f}s", tag="TEST")
# Compare results and performance
speedup = approach1_time / approach2_time if approach2_time > 0 else 0
pages_per_second = total_pages / approach2_time
# Show a simple summary
logger.info(f"📊 Summary: {num_browsers} browsers × {pages_per_browser} pages = {total_pages} total crawls", tag="TEST")
logger.info(f"⚡ Performance: {pages_per_second:.1f} pages/second ({pages_per_second*60:.0f} pages/minute)", tag="TEST")
logger.info(f"🚀 Total crawl time: {approach2_time:.2f} seconds", tag="TEST")
if speedup > 1:
logger.success(f"✅ Approach 2 (get_pages upfront) was {speedup:.2f}x faster", tag="TEST")
else:
logger.info(f"✅ Approach 1 (get_page + gather) was {1/speedup:.2f}x faster", tag="TEST")
# Close all managers
for manager in managers:
await manager.close()
return True
except Exception as e:
logger.error(f"Test failed: {str(e)}", tag="TEST")
# Clean up
for manager in managers:
try:
await manager.close()
except:
pass
return False
async def grid_search_optimal_configuration(total_urls=50):
"""Perform a grid search to find the optimal balance between number of browsers and pages per browser.
This function tests different combinations of browser count and pages per browser,
while keeping the total number of URLs constant. It measures performance metrics
for each configuration to find the "sweet spot" that provides the best speed
with reasonable memory usage.
Args:
total_urls: Total number of URLs to crawl (default: 50)
"""
logger.info(f"=== GRID SEARCH FOR OPTIMAL CRAWLING CONFIGURATION ({total_urls} URLs) ===", tag="TEST")
# Generate test URLs once
urls = [f"https://example.com/page_{i}" for i in range(total_urls)]
# Define grid search configurations
# We'll use more flexible approach: test all browser counts from 1 to min(20, total_urls)
# and distribute pages evenly (some browsers may have 1 more page than others)
configurations = []
# Maximum number of browsers to test
max_browsers_to_test = min(20, total_urls)
# Try configurations with 1 to max_browsers_to_test browsers
for num_browsers in range(1, max_browsers_to_test + 1):
base_pages_per_browser = total_urls // num_browsers
remainder = total_urls % num_browsers
# Generate exact page distribution array
if remainder > 0:
# First 'remainder' browsers get one more page
page_distribution = [base_pages_per_browser + 1] * remainder + [base_pages_per_browser] * (num_browsers - remainder)
pages_distribution = f"{base_pages_per_browser+1} pages × {remainder} browsers, {base_pages_per_browser} pages × {num_browsers - remainder} browsers"
else:
# All browsers get the same number of pages
page_distribution = [base_pages_per_browser] * num_browsers
pages_distribution = f"{base_pages_per_browser} pages × {num_browsers} browsers"
# Format the distribution as a tuple string like (4, 4, 3, 3)
distribution_str = str(tuple(page_distribution))
configurations.append((num_browsers, base_pages_per_browser, pages_distribution, page_distribution, distribution_str))
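# Worked example (illustrative): total_urls=10, num_browsers=4 ->
# base_pages_per_browser=2, remainder=2, page_distribution=[3, 3, 2, 2],
# i.e. "3 pages × 2 browsers, 2 pages × 2 browsers".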
# Track results
results = []
# Test each configuration
for num_browsers, pages_per_browser, pages_distribution, page_distribution, distribution_str in configurations:
logger.info("-" * 80, tag="TEST")
logger.info(f"Testing configuration: {num_browsers} browsers with distribution: {distribution_str}", tag="TEST")
logger.info(f"Details: {pages_distribution}", tag="TEST")
# Sleep a bit for randomness
await asyncio.sleep(0.5)
try:
# Import psutil for memory tracking
try:
import psutil
process = psutil.Process()
initial_memory = process.memory_info().rss / (1024 * 1024) # MB
except ImportError:
logger.warning("psutil not available, memory metrics will not be tracked", tag="TEST")
initial_memory = 0
# Create and start browser managers
managers = []
start_time = time.time()
# Start all browsers in parallel
start_tasks = []
for i in range(num_browsers):
browser_config = BrowserConfig(
headless=True
)
manager = BrowserManager(browser_config=browser_config, logger=logger)
start_tasks.append(manager.start())
managers.append(manager)
await asyncio.gather(*start_tasks)
browser_startup_time = time.time() - start_time
# Measure memory after browser startup
if initial_memory > 0:
browser_memory = process.memory_info().rss / (1024 * 1024) - initial_memory
else:
browser_memory = 0
# Distribute URLs among managers using the exact page distribution
urls_per_manager = {}
total_assigned = 0
for i, manager in enumerate(managers):
if i < len(page_distribution):
# Get the exact number of pages for this browser from our distribution
manager_pages = page_distribution[i]
# Get the URL slice for this manager
start_idx = total_assigned
end_idx = start_idx + manager_pages
urls_per_manager[manager] = urls[start_idx:end_idx]
total_assigned += manager_pages
else:
# If we have more managers than our distribution (should never happen)
urls_per_manager[manager] = []
# Use the more efficient approach (pre-created pages)
logger.info("Running page crawling test...", tag="TEST")
crawl_start_time = time.time()
# Get all pages upfront for each manager
all_pages = []
for manager, manager_urls in urls_per_manager.items():
if not manager_urls: # Skip managers with no URLs
continue
crawler_config = CrawlerRunConfig()
pages = await manager.get_pages(crawler_config, count=len(manager_urls))
all_pages.extend(zip(pages, manager_urls))
# Measure memory after page creation
if initial_memory > 0:
pages_memory = process.memory_info().rss / (1024 * 1024) - browser_memory - initial_memory
else:
pages_memory = 0
# Function to crawl a URL with a pre-created page
async def fetch_title(page_ctx, url):
page, _ = page_ctx
try:
await page.goto(url)
title = await page.title()
return title
finally:
await page.close()
# Use the pre-created pages to fetch titles in parallel
tasks = [fetch_title(page_ctx, url) for page_ctx, url in all_pages]
crawl_results = await asyncio.gather(*tasks)
crawl_time = time.time() - crawl_start_time
total_time = time.time() - start_time
# Final memory measurement
if initial_memory > 0:
peak_memory = max(browser_memory + pages_memory, process.memory_info().rss / (1024 * 1024) - initial_memory)
else:
peak_memory = 0
# Close all managers
for manager in managers:
await manager.close()
# Calculate metrics
pages_per_second = total_urls / crawl_time
# Store result metrics
result = {
"num_browsers": num_browsers,
"pages_per_browser": pages_per_browser,
"page_distribution": page_distribution,
"distribution_str": distribution_str,
"total_urls": total_urls,
"browser_startup_time": browser_startup_time,
"crawl_time": crawl_time,
"total_time": total_time,
"browser_memory": browser_memory,
"pages_memory": pages_memory,
"peak_memory": peak_memory,
"pages_per_second": pages_per_second,
# Calculate efficiency score (higher is better)
# This balances speed vs memory usage
"efficiency_score": pages_per_second / (peak_memory + 1) if peak_memory > 0 else pages_per_second,
}
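# Worked example of the efficiency score (illustrative): 5.0 pages/s at 400 MB peak
# scores 5.0 / 401 ≈ 0.012, while 4.0 pages/s at 100 MB scores 4.0 / 101 ≈ 0.040,
# so the lighter configuration ranks higher despite being slower.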
results.append(result)
# Log the results
logger.info(f"Browser startup: {browser_startup_time:.2f}s", tag="TEST")
logger.info(f"Crawl time: {crawl_time:.2f}s", tag="TEST")
logger.info(f"Total time: {total_time:.2f}s", tag="TEST")
logger.info(f"Performance: {pages_per_second:.1f} pages/second", tag="TEST")
if peak_memory > 0:
logger.info(f"Browser memory: {browser_memory:.1f}MB", tag="TEST")
logger.info(f"Pages memory: {pages_memory:.1f}MB", tag="TEST")
logger.info(f"Peak memory: {peak_memory:.1f}MB", tag="TEST")
logger.info(f"Efficiency score: {result['efficiency_score']:.6f}", tag="TEST")
except Exception as e:
logger.error(f"Error testing configuration: {str(e)}", tag="TEST")
import traceback
traceback.print_exc()
# Clean up
for manager in managers:
try:
await manager.close()
except:
pass
# Print summary of all configurations
logger.info("=" * 100, tag="TEST")
logger.info("GRID SEARCH RESULTS SUMMARY", tag="TEST")
logger.info("=" * 100, tag="TEST")
# Rank configurations by efficiency score
ranked_results = sorted(results, key=lambda x: x["efficiency_score"], reverse=True)
# Also determine rankings by different metrics
fastest = sorted(results, key=lambda x: x["crawl_time"])[0]
lowest_memory = sorted(results, key=lambda x: x["peak_memory"] if x["peak_memory"] > 0 else float('inf'))[0]
most_efficient = ranked_results[0]
# Print top performers by category
logger.info("🏆 TOP PERFORMERS BY CATEGORY:", tag="TEST")
logger.info(f"⚡ Fastest: {fastest['num_browsers']} browsers × ~{fastest['pages_per_browser']} pages " +
f"({fastest['crawl_time']:.2f}s, {fastest['pages_per_second']:.1f} pages/s)", tag="TEST")
if lowest_memory["peak_memory"] > 0:
logger.info(f"💾 Lowest memory: {lowest_memory['num_browsers']} browsers × ~{lowest_memory['pages_per_browser']} pages " +
f"({lowest_memory['peak_memory']:.1f}MB)", tag="TEST")
logger.info(f"🌟 Most efficient: {most_efficient['num_browsers']} browsers × ~{most_efficient['pages_per_browser']} pages " +
f"(score: {most_efficient['efficiency_score']:.6f})", tag="TEST")
# Print result table header
logger.info("\n📊 COMPLETE RANKING TABLE (SORTED BY EFFICIENCY SCORE):", tag="TEST")
logger.info("-" * 120, tag="TEST")
# Define table header
header = f"{'Rank':<5} | {'Browsers':<8} | {'Distribution':<55} | {'Total Time(s)':<12} | {'Speed(p/s)':<12} | {'Memory(MB)':<12} | {'Efficiency':<10} | {'Notes'}"
logger.info(header, tag="TEST")
logger.info("-" * 120, tag="TEST")
# Print each configuration in ranked order
for rank, result in enumerate(ranked_results, 1):
# Add special notes for top performers
notes = []
if result == fastest:
notes.append("⚡ Fastest")
if result == lowest_memory:
notes.append("💾 Lowest Memory")
if result == most_efficient:
notes.append("🌟 Most Efficient")
notes_str = " | ".join(notes) if notes else ""
# Format memory if available
memory_str = f"{result['peak_memory']:.1f}" if result['peak_memory'] > 0 else "N/A"
# Get the distribution string
dist_str = result.get('distribution_str', str(tuple([result['pages_per_browser']] * result['num_browsers'])))
# Build the row
row = f"{rank:<5} | {result['num_browsers']:<8} | {dist_str:<55} | {result['total_time']:.2f}s{' ':<7} | "
row += f"{result['pages_per_second']:.2f}{' ':<6} | {memory_str}{' ':<6} | {result['efficiency_score']:.4f}{' ':<4} | {notes_str}"
logger.info(row, tag="TEST")
logger.info("-" * 120, tag="TEST")
# Generate visualization if matplotlib is available
try:
import matplotlib.pyplot as plt
import numpy as np
# Extract data for plotting from ranked results
browser_counts = [r["num_browsers"] for r in ranked_results]
efficiency_scores = [r["efficiency_score"] for r in ranked_results]
crawl_times = [r["crawl_time"] for r in ranked_results]
total_times = [r["total_time"] for r in ranked_results]
# Filter results with memory data
memory_results = [r for r in ranked_results if r["peak_memory"] > 0]
memory_browser_counts = [r["num_browsers"] for r in memory_results]
peak_memories = [r["peak_memory"] for r in memory_results]
# Create figure with clean design
plt.figure(figsize=(14, 12), facecolor='white')
plt.style.use('ggplot')
# Create grid for subplots
gs = plt.GridSpec(3, 1, height_ratios=[1, 1, 1], hspace=0.3)
# Plot 1: Efficiency Score (higher is better)
ax1 = plt.subplot(gs[0])
bar_colors = ['#3498db'] * len(browser_counts)
# Highlight the most efficient
most_efficient_idx = browser_counts.index(most_efficient["num_browsers"])
bar_colors[most_efficient_idx] = '#e74c3c' # Red for most efficient
bars = ax1.bar(range(len(browser_counts)), efficiency_scores, color=bar_colors)
ax1.set_xticks(range(len(browser_counts)))
ax1.set_xticklabels([f"{bc}" for bc in browser_counts], rotation=45)
ax1.set_xlabel('Number of Browsers')
ax1.set_ylabel('Efficiency Score (higher is better)')
ax1.set_title('Browser Configuration Efficiency (higher is better)')
# Add value labels on top of bars
for bar, score in zip(bars, efficiency_scores):
height = bar.get_height()
ax1.text(bar.get_x() + bar.get_width()/2., height + 0.02*max(efficiency_scores),
f'{score:.3f}', ha='center', va='bottom', rotation=90, fontsize=8)
# Highlight best configuration
ax1.text(0.02, 0.90, f"🌟 Most Efficient: {most_efficient['num_browsers']} browsers with ~{most_efficient['pages_per_browser']} pages",
transform=ax1.transAxes, fontsize=12, verticalalignment='top',
bbox=dict(boxstyle='round,pad=0.5', facecolor='yellow', alpha=0.3))
# Plot 2: Time Performance
ax2 = plt.subplot(gs[1])
# Plot both total time and crawl time
ax2.plot(browser_counts, crawl_times, 'bo-', label='Crawl Time (s)', linewidth=2)
ax2.plot(browser_counts, total_times, 'go--', label='Total Time (s)', linewidth=2, alpha=0.6)
# Mark the fastest configuration
fastest_idx = browser_counts.index(fastest["num_browsers"])
ax2.plot(browser_counts[fastest_idx], crawl_times[fastest_idx], 'ro', ms=10,
label=f'Fastest: {fastest["num_browsers"]} browsers')
ax2.set_xlabel('Number of Browsers')
ax2.set_ylabel('Time (seconds)')
ax2.set_title(f'Time Performance for {total_urls} URLs by Browser Count')
ax2.grid(True, linestyle='--', alpha=0.7)
ax2.legend(loc='upper right')
# Plot pages per second on second y-axis
pages_per_second = [total_urls/t for t in crawl_times]
ax2_twin = ax2.twinx()
ax2_twin.plot(browser_counts, pages_per_second, 'r^--', label='Pages/second', alpha=0.5)
ax2_twin.set_ylabel('Pages per second')
# Add note about the fastest configuration
ax2.text(0.02, 0.90, f"⚡ Fastest: {fastest['num_browsers']} browsers with ~{fastest['pages_per_browser']} pages" +
f"\n {fastest['crawl_time']:.2f}s ({fastest['pages_per_second']:.1f} pages/s)",
transform=ax2.transAxes, fontsize=12, verticalalignment='top',
bbox=dict(boxstyle='round,pad=0.5', facecolor='lightblue', alpha=0.3))
# Plot 3: Memory Usage (if available)
if memory_results:
ax3 = plt.subplot(gs[2])
# Prepare data for grouped bar chart
memory_per_browser = [m/n for m, n in zip(peak_memories, memory_browser_counts)]
memory_per_page = [m/(n*p) for m, n, p in zip(
[r["peak_memory"] for r in memory_results],
[r["num_browsers"] for r in memory_results],
[r["pages_per_browser"] for r in memory_results])]
x = np.arange(len(memory_browser_counts))
width = 0.35
# Create grouped bars
ax3.bar(x - width/2, peak_memories, width, label='Total Memory (MB)', color='#9b59b6')
ax3.bar(x + width/2, memory_per_browser, width, label='Memory per Browser (MB)', color='#3498db')
# Configure axis
ax3.set_xticks(x)
ax3.set_xticklabels([f"{bc}" for bc in memory_browser_counts], rotation=45)
ax3.set_xlabel('Number of Browsers')
ax3.set_ylabel('Memory (MB)')
ax3.set_title('Memory Usage by Browser Configuration')
ax3.legend(loc='upper left')
ax3.grid(True, linestyle='--', alpha=0.7)
# Add second y-axis for memory per page
ax3_twin = ax3.twinx()
ax3_twin.plot(x, memory_per_page, 'ro-', label='Memory per Page (MB)')
ax3_twin.set_ylabel('Memory per Page (MB)')
# Get lowest memory configuration
lowest_memory_idx = memory_browser_counts.index(lowest_memory["num_browsers"])
# Add note about lowest memory configuration
ax3.text(0.02, 0.90, f"💾 Lowest Memory: {lowest_memory['num_browsers']} browsers with ~{lowest_memory['pages_per_browser']} pages" +
f"\n {lowest_memory['peak_memory']:.1f}MB ({lowest_memory['peak_memory']/total_urls:.2f}MB per page)",
transform=ax3.transAxes, fontsize=12, verticalalignment='top',
bbox=dict(boxstyle='round,pad=0.5', facecolor='lightgreen', alpha=0.3))
# Add overall title
plt.suptitle(f'Browser Scaling Grid Search Results for {total_urls} URLs', fontsize=16, y=0.98)
# Add timestamp and info at the bottom
plt.figtext(0.5, 0.01, f"Generated by Crawl4AI at {time.strftime('%Y-%m-%d %H:%M:%S')}",
ha="center", fontsize=10, style='italic')
# Get current directory and save the figure there
import os
__current_file = os.path.abspath(__file__)
current_dir = os.path.dirname(__current_file)
output_file = os.path.join(current_dir, 'browser_scaling_grid_search.png')
# Adjust layout and save figure with high DPI
plt.tight_layout(rect=[0, 0.03, 1, 0.97])
plt.savefig(output_file, dpi=200, bbox_inches='tight')
logger.success(f"Visualization saved to {output_file}", tag="TEST")
except ImportError:
logger.warning("matplotlib not available, skipping visualization", tag="TEST")
return most_efficient["num_browsers"], most_efficient["pages_per_browser"]
async def find_optimal_browser_config(total_urls=50, verbose=True, rate_limit_delay=0.2):
"""Find optimal browser configuration for crawling a specific number of URLs.
Args:
total_urls: Number of URLs to crawl
verbose: Whether to print progress
rate_limit_delay: Delay between page loads to avoid rate limiting
Returns:
dict: Contains fastest, lowest_memory, and optimal configurations
"""
if verbose:
print(f"\n=== Finding optimal configuration for crawling {total_urls} URLs ===\n")
# Generate test URLs with timestamp to avoid caching
timestamp = int(time.time())
urls = [f"https://example.com/page_{i}?t={timestamp}" for i in range(total_urls)]
# Limit browser configurations to test (1 browser to max 10)
max_browsers = min(10, total_urls)
configs_to_test = []
# Generate configurations (browser count, pages distribution)
for num_browsers in range(1, max_browsers + 1):
base_pages = total_urls // num_browsers
remainder = total_urls % num_browsers
# Create distribution array like [3, 3, 2, 2] (some browsers get one more page)
if remainder > 0:
distribution = [base_pages + 1] * remainder + [base_pages] * (num_browsers - remainder)
else:
distribution = [base_pages] * num_browsers
configs_to_test.append((num_browsers, distribution))
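# Worked example (illustrative): total_urls=20 gives max_browsers=10; for num_browsers=3,
# base_pages=6, remainder=2, so distribution=[7, 7, 6].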
results = []
# Test each configuration
for browser_count, page_distribution in configs_to_test:
if verbose:
print(f"Testing {browser_count} browsers with distribution {tuple(page_distribution)}")
try:
# Track memory if possible
try:
import psutil
process = psutil.Process()
start_memory = process.memory_info().rss / (1024 * 1024) # MB
except ImportError:
if verbose:
print("Memory tracking not available (psutil not installed)")
start_memory = 0
# Start browsers in parallel
managers = []
start_tasks = []
start_time = time.time()
for i in range(browser_count):
config = BrowserConfig(headless=True)
manager = BrowserManager(browser_config=config, logger=logger)
start_tasks.append(manager.start())
managers.append(manager)
await asyncio.gather(*start_tasks)
# Distribute URLs among browsers
urls_per_manager = {}
url_index = 0
for i, manager in enumerate(managers):
pages_for_this_browser = page_distribution[i]
end_index = url_index + pages_for_this_browser
urls_per_manager[manager] = urls[url_index:end_index]
url_index = end_index
# Create pages for each browser
all_pages = []
for manager, manager_urls in urls_per_manager.items():
if not manager_urls:
continue
pages = await manager.get_pages(CrawlerRunConfig(), count=len(manager_urls))
all_pages.extend(zip(pages, manager_urls))
# Crawl pages with delay to avoid rate limiting
async def crawl_page(page_ctx, url):
page, _ = page_ctx
try:
await page.goto(url)
if rate_limit_delay > 0:
await asyncio.sleep(rate_limit_delay)
title = await page.title()
return title
finally:
await page.close()
crawl_start = time.time()
crawl_tasks = [crawl_page(page_ctx, url) for page_ctx, url in all_pages]
await asyncio.gather(*crawl_tasks)
crawl_time = time.time() - crawl_start
total_time = time.time() - start_time
# Measure final memory usage
if start_memory > 0:
end_memory = process.memory_info().rss / (1024 * 1024)
memory_used = end_memory - start_memory
else:
memory_used = 0
# Close all browsers
for manager in managers:
await manager.close()
# Calculate metrics
pages_per_second = total_urls / crawl_time
# Calculate efficiency score (higher is better)
# This balances speed vs memory
if memory_used > 0:
efficiency = pages_per_second / (memory_used + 1)
else:
efficiency = pages_per_second
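# Note: this is the same pages-per-second-per-MB heuristic used in
# grid_search_optimal_configuration; the "+ 1" keeps the denominator at least 1 MB.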
# Store result
result = {
"browser_count": browser_count,
"distribution": tuple(page_distribution),
"crawl_time": crawl_time,
"total_time": total_time,
"memory_used": memory_used,
"pages_per_second": pages_per_second,
"efficiency": efficiency
}
results.append(result)
if verbose:
print(f" ✓ Crawled {total_urls} pages in {crawl_time:.2f}s ({pages_per_second:.1f} pages/sec)")
if memory_used > 0:
print(f" ✓ Memory used: {memory_used:.1f}MB ({memory_used/total_urls:.1f}MB per page)")
print(f" ✓ Efficiency score: {efficiency:.4f}")
except Exception as e:
if verbose:
print(f" ✗ Error: {str(e)}")
# Clean up
for manager in managers:
try:
await manager.close()
except:
pass
# If no successful results, return None
if not results:
return None
# Find best configurations
fastest = sorted(results, key=lambda x: x["crawl_time"])[0]
# Only consider memory if available
memory_results = [r for r in results if r["memory_used"] > 0]
if memory_results:
lowest_memory = sorted(memory_results, key=lambda x: x["memory_used"])[0]
else:
lowest_memory = fastest
# Find most efficient (balanced speed vs memory)
optimal = sorted(results, key=lambda x: x["efficiency"], reverse=True)[0]
# Print summary
if verbose:
print("\n=== OPTIMAL CONFIGURATIONS ===")
print(f"⚡ Fastest: {fastest['browser_count']} browsers {fastest['distribution']}")
print(f" {fastest['crawl_time']:.2f}s, {fastest['pages_per_second']:.1f} pages/sec")
print(f"💾 Memory-efficient: {lowest_memory['browser_count']} browsers {lowest_memory['distribution']}")
if lowest_memory["memory_used"] > 0:
print(f" {lowest_memory['memory_used']:.1f}MB, {lowest_memory['memory_used']/total_urls:.2f}MB per page")
print(f"🌟 Balanced optimal: {optimal['browser_count']} browsers {optimal['distribution']}")
print(f" {optimal['crawl_time']:.2f}s, {optimal['pages_per_second']:.1f} pages/sec, score: {optimal['efficiency']:.4f}")
return {
"fastest": fastest,
"lowest_memory": lowest_memory,
"optimal": optimal,
"all_configs": results
}
async def run_tests():
"""Run all tests sequentially."""
results = []
# Find optimal configuration using our utility function
configs = await find_optimal_browser_config(
total_urls=20, # Use a small number for faster testing
verbose=True,
rate_limit_delay=0.2 # 200ms delay between page loads to avoid rate limiting
)
if configs:
# Show the optimal configuration
optimal = configs["optimal"]
print(f"\n🎯 Recommended configuration for production use:")
print(f" {optimal['browser_count']} browsers with distribution {optimal['distribution']}")
print(f" Estimated performance: {optimal['pages_per_second']:.1f} pages/second")
results.append(True)
else:
print("\n❌ Failed to find optimal configuration")
results.append(False)
# Print summary
total = len(results)
passed = sum(results)
print(f"\nTests complete: {passed}/{total} passed")
if passed == total:
print("All tests passed!")
else:
print(f"{total - passed} tests failed")
if __name__ == "__main__":
asyncio.run(run_tests())

View File

@@ -0,0 +1,316 @@
"""Test examples for PlaywrightBrowserStrategy.
These examples demonstrate the functionality of PlaywrightBrowserStrategy
and serve as functional tests.
"""
import asyncio
import os
import re
import sys
# Add the project root to Python path if running directly
if __name__ == "__main__":
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
from crawl4ai.browser import BrowserManager
from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig
from crawl4ai.async_logger import AsyncLogger
# Create a logger for clear terminal output
logger = AsyncLogger(verbose=True, log_file=None)
async def test_start_close():
# Create browser config for standard Playwright
browser_config = BrowserConfig(
headless=True,
viewport_width=1280,
viewport_height=800
)
# Create browser manager with the config
manager = BrowserManager(browser_config=browser_config, logger=logger)
try:
for _ in range(4):
# Start the browser
await manager.start()
logger.info("Browser started successfully", tag="TEST")
# Get a page
page, context = await manager.get_page(CrawlerRunConfig())
logger.info("Got page successfully", tag="TEST")
# Navigate to a website
await page.goto("https://example.com")
logger.info("Navigated to example.com", tag="TEST")
# Get page title
title = await page.title()
logger.info(f"Page title: {title}", tag="TEST")
# Clean up
await manager.close()
logger.info("Browser closed successfully", tag="TEST")
await asyncio.sleep(1) # Wait for a moment before restarting
except Exception as e:
logger.error(f"Test failed: {str(e)}", tag="TEST")
# Ensure cleanup
try:
await manager.close()
except:
pass
return False
return True
async def test_playwright_basic():
"""Test basic Playwright browser functionality."""
logger.info("Testing standard Playwright browser", tag="TEST")
# Create browser config for standard Playwright
browser_config = BrowserConfig(
headless=True,
viewport_width=1280,
viewport_height=800
)
# Create browser manager with the config
manager = BrowserManager(browser_config=browser_config, logger=logger)
try:
# Start the browser
await manager.start()
logger.info("Browser started successfully", tag="TEST")
# Create crawler config
crawler_config = CrawlerRunConfig(url="https://example.com")
# Get a page
page, context = await manager.get_page(crawler_config)
logger.info("Got page successfully", tag="TEST")
# Navigate to a website
await page.goto("https://example.com")
logger.info("Navigated to example.com", tag="TEST")
# Get page title
title = await page.title()
logger.info(f"Page title: {title}", tag="TEST")
# Clean up
await manager.close()
logger.info("Browser closed successfully", tag="TEST")
return True
except Exception as e:
logger.error(f"Test failed: {str(e)}", tag="TEST")
# Ensure cleanup
try:
await manager.close()
except:
pass
return False
async def test_playwright_text_mode():
"""Test Playwright browser in text-only mode."""
logger.info("Testing Playwright text mode", tag="TEST")
# Create browser config with text mode enabled
browser_config = BrowserConfig(
headless=True,
text_mode=True # Enable text-only mode
)
# Create browser manager with the config
manager = BrowserManager(browser_config=browser_config, logger=logger)
try:
# Start the browser
await manager.start()
logger.info("Browser started successfully in text mode", tag="TEST")
# Get a page
crawler_config = CrawlerRunConfig(url="https://example.com")
page, context = await manager.get_page(crawler_config)
# Navigate to a website
await page.goto("https://example.com")
logger.info("Navigated to example.com", tag="TEST")
# Get page title
title = await page.title()
logger.info(f"Page title: {title}", tag="TEST")
# Check if images are blocked in text mode
# We'll check if any image requests were made
has_images = False
async with page.expect_request("**/*.{png,jpg,jpeg,gif,webp,svg}", timeout=1000) as request_info:
try:
# Try to load a page with images
await page.goto("https://picsum.photos/", wait_until="domcontentloaded")
request = await request_info.value
has_images = True
except:
# Timeout without image requests means text mode is working
has_images = False
logger.info(f"Text mode image blocking working: {not has_images}", tag="TEST")
# Clean up
await manager.close()
logger.info("Browser closed successfully", tag="TEST")
return True
except Exception as e:
logger.error(f"Test failed: {str(e)}", tag="TEST")
# Ensure cleanup
try:
await manager.close()
except:
pass
return False
async def test_playwright_context_reuse():
"""Test context caching and reuse with identical configurations."""
logger.info("Testing context reuse with identical configurations", tag="TEST")
# Create browser config
browser_config = BrowserConfig(headless=True)
# Create browser manager
manager = BrowserManager(browser_config=browser_config, logger=logger)
try:
# Start the browser
await manager.start()
logger.info("Browser started successfully", tag="TEST")
# Create identical crawler configs
crawler_config1 = CrawlerRunConfig(
css_selector="body",
)
crawler_config2 = CrawlerRunConfig(
css_selector="body",
)
# Get pages with these configs
page1, context1 = await manager.get_page(crawler_config1)
page2, context2 = await manager.get_page(crawler_config2)
# Check if contexts are reused
is_same_context = context1 == context2
logger.info(f"Contexts reused: {is_same_context}", tag="TEST")
# Now try with a different config
crawler_config3 = CrawlerRunConfig()
page3, context3 = await manager.get_page(crawler_config3)
# This should be a different context
is_different_context = context1 != context3
logger.info(f"Different contexts for different configs: {is_different_context}", tag="TEST")
# Clean up
await manager.close()
logger.info("Browser closed successfully", tag="TEST")
# Both tests should pass for success
return is_same_context and is_different_context
except Exception as e:
logger.error(f"Test failed: {str(e)}", tag="TEST")
# Ensure cleanup
try:
await manager.close()
        except Exception:
pass
return False
async def test_playwright_session_management():
"""Test session management with Playwright browser."""
logger.info("Testing session management with Playwright browser", tag="TEST")
browser_config = BrowserConfig(
headless=True
)
manager = BrowserManager(browser_config=browser_config, logger=logger)
try:
await manager.start()
logger.info("Browser launched successfully", tag="TEST")
# Create two sessions
session1_id = "playwright_session_1"
session2_id = "playwright_session_2"
# Set up first session
crawler_config1 = CrawlerRunConfig(session_id=session1_id, url="https://example.com")
page1, context1 = await manager.get_page(crawler_config1)
await page1.goto("https://example.com")
await page1.evaluate("localStorage.setItem('playwright_session1_data', 'test_value1')")
logger.info(f"Set up session 1 with ID: {session1_id}", tag="TEST")
# Set up second session
crawler_config2 = CrawlerRunConfig(session_id=session2_id, url="https://example.org")
page2, context2 = await manager.get_page(crawler_config2)
await page2.goto("https://example.org")
await page2.evaluate("localStorage.setItem('playwright_session2_data', 'test_value2')")
logger.info(f"Set up session 2 with ID: {session2_id}", tag="TEST")
# Get first session again
page1_again, context1_again = await manager.get_page(crawler_config1)
# Verify it's the same page and data persists
is_same_page = page1 == page1_again
is_same_context = context1 == context1_again
data1 = await page1_again.evaluate("localStorage.getItem('playwright_session1_data')")
logger.info(f"Session 1 reuse successful: {is_same_page}, data: {data1}", tag="TEST")
# Kill first session
await manager.kill_session(session1_id)
logger.info(f"Killed session 1", tag="TEST")
# Verify second session still works
data2 = await page2.evaluate("localStorage.getItem('playwright_session2_data')")
logger.info(f"Session 2 still functional after killing session 1, data: {data2}", tag="TEST")
# Clean up
await manager.close()
logger.info("Browser closed successfully", tag="TEST")
return is_same_page and is_same_context and data1 == "test_value1" and data2 == "test_value2"
except Exception as e:
logger.error(f"Test failed: {str(e)}", tag="TEST")
try:
await manager.close()
        except Exception:
pass
return False
async def run_tests():
"""Run all tests sequentially."""
results = []
# results.append(await test_start_close())
# results.append(await test_playwright_basic())
# results.append(await test_playwright_text_mode())
# results.append(await test_playwright_context_reuse())
results.append(await test_playwright_session_management())
# Print summary
total = len(results)
passed = sum(results)
logger.info(f"Tests complete: {passed}/{total} passed", tag="SUMMARY")
if passed == total:
logger.success("All tests passed!", tag="SUMMARY")
else:
logger.error(f"{total - passed} tests failed", tag="SUMMARY")
if __name__ == "__main__":
asyncio.run(run_tests())

View File

@@ -0,0 +1,176 @@
"""Test examples for BrowserProfileManager.
These examples demonstrate the functionality of BrowserProfileManager
and serve as functional tests.
"""
import asyncio
import os
import sys
import uuid
import shutil
# Add the project root to Python path if running directly
if __name__ == "__main__":
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
from crawl4ai.browser import BrowserManager, BrowserProfileManager
from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig
from crawl4ai.async_logger import AsyncLogger
# Create a logger for clear terminal output
logger = AsyncLogger(verbose=True, log_file=None)
async def test_profile_creation():
"""Test creating and managing browser profiles."""
logger.info("Testing profile creation and management", tag="TEST")
profile_manager = BrowserProfileManager(logger=logger)
try:
# List existing profiles
profiles = profile_manager.list_profiles()
logger.info(f"Found {len(profiles)} existing profiles", tag="TEST")
# Generate a unique profile name for testing
test_profile_name = f"test-profile-{uuid.uuid4().hex[:8]}"
# Create a test profile directory
profile_path = os.path.join(profile_manager.profiles_dir, test_profile_name)
os.makedirs(os.path.join(profile_path, "Default"), exist_ok=True)
# Create a dummy Preferences file to simulate a Chrome profile
with open(os.path.join(profile_path, "Default", "Preferences"), "w") as f:
f.write("{\"test\": true}")
logger.info(f"Created test profile at: {profile_path}", tag="TEST")
# Verify the profile is now in the list
profiles = profile_manager.list_profiles()
profile_found = any(p["name"] == test_profile_name for p in profiles)
logger.info(f"Profile found in list: {profile_found}", tag="TEST")
# Try to get the profile path
retrieved_path = profile_manager.get_profile_path(test_profile_name)
path_match = retrieved_path == profile_path
logger.info(f"Retrieved correct profile path: {path_match}", tag="TEST")
# Delete the profile
success = profile_manager.delete_profile(test_profile_name)
logger.info(f"Profile deletion successful: {success}", tag="TEST")
# Verify it's gone
profiles_after = profile_manager.list_profiles()
profile_removed = not any(p["name"] == test_profile_name for p in profiles_after)
logger.info(f"Profile removed from list: {profile_removed}", tag="TEST")
# Clean up just in case
if os.path.exists(profile_path):
shutil.rmtree(profile_path, ignore_errors=True)
return profile_found and path_match and success and profile_removed
except Exception as e:
logger.error(f"Test failed: {str(e)}", tag="TEST")
# Clean up test directory
try:
if os.path.exists(profile_path):
shutil.rmtree(profile_path, ignore_errors=True)
        except Exception:
pass
return False
async def test_profile_with_browser():
"""Test using a profile with a browser."""
logger.info("Testing using a profile with a browser", tag="TEST")
profile_manager = BrowserProfileManager(logger=logger)
test_profile_name = f"test-browser-profile-{uuid.uuid4().hex[:8]}"
profile_path = None
try:
# Create a test profile directory
profile_path = os.path.join(profile_manager.profiles_dir, test_profile_name)
os.makedirs(os.path.join(profile_path, "Default"), exist_ok=True)
# Create a dummy Preferences file to simulate a Chrome profile
with open(os.path.join(profile_path, "Default", "Preferences"), "w") as f:
f.write("{\"test\": true}")
logger.info(f"Created test profile at: {profile_path}", tag="TEST")
# Now use this profile with a browser
browser_config = BrowserConfig(
user_data_dir=profile_path,
headless=True
)
manager = BrowserManager(browser_config=browser_config, logger=logger)
# Start the browser with the profile
await manager.start()
logger.info("Browser started with profile", tag="TEST")
# Create a page
crawler_config = CrawlerRunConfig()
page, context = await manager.get_page(crawler_config)
# Navigate and set some data to verify profile works
await page.goto("https://example.com")
await page.evaluate("localStorage.setItem('test_data', 'profile_value')")
# Close browser
await manager.close()
logger.info("First browser session closed", tag="TEST")
# Create a new browser with the same profile
manager2 = BrowserManager(browser_config=browser_config, logger=logger)
await manager2.start()
logger.info("Second browser session started with same profile", tag="TEST")
# Get a page and check if the data persists
page2, context2 = await manager2.get_page(crawler_config)
await page2.goto("https://example.com")
data = await page2.evaluate("localStorage.getItem('test_data')")
# Verify data persisted
data_persisted = data == "profile_value"
logger.info(f"Data persisted across sessions: {data_persisted}", tag="TEST")
# Clean up
await manager2.close()
logger.info("Second browser session closed", tag="TEST")
# Delete the test profile
success = profile_manager.delete_profile(test_profile_name)
logger.info(f"Test profile deleted: {success}", tag="TEST")
return data_persisted and success
except Exception as e:
logger.error(f"Test failed: {str(e)}", tag="TEST")
# Clean up
try:
if profile_path and os.path.exists(profile_path):
shutil.rmtree(profile_path, ignore_errors=True)
        except Exception:
pass
return False
async def run_tests():
"""Run all tests sequentially."""
results = []
results.append(await test_profile_creation())
results.append(await test_profile_with_browser())
# Print summary
total = len(results)
passed = sum(results)
logger.info(f"Tests complete: {passed}/{total} passed", tag="SUMMARY")
if passed == total:
logger.success("All tests passed!", tag="SUMMARY")
else:
logger.error(f"{total - passed} tests failed", tag="SUMMARY")
if __name__ == "__main__":
asyncio.run(run_tests())

133
tests/cli/test_cli.py Normal file
View File

@@ -0,0 +1,133 @@
import pytest
from click.testing import CliRunner
from pathlib import Path
import json
import yaml
from crawl4ai.cli import cli, load_config_file, parse_key_values
import tempfile
import os
import click
@pytest.fixture
def runner():
return CliRunner()
@pytest.fixture
def temp_config_dir():
with tempfile.TemporaryDirectory() as tmpdir:
old_home = os.environ.get('HOME')
os.environ['HOME'] = tmpdir
yield Path(tmpdir)
        if old_home is not None:
            os.environ['HOME'] = old_home
        else:
            os.environ.pop('HOME', None)
@pytest.fixture
def sample_configs(temp_config_dir):
configs = {
'browser.yml': {
'headless': True,
'viewport_width': 1280,
'user_agent_mode': 'random'
},
'crawler.yml': {
'cache_mode': 'bypass',
'wait_until': 'networkidle',
'scan_full_page': True
},
'extract_css.yml': {
'type': 'json-css',
'params': {'verbose': True}
},
'css_schema.json': {
'name': 'ArticleExtractor',
'baseSelector': '.article',
'fields': [
{'name': 'title', 'selector': 'h1.title', 'type': 'text'},
{'name': 'link', 'selector': 'a.read-more', 'type': 'attribute', 'attribute': 'href'}
]
}
}
for filename, content in configs.items():
path = temp_config_dir / filename
with open(path, 'w') as f:
if filename.endswith('.yml'):
yaml.dump(content, f)
else:
json.dump(content, f)
return {name: str(temp_config_dir / name) for name in configs}
class TestCLIBasics:
def test_help(self, runner):
result = runner.invoke(cli, ['--help'])
assert result.exit_code == 0
assert 'Crawl4AI CLI' in result.output
def test_examples(self, runner):
result = runner.invoke(cli, ['--example'])
assert result.exit_code == 0
assert 'Examples' in result.output
def test_missing_url(self, runner):
result = runner.invoke(cli)
assert result.exit_code != 0
assert 'URL argument is required' in result.output
class TestConfigParsing:
def test_parse_key_values_basic(self):
result = parse_key_values(None, None, "key1=value1,key2=true")
assert result == {'key1': 'value1', 'key2': True}
def test_parse_key_values_invalid(self):
with pytest.raises(click.BadParameter):
parse_key_values(None, None, "invalid_format")
class TestConfigLoading:
def test_load_yaml_config(self, sample_configs):
config = load_config_file(sample_configs['browser.yml'])
assert config['headless'] is True
assert config['viewport_width'] == 1280
def test_load_json_config(self, sample_configs):
config = load_config_file(sample_configs['css_schema.json'])
assert config['name'] == 'ArticleExtractor'
assert len(config['fields']) == 2
def test_load_nonexistent_config(self):
with pytest.raises(click.BadParameter):
load_config_file('nonexistent.yml')
class TestLLMConfig:
def test_llm_config_creation(self, temp_config_dir, runner):
def input_simulation(inputs):
return runner.invoke(cli, ['https://example.com', '-q', 'test question'],
input='\n'.join(inputs))
class TestCrawlingFeatures:
def test_basic_crawl(self, runner):
result = runner.invoke(cli, ['https://example.com'])
assert result.exit_code == 0
class TestErrorHandling:
def test_invalid_config_file(self, runner):
result = runner.invoke(cli, [
'https://example.com',
'--browser-config', 'nonexistent.yml'
])
assert result.exit_code != 0
def test_invalid_schema(self, runner, temp_config_dir):
invalid_schema = temp_config_dir / 'invalid_schema.json'
with open(invalid_schema, 'w') as f:
f.write('invalid json')
result = runner.invoke(cli, [
'https://example.com',
'--schema', str(invalid_schema)
])
assert result.exit_code != 0
if __name__ == '__main__':
pytest.main(['-v', '-s', '--tb=native', __file__])

View File

@@ -0,0 +1,113 @@
import json
from crawl4ai import (
CrawlerRunConfig,
DefaultMarkdownGenerator,
RegexChunking,
JsonCssExtractionStrategy,
BM25ContentFilter,
CacheMode
)
from crawl4ai.deep_crawling import BFSDeepCrawlStrategy
from crawl4ai.deep_crawling.filters import FastFilterChain
from crawl4ai.deep_crawling.filters import FastContentTypeFilter, FastDomainFilter
from crawl4ai.deep_crawling.scorers import FastKeywordRelevanceScorer
def create_test_config() -> CrawlerRunConfig:
# Set up content filtering and markdown generation
content_filter = BM25ContentFilter(
user_query="technology articles",
)
markdown_generator = DefaultMarkdownGenerator(
content_filter=content_filter,
options={"ignore_links": False, "body_width": 0}
)
# Set up extraction strategy
extraction_schema = {
"name": "ArticleExtractor",
"baseSelector": "article.content",
"fields": [
{"name": "title", "selector": "h1", "type": "text"},
{"name": "content", "selector": ".article-body", "type": "html"}
]
}
extraction_strategy = JsonCssExtractionStrategy(schema=extraction_schema)
# Set up deep crawling
filter_chain = FastFilterChain([
FastContentTypeFilter(["text/html"]),
FastDomainFilter(blocked_domains=["ads.*"])
])
url_scorer = FastKeywordRelevanceScorer(
keywords=["article", "blog"],
weight=1.0
)
deep_crawl_strategy = BFSDeepCrawlStrategy(
max_depth=3,
filter_chain=filter_chain,
url_scorer=url_scorer
)
# Create the config
config = CrawlerRunConfig(
word_count_threshold=200,
extraction_strategy=extraction_strategy,
chunking_strategy=RegexChunking(patterns=[r"\n\n"]),
markdown_generator=markdown_generator,
css_selector="main.content",
excluded_tags=["nav", "footer"],
keep_attrs=["href", "src"],
cache_mode=CacheMode.BYPASS,
wait_until="networkidle",
page_timeout=30000,
scan_full_page=True,
deep_crawl_strategy=deep_crawl_strategy,
verbose=True,
stream=True
)
return config
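# For reference, a config like the one built above would typically be consumed by
# AsyncWebCrawler (illustrative sketch only, not executed by this serialization test):
#
#     async with AsyncWebCrawler() as crawler:
#         result = await crawler.arun("https://example.com", config=create_test_config())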
def test_config_serialization_cycle():
# Create original config
original_config = create_test_config()
# Dump to serializable dictionary
serialized = original_config.dump()
print(json.dumps(serialized, indent=2))
# Load back into config object
deserialized_config = CrawlerRunConfig.load(serialized)
# Verify core attributes
assert deserialized_config.word_count_threshold == original_config.word_count_threshold
assert deserialized_config.css_selector == original_config.css_selector
assert deserialized_config.excluded_tags == original_config.excluded_tags
assert deserialized_config.keep_attrs == original_config.keep_attrs
assert deserialized_config.cache_mode == original_config.cache_mode
assert deserialized_config.wait_until == original_config.wait_until
assert deserialized_config.page_timeout == original_config.page_timeout
assert deserialized_config.scan_full_page == original_config.scan_full_page
assert deserialized_config.verbose == original_config.verbose
assert deserialized_config.stream == original_config.stream
# Verify complex objects
assert isinstance(deserialized_config.extraction_strategy, JsonCssExtractionStrategy)
assert isinstance(deserialized_config.chunking_strategy, RegexChunking)
assert isinstance(deserialized_config.markdown_generator, DefaultMarkdownGenerator)
assert isinstance(deserialized_config.markdown_generator.content_filter, BM25ContentFilter)
assert isinstance(deserialized_config.deep_crawl_strategy, BFSDeepCrawlStrategy)
# Verify deep crawl strategy configuration
assert deserialized_config.deep_crawl_strategy.max_depth == 3
assert isinstance(deserialized_config.deep_crawl_strategy.filter_chain, FastFilterChain)
assert isinstance(deserialized_config.deep_crawl_strategy.url_scorer, FastKeywordRelevanceScorer)
print("Serialization cycle test passed successfully!")
if __name__ == "__main__":
test_config_serialization_cycle()

175
tests/docker/test_docker.py Normal file
View File

@@ -0,0 +1,175 @@
import requests
import time
import httpx
import asyncio
from typing import Dict, Any
from crawl4ai import (
BrowserConfig, CrawlerRunConfig, DefaultMarkdownGenerator,
PruningContentFilter, JsonCssExtractionStrategy, LLMContentFilter, CacheMode
)
from crawl4ai import LLMConfig
from crawl4ai.docker_client import Crawl4aiDockerClient
class Crawl4AiTester:
def __init__(self, base_url: str = "http://localhost:11235"):
self.base_url = base_url
def submit_and_wait(
self, request_data: Dict[str, Any], timeout: int = 300
) -> Dict[str, Any]:
# Submit crawl job
response = requests.post(f"{self.base_url}/crawl", json=request_data)
task_id = response.json()["task_id"]
print(f"Task ID: {task_id}")
# Poll for result
start_time = time.time()
while True:
if time.time() - start_time > timeout:
raise TimeoutError(
f"Task {task_id} did not complete within {timeout} seconds"
)
result = requests.get(f"{self.base_url}/task/{task_id}")
status = result.json()
if status["status"] == "failed":
print("Task failed:", status.get("error"))
raise Exception(f"Task failed: {status.get('error')}")
if status["status"] == "completed":
return status
time.sleep(2)
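# Illustrative usage of the helper class above (not executed here; the async tests
# below talk to the API directly via httpx instead):
#
#     tester = Crawl4AiTester()
#     status = tester.submit_and_wait(request_data)  # request_data shaped like the payloads in test_direct_api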
async def test_direct_api():
"""Test direct API endpoints without using the client SDK"""
print("\n=== Testing Direct API Calls ===")
# Test 1: Basic crawl with content filtering
browser_config = BrowserConfig(
headless=True,
viewport_width=1200,
viewport_height=800
)
crawler_config = CrawlerRunConfig(
cache_mode=CacheMode.BYPASS,
markdown_generator=DefaultMarkdownGenerator(
content_filter=PruningContentFilter(
threshold=0.48,
threshold_type="fixed",
min_word_threshold=0
),
options={"ignore_links": True}
)
)
request_data = {
"urls": ["https://example.com"],
"browser_config": browser_config.dump(),
"crawler_config": crawler_config.dump()
}
# Make direct API call
async with httpx.AsyncClient() as client:
response = await client.post(
"http://localhost:8000/crawl",
json=request_data,
timeout=300
)
assert response.status_code == 200
result = response.json()
print("Basic crawl result:", result["success"])
# Test 2: Structured extraction with JSON CSS
schema = {
"baseSelector": "article.post",
"fields": [
{"name": "title", "selector": "h1", "type": "text"},
{"name": "content", "selector": ".content", "type": "html"}
]
}
crawler_config = CrawlerRunConfig(
cache_mode=CacheMode.BYPASS,
extraction_strategy=JsonCssExtractionStrategy(schema=schema)
)
request_data["crawler_config"] = crawler_config.dump()
async with httpx.AsyncClient() as client:
response = await client.post(
"http://localhost:8000/crawl",
json=request_data
)
assert response.status_code == 200
result = response.json()
print("Structured extraction result:", result["success"])
# Test 3: Get schema
# async with httpx.AsyncClient() as client:
# response = await client.get("http://localhost:8000/schema")
# assert response.status_code == 200
# schemas = response.json()
# print("Retrieved schemas for:", list(schemas.keys()))
async def test_with_client():
"""Test using the Crawl4AI Docker client SDK"""
print("\n=== Testing Client SDK ===")
async with Crawl4aiDockerClient(verbose=True) as client:
# Test 1: Basic crawl
browser_config = BrowserConfig(headless=True)
crawler_config = CrawlerRunConfig(
cache_mode=CacheMode.BYPASS,
markdown_generator=DefaultMarkdownGenerator(
content_filter=PruningContentFilter(
threshold=0.48,
threshold_type="fixed"
)
)
)
result = await client.crawl(
urls=["https://example.com"],
browser_config=browser_config,
crawler_config=crawler_config
)
print("Client SDK basic crawl:", result.success)
# Test 2: LLM extraction with streaming
crawler_config = CrawlerRunConfig(
cache_mode=CacheMode.BYPASS,
markdown_generator=DefaultMarkdownGenerator(
content_filter=LLMContentFilter(
                    llm_config=LLMConfig(provider="openai/gpt-4o"),
instruction="Extract key technical concepts"
)
),
stream=True
)
async for result in await client.crawl(
urls=["https://example.com"],
browser_config=browser_config,
crawler_config=crawler_config
):
print(f"Streaming result for: {result.url}")
# # Test 3: Get schema
# schemas = await client.get_schema()
# print("Retrieved client schemas for:", list(schemas.keys()))
async def main():
"""Run all tests"""
# Test direct API
print("Testing direct API calls...")
await test_direct_api()
# Test client SDK
print("\nTesting client SDK...")
await test_with_client()
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,34 @@
import asyncio
from crawl4ai.docker_client import Crawl4aiDockerClient
from crawl4ai import (
BrowserConfig,
CrawlerRunConfig
)
async def main():
async with Crawl4aiDockerClient(base_url="http://localhost:8000", verbose=True) as client:
await client.authenticate("test@example.com")
# Non-streaming crawl
results = await client.crawl(
["https://example.com", "https://python.org"],
browser_config=BrowserConfig(headless=True),
crawler_config=CrawlerRunConfig()
)
print(f"Non-streaming results: {results}")
# Streaming crawl
crawler_config = CrawlerRunConfig(stream=True)
async for result in await client.crawl(
["https://example.com", "https://python.org"],
browser_config=BrowserConfig(headless=True),
crawler_config=crawler_config
):
print(f"Streamed result: {result}")
# Get schema
schema = await client.get_schema()
print(f"Schema: {schema}")
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,596 @@
# ==== File: test_rest_api_deep_crawl.py ====
import pytest
import pytest_asyncio
import httpx
import json
import asyncio
import os
from typing import List, Dict, Any, AsyncGenerator
from dotenv import load_dotenv
load_dotenv() # Load environment variables from .env file if present
# --- Test Configuration ---
# BASE_URL = os.getenv("CRAWL4AI_TEST_URL", "http://localhost:11235")  # If server is running in Docker, use the host's IP
BASE_URL = os.getenv("CRAWL4AI_TEST_URL", "http://localhost:8020")  # If server is running in dev debug mode
DEEP_CRAWL_BASE_URL = "https://docs.crawl4ai.com/samples/deepcrawl/"
DEEP_CRAWL_DOMAIN = "docs.crawl4ai.com" # Used for domain filter
# --- Helper Functions ---
def load_proxies_from_env() -> List[Dict]:
"""Load proxies from PROXIES environment variable"""
proxies = []
proxies_str = os.getenv("PROXIES", "")
if not proxies_str:
print("PROXIES environment variable not set or empty.")
return proxies
try:
proxy_list = proxies_str.split(",")
for proxy in proxy_list:
proxy = proxy.strip()
if not proxy:
continue
parts = proxy.split(":")
if len(parts) == 4:
ip, port, username, password = parts
proxies.append({
"server": f"http://{ip}:{port}", # Assuming http, adjust if needed
"username": username,
"password": password,
"ip": ip # Store original IP if available
})
elif len(parts) == 2: # ip:port only
ip, port = parts
proxies.append({
"server": f"http://{ip}:{port}",
"ip": ip
})
else:
print(f"Skipping invalid proxy string format: {proxy}")
except Exception as e:
print(f"Error loading proxies from environment: {e}")
return proxies
async def check_server_health(client: httpx.AsyncClient):
"""Check if the server is healthy before running tests."""
try:
response = await client.get("/health")
response.raise_for_status()
print(f"\nServer healthy: {response.json()}")
return True
except (httpx.RequestError, httpx.HTTPStatusError) as e:
pytest.fail(f"Server health check failed: {e}. Is the server running at {BASE_URL}?", pytrace=False)
async def assert_crawl_result_structure(result: Dict[str, Any], check_ssl=False):
"""Asserts the basic structure of a single crawl result."""
assert isinstance(result, dict)
assert "url" in result
assert "success" in result
assert "html" in result # Basic crawls should return HTML
assert "metadata" in result
assert isinstance(result["metadata"], dict)
assert "depth" in result["metadata"] # Deep crawls add depth
if check_ssl:
assert "ssl_certificate" in result # Check if SSL info is present
assert isinstance(result["ssl_certificate"], dict) or result["ssl_certificate"] is None
async def process_streaming_response(response: httpx.Response) -> List[Dict[str, Any]]:
"""Processes an NDJSON streaming response."""
results = []
completed = False
async for line in response.aiter_lines():
if line:
try:
data = json.loads(line)
if data.get("status") == "completed":
completed = True
break # Stop processing after completion marker
elif data.get("url"): # Ensure it looks like a result object
results.append(data)
else:
print(f"Received non-result JSON line: {data}") # Log other status messages if needed
except json.JSONDecodeError:
pytest.fail(f"Failed to decode JSON line: {line}")
assert completed, "Streaming response did not end with a completion marker."
return results
# --- Pytest Fixtures ---
@pytest_asyncio.fixture(scope="function")
async def async_client() -> AsyncGenerator[httpx.AsyncClient, None]:
"""Provides an async HTTP client"""
# Increased timeout for potentially longer deep crawls
async with httpx.AsyncClient(base_url=BASE_URL, timeout=300.0) as client:
yield client
# No explicit close needed with 'async with'
# --- Test Class ---
@pytest.mark.asyncio
class TestDeepCrawlEndpoints:
@pytest_asyncio.fixture(autouse=True)
async def check_health_before_tests(self, async_client: httpx.AsyncClient):
"""Fixture to ensure server is healthy before each test in the class."""
await check_server_health(async_client)
# 1. Basic Deep Crawl
async def test_deep_crawl_basic_bfs(self, async_client: httpx.AsyncClient):
"""Test BFS deep crawl with limited depth and pages."""
max_depth = 1
max_pages = 3 # start_url + 2 more
payload = {
"urls": [DEEP_CRAWL_BASE_URL],
"browser_config": {"type": "BrowserConfig", "params": {"headless": True}},
"crawler_config": {
"type": "CrawlerRunConfig",
"params": {
"stream": False,
"cache_mode": "BYPASS", # Use string value for CacheMode
"deep_crawl_strategy": {
"type": "BFSDeepCrawlStrategy",
"params": {
"max_depth": max_depth,
"max_pages": max_pages,
# Minimal filters for basic test
"filter_chain": {
"type": "FilterChain",
"params": {
"filters": [
{
"type": "DomainFilter",
"params": {"allowed_domains": [DEEP_CRAWL_DOMAIN]}
}
]
}
}
}
}
}
}
}
response = await async_client.post("/crawl", json=payload)
response.raise_for_status()
data = response.json()
assert data["success"] is True
assert isinstance(data["results"], list)
assert len(data["results"]) > 1 # Should be more than just the start URL
assert len(data["results"]) <= max_pages # Respect max_pages
found_depth_0 = False
found_depth_1 = False
for result in data["results"]:
await assert_crawl_result_structure(result)
assert result["success"] is True
assert DEEP_CRAWL_DOMAIN in result["url"]
depth = result["metadata"]["depth"]
assert depth <= max_depth
if depth == 0: found_depth_0 = True
if depth == 1: found_depth_1 = True
assert found_depth_0
assert found_depth_1
# 2. Deep Crawl with Filtering
async def test_deep_crawl_with_filters(self, async_client: httpx.AsyncClient):
"""Test BFS deep crawl with content type and domain filters."""
max_depth = 1
max_pages = 5
payload = {
"urls": [DEEP_CRAWL_BASE_URL],
"browser_config": {"type": "BrowserConfig", "params": {"headless": True}},
"crawler_config": {
"type": "CrawlerRunConfig",
"params": {
"stream": False,
"cache_mode": "BYPASS",
"deep_crawl_strategy": {
"type": "BFSDeepCrawlStrategy",
"params": {
"max_depth": max_depth,
"max_pages": max_pages,
"filter_chain": {
"type": "FilterChain",
"params": {
"filters": [
{
"type": "DomainFilter",
"params": {"allowed_domains": [DEEP_CRAWL_DOMAIN]}
},
{
"type": "ContentTypeFilter",
"params": {"allowed_types": ["text/html"]}
},
# Example: Exclude specific paths using regex
{
"type": "URLPatternFilter",
"params": {
"patterns": ["*/category-3/*"], # Block category 3
"reverse": True # Block if match
}
}
]
}
}
}
}
}
}
}
response = await async_client.post("/crawl", json=payload)
response.raise_for_status()
data = response.json()
assert data["success"] is True
assert len(data["results"]) > 0
assert len(data["results"]) <= max_pages
for result in data["results"]:
await assert_crawl_result_structure(result)
assert result["success"] is True
assert DEEP_CRAWL_DOMAIN in result["url"]
assert "category-3" not in result["url"] # Check if filter worked
assert result["metadata"]["depth"] <= max_depth
# 3. Deep Crawl with Scoring
async def test_deep_crawl_with_scoring(self, async_client: httpx.AsyncClient):
"""Test BFS deep crawl with URL scoring."""
max_depth = 1
max_pages = 4
payload = {
"urls": [DEEP_CRAWL_BASE_URL],
"browser_config": {"type": "BrowserConfig", "params": {"headless": True}},
"crawler_config": {
"type": "CrawlerRunConfig",
"params": {
"stream": False,
"cache_mode": "BYPASS",
"deep_crawl_strategy": {
"type": "BFSDeepCrawlStrategy",
"params": {
"max_depth": max_depth,
"max_pages": max_pages,
"filter_chain": { # Keep basic domain filter
"type": "FilterChain",
"params": { "filters": [{"type": "DomainFilter", "params": {"allowed_domains": [DEEP_CRAWL_DOMAIN]}}]}
},
"url_scorer": { # Add scorer
"type": "CompositeScorer",
"params": {
"scorers": [
{ # Favor pages with 'product' in the URL
"type": "KeywordRelevanceScorer",
"params": {"keywords": ["product"], "weight": 1.0}
},
{ # Penalize deep paths slightly
"type": "PathDepthScorer",
"params": {"optimal_depth": 2, "weight": -0.2}
}
]
}
},
# Set a threshold if needed: "score_threshold": 0.1
}
}
}
}
}
response = await async_client.post("/crawl", json=payload)
response.raise_for_status()
data = response.json()
assert data["success"] is True
assert len(data["results"]) > 0
assert len(data["results"]) <= max_pages
# Check if results seem biased towards products (harder to assert strictly without knowing exact scores)
product_urls_found = any("product_" in result["url"] for result in data["results"] if result["metadata"]["depth"] > 0)
print(f"Product URLs found among depth > 0 results: {product_urls_found}")
# We expect scoring to prioritize product pages if available within limits
# assert product_urls_found # This might be too strict depending on site structure and limits
for result in data["results"]:
await assert_crawl_result_structure(result)
assert result["success"] is True
assert result["metadata"]["depth"] <= max_depth
# 4. Deep Crawl with CSS Extraction
async def test_deep_crawl_with_css_extraction(self, async_client: httpx.AsyncClient):
"""Test BFS deep crawl combined with JsonCssExtractionStrategy."""
max_depth = 6 # Go deep enough to reach product pages
max_pages = 20
# Schema to extract product details
product_schema = {
"name": "ProductDetails",
"baseSelector": "div.container", # Base for product page
"fields": [
{"name": "product_title", "selector": "h1", "type": "text"},
{"name": "price", "selector": ".product-price", "type": "text"},
{"name": "description", "selector": ".product-description p", "type": "text"},
{"name": "specs", "selector": ".product-specs li", "type": "list", "fields":[
{"name": "spec_name", "selector": ".spec-name", "type": "text"},
{"name": "spec_value", "selector": ".spec-value", "type": "text"}
]}
]
}
payload = {
"urls": [DEEP_CRAWL_BASE_URL],
"browser_config": {"type": "BrowserConfig", "params": {"headless": True}},
"crawler_config": {
"type": "CrawlerRunConfig",
"params": {
"stream": False,
"cache_mode": "BYPASS",
"extraction_strategy": { # Apply extraction to ALL crawled pages
"type": "JsonCssExtractionStrategy",
"params": {"schema": {"type": "dict", "value": product_schema}}
},
"deep_crawl_strategy": {
"type": "BFSDeepCrawlStrategy",
"params": {
"max_depth": max_depth,
"max_pages": max_pages,
"filter_chain": { # Only crawl HTML on our domain
"type": "FilterChain",
"params": {
"filters": [
{"type": "DomainFilter", "params": {"allowed_domains": [DEEP_CRAWL_DOMAIN]}},
{"type": "ContentTypeFilter", "params": {"allowed_types": ["text/html"]}}
]
}
}
# Optional: Add scoring to prioritize product pages for extraction
}
}
}
}
}
response = await async_client.post("/crawl", json=payload)
response.raise_for_status()
data = response.json()
assert data["success"] is True
assert len(data["results"]) > 0
# assert len(data["results"]) <= max_pages
found_extracted_product = False
for result in data["results"]:
await assert_crawl_result_structure(result)
assert result["success"] is True
assert "extracted_content" in result
if "product_" in result["url"]: # Check product pages specifically
assert result["extracted_content"] is not None
try:
extracted = json.loads(result["extracted_content"])
# Schema returns list even if one base match
assert isinstance(extracted, list)
if extracted:
item = extracted[0]
assert "product_title" in item and item["product_title"]
assert "price" in item and item["price"]
# Specs might be empty list if not found
assert "specs" in item and isinstance(item["specs"], list)
found_extracted_product = True
print(f"Extracted product: {item.get('product_title')}")
except (json.JSONDecodeError, AssertionError, IndexError) as e:
pytest.fail(f"Extraction validation failed for {result['url']}: {e}\nContent: {result['extracted_content']}")
# else:
# # Non-product pages might have None or empty list depending on schema match
# assert result["extracted_content"] is None or json.loads(result["extracted_content"]) == []
assert found_extracted_product, "Did not find any pages where product data was successfully extracted."
# 5. Deep Crawl with LLM Extraction (Requires Server LLM Setup)
async def test_deep_crawl_with_llm_extraction(self, async_client: httpx.AsyncClient):
"""Test BFS deep crawl combined with LLMExtractionStrategy."""
max_depth = 1 # Limit depth to keep LLM calls manageable
max_pages = 3
payload = {
"urls": [DEEP_CRAWL_BASE_URL],
"browser_config": {"type": "BrowserConfig", "params": {"headless": True}},
"crawler_config": {
"type": "CrawlerRunConfig",
"params": {
"stream": False,
"cache_mode": "BYPASS",
"extraction_strategy": { # Apply LLM extraction to crawled pages
"type": "LLMExtractionStrategy",
"params": {
"instruction": "Extract the main H1 title and the text content of the first paragraph.",
"llm_config": { # Example override, rely on server default if possible
"type": "LLMConfig",
"params": {"provider": "openai/gpt-4.1-mini"} # Use a cheaper model for testing
},
"schema": { # Expected JSON output
"type": "dict",
"value": {
"title": "PageContent", "type": "object",
"properties": {
"h1_title": {"type": "string"},
"first_paragraph": {"type": "string"}
}
}
}
}
},
"deep_crawl_strategy": {
"type": "BFSDeepCrawlStrategy",
"params": {
"max_depth": max_depth,
"max_pages": max_pages,
"filter_chain": {
"type": "FilterChain",
"params": {
"filters": [
{"type": "DomainFilter", "params": {"allowed_domains": [DEEP_CRAWL_DOMAIN]}},
{"type": "ContentTypeFilter", "params": {"allowed_types": ["text/html"]}}
]
}
}
}
}
}
}
}
try:
response = await async_client.post("/crawl", json=payload)
response.raise_for_status()
data = response.json()
except httpx.HTTPStatusError as e:
pytest.fail(f"Deep Crawl + LLM extraction request failed: {e}. Response: {e.response.text}. Check server logs and LLM API key setup.")
except httpx.RequestError as e:
pytest.fail(f"Deep Crawl + LLM extraction request failed: {e}.")
assert data["success"] is True
assert len(data["results"]) > 0
assert len(data["results"]) <= max_pages
found_llm_extraction = False
for result in data["results"]:
await assert_crawl_result_structure(result)
assert result["success"] is True
assert "extracted_content" in result
assert result["extracted_content"] is not None
try:
extracted = json.loads(result["extracted_content"])
if isinstance(extracted, list): extracted = extracted[0] # Handle list output
assert isinstance(extracted, dict)
assert "h1_title" in extracted # Check keys based on schema
assert "first_paragraph" in extracted
found_llm_extraction = True
print(f"LLM extracted from {result['url']}: Title='{extracted.get('h1_title')}'")
except (json.JSONDecodeError, AssertionError, IndexError, TypeError) as e:
pytest.fail(f"LLM extraction validation failed for {result['url']}: {e}\nContent: {result['extracted_content']}")
assert found_llm_extraction, "LLM extraction did not yield expected data on any crawled page."
# 6. Deep Crawl with SSL Certificate Fetching
async def test_deep_crawl_with_ssl(self, async_client: httpx.AsyncClient):
"""Test BFS deep crawl with fetch_ssl_certificate enabled."""
max_depth = 0 # Only fetch for start URL to keep test fast
max_pages = 1
payload = {
"urls": [DEEP_CRAWL_BASE_URL],
"browser_config": {"type": "BrowserConfig", "params": {"headless": True}},
"crawler_config": {
"type": "CrawlerRunConfig",
"params": {
"stream": False,
"cache_mode": "BYPASS",
"fetch_ssl_certificate": True, # <-- Enable SSL fetching
"deep_crawl_strategy": {
"type": "BFSDeepCrawlStrategy",
"params": {
"max_depth": max_depth,
"max_pages": max_pages,
}
}
}
}
}
response = await async_client.post("/crawl", json=payload)
response.raise_for_status()
data = response.json()
assert data["success"] is True
assert len(data["results"]) == 1
result = data["results"][0]
await assert_crawl_result_structure(result, check_ssl=True) # <-- Tell helper to check SSL field
assert result["success"] is True
# Check if SSL info was actually retrieved
if result["ssl_certificate"]:
# Assert directly using dictionary keys
assert isinstance(result["ssl_certificate"], dict) # Verify it's a dict
assert "issuer" in result["ssl_certificate"]
assert "subject" in result["ssl_certificate"]
            assert "not_before" in result["ssl_certificate"]   # certificate validity start
            assert "not_after" in result["ssl_certificate"]    # certificate validity end
assert "fingerprint" in result["ssl_certificate"] # Check another key
# This print statement using .get() already works correctly with dictionaries
print(f"SSL Issuer Org: {result['ssl_certificate'].get('issuer', {}).get('O', 'N/A')}")
print(f"SSL Valid From: {result['ssl_certificate'].get('not_before', 'N/A')}")
else:
# This part remains the same
print("SSL Certificate was null in the result.")
# 7. Deep Crawl with Proxy Rotation (Requires PROXIES env var)
async def test_deep_crawl_with_proxies(self, async_client: httpx.AsyncClient):
"""Test BFS deep crawl using proxy rotation."""
proxies = load_proxies_from_env()
if not proxies:
pytest.skip("Skipping proxy test: PROXIES environment variable not set or empty.")
print(f"\nTesting with {len(proxies)} proxies loaded from environment.")
max_depth = 1
max_pages = 3
payload = {
"urls": [DEEP_CRAWL_BASE_URL], # Use the dummy site
# Use a BrowserConfig that *might* pick up proxy if set, but rely on CrawlerRunConfig
"browser_config": {"type": "BrowserConfig", "params": {"headless": True}},
"crawler_config": {
"type": "CrawlerRunConfig",
"params": {
"stream": False,
"cache_mode": "BYPASS",
"proxy_rotation_strategy": { # <-- Define the strategy
"type": "RoundRobinProxyStrategy",
"params": {
# Convert ProxyConfig dicts back to the serialized format expected by server
"proxies": [{"type": "ProxyConfig", "params": p} for p in proxies]
}
},
"deep_crawl_strategy": {
"type": "BFSDeepCrawlStrategy",
"params": {
"max_depth": max_depth,
"max_pages": max_pages,
"filter_chain": {
"type": "FilterChain",
"params": { "filters": [{"type": "DomainFilter", "params": {"allowed_domains": [DEEP_CRAWL_DOMAIN]}}]}
}
}
}
}
}
}
try:
response = await async_client.post("/crawl", json=payload)
response.raise_for_status()
data = response.json()
except httpx.HTTPStatusError as e:
# Proxies often cause connection errors, catch them
pytest.fail(f"Proxy deep crawl failed: {e}. Response: {e.response.text}. Are proxies valid and accessible by the server?")
except httpx.RequestError as e:
pytest.fail(f"Proxy deep crawl request failed: {e}. Are proxies valid and accessible?")
assert data["success"] is True
assert len(data["results"]) > 0
assert len(data["results"]) <= max_pages
# Primary assertion is that the crawl succeeded *with* proxy config
print(f"Proxy deep crawl completed successfully for {len(data['results'])} pages.")
# Verifying specific proxy usage requires server logs or custom headers/responses
# --- Main Execution Block (for running script directly) ---
if __name__ == "__main__":
pytest_args = ["-v", "-s", __file__]
# Example: Run only proxy test
# pytest_args.append("-k test_deep_crawl_with_proxies")
print(f"Running pytest with args: {pytest_args}")
exit_code = pytest.main(pytest_args)
print(f"Pytest finished with exit code: {exit_code}")

View File

@@ -0,0 +1,255 @@
import inspect
from typing import Any, Dict
from enum import Enum
from crawl4ai import LLMConfig
def to_serializable_dict(obj: Any) -> Dict:
"""
Recursively convert an object to a serializable dictionary using {type, params} structure
for complex objects.
"""
if obj is None:
return None
# Handle basic types
if isinstance(obj, (str, int, float, bool)):
return obj
# Handle Enum
if isinstance(obj, Enum):
return {
"type": obj.__class__.__name__,
"params": obj.value
}
# Handle datetime objects
if hasattr(obj, 'isoformat'):
return obj.isoformat()
# Handle lists, tuples, and sets
if isinstance(obj, (list, tuple, set)):
return [to_serializable_dict(item) for item in obj]
# Handle dictionaries - preserve them as-is
if isinstance(obj, dict):
return {
"type": "dict", # Mark as plain dictionary
"value": {str(k): to_serializable_dict(v) for k, v in obj.items()}
}
# Handle class instances
if hasattr(obj, '__class__'):
# Get constructor signature
sig = inspect.signature(obj.__class__.__init__)
params = sig.parameters
# Get current values
current_values = {}
for name, param in params.items():
if name == 'self':
continue
value = getattr(obj, name, param.default)
# Only include if different from default, considering empty values
if not (is_empty_value(value) and is_empty_value(param.default)):
if value != param.default:
current_values[name] = to_serializable_dict(value)
return {
"type": obj.__class__.__name__,
"params": current_values
}
return str(obj)
def from_serializable_dict(data: Any) -> Any:
"""
Recursively convert a serializable dictionary back to an object instance.
"""
if data is None:
return None
# Handle basic types
if isinstance(data, (str, int, float, bool)):
return data
# Handle typed data
if isinstance(data, dict) and "type" in data:
# Handle plain dictionaries
if data["type"] == "dict":
return {k: from_serializable_dict(v) for k, v in data["value"].items()}
# Import from crawl4ai for class instances
import crawl4ai
cls = getattr(crawl4ai, data["type"])
# Handle Enum
if issubclass(cls, Enum):
return cls(data["params"])
# Handle class instances
constructor_args = {
k: from_serializable_dict(v) for k, v in data["params"].items()
}
return cls(**constructor_args)
# Handle lists
if isinstance(data, list):
return [from_serializable_dict(item) for item in data]
# Handle raw dictionaries (legacy support)
if isinstance(data, dict):
return {k: from_serializable_dict(v) for k, v in data.items()}
return data
def is_empty_value(value: Any) -> bool:
"""Check if a value is effectively empty/null."""
if value is None:
return True
if isinstance(value, (list, tuple, set, dict, str)) and len(value) == 0:
return True
return False
# if __name__ == "__main__":
# from crawl4ai import (
# CrawlerRunConfig, CacheMode, DefaultMarkdownGenerator,
# PruningContentFilter, BM25ContentFilter, LLMContentFilter,
# JsonCssExtractionStrategy, CosineStrategy, RegexChunking,
# WebScrapingStrategy, LXMLWebScrapingStrategy
# )
# # Test Case 1: BM25 content filtering through markdown generator
# config1 = CrawlerRunConfig(
# cache_mode=CacheMode.BYPASS,
# markdown_generator=DefaultMarkdownGenerator(
# content_filter=BM25ContentFilter(
# user_query="technology articles",
# bm25_threshold=1.2,
# language="english"
# )
# ),
# chunking_strategy=RegexChunking(patterns=[r"\n\n", r"\.\s+"]),
# excluded_tags=["nav", "footer", "aside"],
# remove_overlay_elements=True
# )
# # Serialize
# serialized = to_serializable_dict(config1)
# print("\nSerialized Config:")
# print(serialized)
# # Example output structure would now look like:
# """
# {
# "type": "CrawlerRunConfig",
# "params": {
# "cache_mode": {
# "type": "CacheMode",
# "params": "bypass"
# },
# "markdown_generator": {
# "type": "DefaultMarkdownGenerator",
# "params": {
# "content_filter": {
# "type": "BM25ContentFilter",
# "params": {
# "user_query": "technology articles",
# "bm25_threshold": 1.2,
# "language": "english"
# }
# }
# }
# }
# }
# }
# """
# # Deserialize
# deserialized = from_serializable_dict(serialized)
# print("\nDeserialized Config:")
# print(to_serializable_dict(deserialized))
# # Verify they match
# assert to_serializable_dict(config1) == to_serializable_dict(deserialized)
# print("\nVerification passed: Configuration matches after serialization/deserialization!")
if __name__ == "__main__":
from crawl4ai import (
CrawlerRunConfig, CacheMode, DefaultMarkdownGenerator,
PruningContentFilter, BM25ContentFilter, LLMContentFilter,
JsonCssExtractionStrategy, RegexChunking,
WebScrapingStrategy, LXMLWebScrapingStrategy
)
# Test Case 1: BM25 content filtering through markdown generator
config1 = CrawlerRunConfig(
cache_mode=CacheMode.BYPASS,
markdown_generator=DefaultMarkdownGenerator(
content_filter=BM25ContentFilter(
user_query="technology articles",
bm25_threshold=1.2,
language="english"
)
),
chunking_strategy=RegexChunking(patterns=[r"\n\n", r"\.\s+"]),
excluded_tags=["nav", "footer", "aside"],
remove_overlay_elements=True
)
# Test Case 2: LLM-based extraction with pruning filter
schema = {
"baseSelector": "article.post",
"fields": [
{"name": "title", "selector": "h1", "type": "text"},
{"name": "content", "selector": ".content", "type": "html"}
]
}
config2 = CrawlerRunConfig(
extraction_strategy=JsonCssExtractionStrategy(schema=schema),
markdown_generator=DefaultMarkdownGenerator(
content_filter=PruningContentFilter(
threshold=0.48,
threshold_type="fixed",
min_word_threshold=0
),
options={"ignore_links": True}
),
scraping_strategy=LXMLWebScrapingStrategy()
)
# Test Case 3:LLM content filter
config3 = CrawlerRunConfig(
markdown_generator=DefaultMarkdownGenerator(
content_filter=LLMContentFilter(
llm_config = LLMConfig(provider="openai/gpt-4"),
instruction="Extract key technical concepts",
chunk_token_threshold=2000,
overlap_rate=0.1
),
options={"ignore_images": True}
),
scraping_strategy=WebScrapingStrategy()
)
# Test all configurations
test_configs = [config1, config2, config3]
for i, config in enumerate(test_configs, 1):
print(f"\nTesting Configuration {i}:")
# Serialize
serialized = to_serializable_dict(config)
print(f"\nSerialized Config {i}:")
print(serialized)
# Deserialize
deserialized = from_serializable_dict(serialized)
print(f"\nDeserialized Config {i}:")
print(to_serializable_dict(deserialized)) # Convert back to dict for comparison
# Verify they match
assert to_serializable_dict(config) == to_serializable_dict(deserialized)
print(f"\nVerification passed: Configuration {i} matches after serialization/deserialization!")

146
tests/docker/test_server.py Normal file
View File

@@ -0,0 +1,146 @@
import asyncio
import json
from typing import Optional
from urllib.parse import quote
async def test_endpoint(
endpoint: str,
url: str,
params: Optional[dict] = None,
expected_status: int = 200
) -> None:
"""Test an endpoint and print results"""
import aiohttp
params = params or {}
param_str = "&".join(f"{k}={v}" for k, v in params.items())
full_url = f"http://localhost:8000/{endpoint}/{quote(url)}"
if param_str:
full_url += f"?{param_str}"
print(f"\nTesting: {full_url}")
try:
async with aiohttp.ClientSession() as session:
async with session.get(full_url) as response:
status = response.status
try:
data = await response.json()
                except Exception:
data = await response.text()
print(f"Status: {status} (Expected: {expected_status})")
if isinstance(data, dict):
print(f"Response: {json.dumps(data, indent=2)}")
else:
print(f"Response: {data[:500]}...") # First 500 chars
assert status == expected_status
return data
except Exception as e:
print(f"Error: {str(e)}")
return None
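# For reference, a call such as test_endpoint("md", "example.com", {"f": "fit", "c": "1"})
# requests roughly http://localhost:8000/md/example.com?f=fit&c=1 (URL built above).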
async def test_llm_task_completion(task_id: str) -> None:
"""Poll task until completion"""
for _ in range(10): # Try 10 times
result = await test_endpoint("llm", task_id)
if result and result.get("status") in ["completed", "failed"]:
return result
print("Task still processing, waiting 5 seconds...")
await asyncio.sleep(5)
print("Task timed out")
async def run_tests():
print("Starting API Tests...")
# Test URLs
urls = [
"example.com",
"https://www.python.org",
"https://news.ycombinator.com/news",
"https://github.com/trending"
]
print("\n=== Testing Markdown Endpoint ===")
    for url in []:  # urls  (loop disabled for now)
# Test different filter types
for filter_type in ["raw", "fit", "bm25", "llm"]:
params = {"f": filter_type}
if filter_type in ["bm25", "llm"]:
params["q"] = "extract main content"
# Test with and without cache
for cache in ["0", "1"]:
params["c"] = cache
await test_endpoint("md", url, params)
await asyncio.sleep(1) # Be nice to the server
print("\n=== Testing LLM Endpoint ===")
    for url in []:  # urls  (loop disabled for now)
# Test basic extraction
result = await test_endpoint(
"llm",
url,
{"q": "Extract title and main content"}
)
if result and "task_id" in result:
print("\nChecking task completion...")
await test_llm_task_completion(result["task_id"])
# Test with schema
schema = {
"type": "object",
"properties": {
"title": {"type": "string"},
"content": {"type": "string"},
"links": {"type": "array", "items": {"type": "string"}}
}
}
result = await test_endpoint(
"llm",
url,
{
"q": "Extract content with links",
"s": json.dumps(schema),
"c": "1" # Test with cache
}
)
if result and "task_id" in result:
print("\nChecking schema task completion...")
await test_llm_task_completion(result["task_id"])
await asyncio.sleep(2) # Be nice to the server
print("\n=== Testing Error Cases ===")
# Test invalid URL
await test_endpoint(
"md",
"not_a_real_url",
expected_status=500
)
# Test invalid filter type
await test_endpoint(
"md",
"example.com",
{"f": "invalid"},
expected_status=422
)
# Test LLM without query
await test_endpoint(
"llm",
"example.com"
)
# Test invalid task ID
await test_endpoint(
"llm",
"llm_invalid_task",
expected_status=404
)
print("\nAll tests completed!")
if __name__ == "__main__":
asyncio.run(run_tests())

View File

@@ -0,0 +1,655 @@
import pytest
import pytest_asyncio
import httpx
import json
import asyncio
import os
from typing import List, Dict, Any, AsyncGenerator
from dotenv import load_dotenv
load_dotenv()
# Optional: Import crawl4ai classes directly for reference/easier payload creation aid
# You don't strictly NEED these imports for the tests to run against the server,
# but they help in understanding the structure you are mimicking in JSON.
from crawl4ai import (
BrowserConfig,
CrawlerRunConfig,
CacheMode,
DefaultMarkdownGenerator,
PruningContentFilter,
BM25ContentFilter,
BFSDeepCrawlStrategy,
FilterChain,
ContentTypeFilter,
DomainFilter,
CompositeScorer,
KeywordRelevanceScorer,
PathDepthScorer,
JsonCssExtractionStrategy,
LLMExtractionStrategy,
LLMConfig
)
# --- Test Configuration ---
# BASE_URL = os.getenv("CRAWL4AI_TEST_URL", "http://localhost:8020") # Make base URL configurable
BASE_URL = os.getenv("CRAWL4AI_TEST_URL", "http://localhost:11235") # Make base URL configurable
# Use a known simple HTML page for basic tests
SIMPLE_HTML_URL = "https://httpbin.org/html"
# Use a site suitable for scraping tests
SCRAPE_TARGET_URL = "http://books.toscrape.com/"
# Use a site with internal links for deep crawl tests
DEEP_CRAWL_URL = "https://python.org"
# --- Pytest Fixtures ---
# Use the built-in event_loop fixture from pytest_asyncio
# The custom implementation was causing issues with closing the loop
@pytest_asyncio.fixture(scope="function") # Changed to function scope to avoid event loop issues
async def async_client() -> AsyncGenerator[httpx.AsyncClient, None]:
"""Provides an async HTTP client"""
client = httpx.AsyncClient(base_url=BASE_URL, timeout=120.0)
yield client
await client.aclose()
# --- Helper Functions ---
async def check_server_health(client: httpx.AsyncClient):
"""Check if the server is healthy before running tests."""
try:
response = await client.get("/health")
response.raise_for_status()
print(f"\nServer healthy: {response.json()}")
return True
except (httpx.RequestError, httpx.HTTPStatusError) as e:
pytest.fail(f"Server health check failed: {e}. Is the server running at {BASE_URL}?", pytrace=False)
async def assert_crawl_result_structure(result: Dict[str, Any]):
"""Asserts the basic structure of a single crawl result."""
assert isinstance(result, dict)
assert "url" in result
assert "success" in result
assert "html" in result
# Add more common checks if needed
async def process_streaming_response(response: httpx.Response) -> List[Dict[str, Any]]:
"""Processes an NDJSON streaming response."""
results = []
completed = False
async for line in response.aiter_lines():
if line:
try:
data = json.loads(line)
if data.get("status") == "completed":
completed = True
break # Stop processing after completion marker
else:
results.append(data)
except json.JSONDecodeError:
pytest.fail(f"Failed to decode JSON line: {line}")
assert completed, "Streaming response did not end with a completion marker."
return results
# --- Test Class ---
@pytest.mark.asyncio
class TestCrawlEndpoints:
@pytest_asyncio.fixture(autouse=True)
async def check_health_before_tests(self, async_client: httpx.AsyncClient):
"""Fixture to ensure server is healthy before each test in the class."""
await check_server_health(async_client)
# 1. Simple Requests (Primitives)
async def test_simple_crawl_single_url(self, async_client: httpx.AsyncClient):
"""Test /crawl with a single URL and simple config values."""
payload = {
"urls": [SIMPLE_HTML_URL],
"browser_config": {
"type": "BrowserConfig",
"params": {
"headless": True,
}
},
"crawler_config": {
"type": "CrawlerRunConfig",
"params": {
"stream": False, # Explicitly false for /crawl
"screenshot": False,
"cache_mode": CacheMode.BYPASS.value # Use enum value
}
}
}
try:
response = await async_client.post("/crawl", json=payload)
print(f"Response status: {response.status_code}")
response.raise_for_status()
data = response.json()
except httpx.HTTPStatusError as e:
print(f"Server error: {e}")
print(f"Response content: {e.response.text}")
raise
assert data["success"] is True
assert isinstance(data["results"], list)
assert len(data["results"]) == 1
result = data["results"][0]
await assert_crawl_result_structure(result)
assert result["success"] is True
assert result["url"] == SIMPLE_HTML_URL
assert "<h1>Herman Melville - Moby-Dick</h1>" in result["html"]
# We don't specify a markdown generator in this test, so don't make assumptions about markdown field
# It might be null, missing, or populated depending on the server's default behavior
async def test_simple_crawl_single_url_streaming(self, async_client: httpx.AsyncClient):
"""Test /crawl/stream with a single URL and simple config values."""
payload = {
"urls": [SIMPLE_HTML_URL],
"browser_config": {
"type": "BrowserConfig",
"params": {
"headless": True,
}
},
"crawler_config": {
"type": "CrawlerRunConfig",
"params": {
"stream": True, # Must be true for /crawl/stream
"screenshot": False,
"cache_mode": CacheMode.BYPASS.value
}
}
}
async with async_client.stream("POST", "/crawl/stream", json=payload) as response:
response.raise_for_status()
results = await process_streaming_response(response)
assert len(results) == 1
result = results[0]
await assert_crawl_result_structure(result)
assert result["success"] is True
assert result["url"] == SIMPLE_HTML_URL
assert "<h1>Herman Melville - Moby-Dick</h1>" in result["html"]
# 2. Multi-URL and Dispatcher
async def test_multi_url_crawl(self, async_client: httpx.AsyncClient):
"""Test /crawl with multiple URLs, implicitly testing dispatcher."""
urls = [SIMPLE_HTML_URL, "https://httpbin.org/links/10/0"]
payload = {
"urls": urls,
"browser_config": {
"type": "BrowserConfig",
"params": {"headless": True}
},
"crawler_config": {
"type": "CrawlerRunConfig",
"params": {"stream": False, "cache_mode": CacheMode.BYPASS.value}
}
}
try:
print(f"Sending deep crawl request to server...")
response = await async_client.post("/crawl", json=payload)
print(f"Response status: {response.status_code}")
if response.status_code >= 400:
error_detail = response.json().get('detail', 'No detail provided')
print(f"Error detail: {error_detail}")
print(f"Full response: {response.text}")
response.raise_for_status()
data = response.json()
except httpx.HTTPStatusError as e:
print(f"Server error status: {e.response.status_code}")
print(f"Server error response: {e.response.text}")
try:
error_json = e.response.json()
print(f"Parsed error: {error_json}")
            except Exception:
print("Could not parse error response as JSON")
raise
assert data["success"] is True
assert isinstance(data["results"], list)
assert len(data["results"]) == len(urls)
for result in data["results"]:
await assert_crawl_result_structure(result)
assert result["success"] is True
assert result["url"] in urls
async def test_multi_url_crawl_streaming(self, async_client: httpx.AsyncClient):
"""Test /crawl/stream with multiple URLs."""
urls = [SIMPLE_HTML_URL, "https://httpbin.org/links/10/0"]
payload = {
"urls": urls,
"browser_config": {
"type": "BrowserConfig",
"params": {"headless": True}
},
"crawler_config": {
"type": "CrawlerRunConfig",
"params": {"stream": True, "cache_mode": CacheMode.BYPASS.value}
}
}
async with async_client.stream("POST", "/crawl/stream", json=payload) as response:
response.raise_for_status()
results = await process_streaming_response(response)
assert len(results) == len(urls)
processed_urls = set()
for result in results:
await assert_crawl_result_structure(result)
assert result["success"] is True
assert result["url"] in urls
processed_urls.add(result["url"])
assert processed_urls == set(urls) # Ensure all URLs were processed
# 3. Class Values and Nested Classes (Markdown Generator)
async def test_crawl_with_markdown_pruning_filter(self, async_client: httpx.AsyncClient):
"""Test /crawl with MarkdownGenerator using PruningContentFilter."""
payload = {
"urls": [SIMPLE_HTML_URL],
"browser_config": {"type": "BrowserConfig", "params": {"headless": True}},
"crawler_config": {
"type": "CrawlerRunConfig",
"params": {
"cache_mode": CacheMode.ENABLED.value, # Test different cache mode
"markdown_generator": {
"type": "DefaultMarkdownGenerator",
"params": {
"content_filter": {
"type": "PruningContentFilter",
"params": {
"threshold": 0.5, # Example param
"threshold_type": "relative"
}
}
}
}
}
}
}
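# For reference, the nested markdown_generator payload above roughly mirrors this
# in-process configuration (a sketch only; constructor signatures may vary by version):
#   DefaultMarkdownGenerator(content_filter=PruningContentFilter(threshold=0.5, threshold_type="relative"))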
try:
print(f"Sending deep crawl request to server...")
response = await async_client.post("/crawl", json=payload)
print(f"Response status: {response.status_code}")
if response.status_code >= 400:
error_detail = response.json().get('detail', 'No detail provided')
print(f"Error detail: {error_detail}")
print(f"Full response: {response.text}")
response.raise_for_status()
data = response.json()
except httpx.HTTPStatusError as e:
print(f"Server error status: {e.response.status_code}")
print(f"Server error response: {e.response.text}")
try:
error_json = e.response.json()
print(f"Parsed error: {error_json}")
except:
print("Could not parse error response as JSON")
raise
assert data["success"] is True
assert len(data["results"]) == 1
result = data["results"][0]
await assert_crawl_result_structure(result)
assert result["success"] is True
assert "markdown" in result
assert isinstance(result["markdown"], dict)
assert "raw_markdown" in result["markdown"]
assert "fit_markdown" in result["markdown"] # Pruning creates fit_markdown
assert "Moby-Dick" in result["markdown"]["raw_markdown"]
# Fit markdown content might be different/shorter due to pruning
assert len(result["markdown"]["fit_markdown"]) <= len(result["markdown"]["raw_markdown"])
async def test_crawl_with_markdown_bm25_filter(self, async_client: httpx.AsyncClient):
"""Test /crawl with MarkdownGenerator using BM25ContentFilter."""
payload = {
"urls": [SIMPLE_HTML_URL],
"browser_config": {"type": "BrowserConfig", "params": {"headless": True}},
"crawler_config": {
"type": "CrawlerRunConfig",
"params": {
"markdown_generator": {
"type": "DefaultMarkdownGenerator",
"params": {
"content_filter": {
"type": "BM25ContentFilter",
"params": {
"user_query": "Herman Melville", # Query for BM25
"bm25_threshold": 0.1, # Lower threshold to increase matches
"language": "english" # Valid parameters
}
}
}
}
}
}
}
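# Roughly equivalent in-process form (a sketch; parameter names are taken from the payload above):
#   DefaultMarkdownGenerator(content_filter=BM25ContentFilter(user_query="Herman Melville", bm25_threshold=0.1, language="english"))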
try:
print(f"Payload for BM25 test: {json.dumps(payload)}")
response = await async_client.post("/crawl", json=payload)
print(f"Response status: {response.status_code}")
if response.status_code >= 400:
error_detail = response.json().get('detail', 'No detail provided')
print(f"Error detail: {error_detail}")
print(f"Full response: {response.text}")
response.raise_for_status()
data = response.json()
except httpx.HTTPStatusError as e:
print(f"Server error status: {e.response.status_code}")
print(f"Server error response: {e.response.text}")
try:
error_json = e.response.json()
print(f"Parsed error: {error_json}")
except:
print("Could not parse error response as JSON")
raise
assert data["success"] is True
assert len(data["results"]) == 1
result = data["results"][0]
await assert_crawl_result_structure(result)
assert result["success"] is True
assert "markdown" in result
assert isinstance(result["markdown"], dict)
assert "raw_markdown" in result["markdown"]
assert "fit_markdown" in result["markdown"] # BM25 creates fit_markdown
# Print values for debug
print(f"Raw markdown length: {len(result['markdown']['raw_markdown'])}")
print(f"Fit markdown length: {len(result['markdown']['fit_markdown'])}")
# Either fit_markdown has content (possibly including our query terms)
# or it might be empty if no good BM25 matches were found
# Don't assert specific content since it can be environment-dependent
# 4. Deep Crawling
async def test_deep_crawl(self, async_client: httpx.AsyncClient):
"""Test /crawl with a deep crawl strategy."""
payload = {
"urls": [DEEP_CRAWL_URL], # Start URL
"browser_config": {"type": "BrowserConfig", "params": {"headless": True}},
"crawler_config": {
"type": "CrawlerRunConfig",
"params": {
"stream": False,
"cache_mode": CacheMode.BYPASS.value,
"deep_crawl_strategy": {
"type": "BFSDeepCrawlStrategy",
"params": {
"max_depth": 1, # Limit depth for testing speed
"max_pages": 5, # Limit pages to crawl
"filter_chain": {
"type": "FilterChain",
"params": {
"filters": [
{
"type": "ContentTypeFilter",
"params": {"allowed_types": ["text/html"]}
},
{
"type": "DomainFilter",
"params": {"allowed_domains": ["python.org", "docs.python.org"]} # Include important subdomains
}
]
}
},
"url_scorer": {
"type": "CompositeScorer",
"params": {
"scorers": [
{
"type": "KeywordRelevanceScorer",
"params": {"keywords": ["documentation", "tutorial"]}
},
{
"type": "PathDepthScorer",
"params": {"weight": 0.5, "optimal_depth": 2}
}
]
}
}
}
}
}
}
}
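# For reference, the deep_crawl_strategy payload above is roughly the JSON form of this
# in-process configuration (a sketch; exact constructor arguments may differ by version):
#   BFSDeepCrawlStrategy(
#       max_depth=1, max_pages=5,
#       filter_chain=FilterChain([
#           ContentTypeFilter(allowed_types=["text/html"]),
#           DomainFilter(allowed_domains=["python.org", "docs.python.org"]),
#       ]),
#       url_scorer=CompositeScorer([
#           KeywordRelevanceScorer(keywords=["documentation", "tutorial"]),
#           PathDepthScorer(weight=0.5, optimal_depth=2),
#       ]),
#   )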
try:
print(f"Sending deep crawl request to server...")
response = await async_client.post("/crawl", json=payload)
print(f"Response status: {response.status_code}")
if response.status_code >= 400:
error_detail = response.json().get('detail', 'No detail provided')
print(f"Error detail: {error_detail}")
print(f"Full response: {response.text}")
response.raise_for_status()
data = response.json()
except httpx.HTTPStatusError as e:
print(f"Server error status: {e.response.status_code}")
print(f"Server error response: {e.response.text}")
try:
error_json = e.response.json()
print(f"Parsed error: {error_json}")
except:
print("Could not parse error response as JSON")
raise
assert data["success"] is True
assert isinstance(data["results"], list)
# Expect more than 1 result due to deep crawl (start URL + crawled links)
assert len(data["results"]) > 1
assert len(data["results"]) <= 6 # Start URL + max_links=5
start_url_found = False
crawled_urls_found = False
for result in data["results"]:
await assert_crawl_result_structure(result)
assert result["success"] is True
# Print URL for debugging
print(f"Crawled URL: {result['url']}")
# Allow URLs that contain python.org (including subdomains like docs.python.org)
assert "python.org" in result["url"]
if result["url"] == DEEP_CRAWL_URL:
start_url_found = True
else:
crawled_urls_found = True
assert start_url_found
assert crawled_urls_found
# 5. Extraction without LLM (JSON/CSS)
async def test_json_css_extraction(self, async_client: httpx.AsyncClient):
"""Test /crawl with JsonCssExtractionStrategy."""
payload = {
"urls": [SCRAPE_TARGET_URL],
"browser_config": {"type": "BrowserConfig", "params": {"headless": True}},
"crawler_config": {
"type": "CrawlerRunConfig",
"params": {
"cache_mode": CacheMode.BYPASS.value,
"extraction_strategy": {
"type": "JsonCssExtractionStrategy",
"params": {
"schema": {
"type": "dict", # IMPORTANT: Wrap schema dict with type/value structure
"value": {
"name": "BookList",
"baseSelector": "ol.row li.col-xs-6", # Select each book item
"fields": [
{"name": "title", "selector": "article.product_pod h3 a", "type": "attribute", "attribute": "title"},
{"name": "price", "selector": "article.product_pod .price_color", "type": "text"},
{"name": "rating", "selector": "article.product_pod p.star-rating", "type": "attribute", "attribute": "class"}
]
}
}
}
}
}
}
}
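# Illustrative shape of one extracted item (hypothetical values; the assertions below only
# check that the keys exist and that "rating" contains "star-rating"):
#   {"title": "Some Book Title", "price": "£51.77", "rating": "star-rating Three"}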
try:
print(f"Sending deep crawl request to server...")
response = await async_client.post("/crawl", json=payload)
print(f"Response status: {response.status_code}")
if response.status_code >= 400:
error_detail = response.json().get('detail', 'No detail provided')
print(f"Error detail: {error_detail}")
print(f"Full response: {response.text}")
response.raise_for_status()
data = response.json()
except httpx.HTTPStatusError as e:
print(f"Server error status: {e.response.status_code}")
print(f"Server error response: {e.response.text}")
try:
error_json = e.response.json()
print(f"Parsed error: {error_json}")
except:
print("Could not parse error response as JSON")
raise
assert data["success"] is True
assert len(data["results"]) == 1
result = data["results"][0]
await assert_crawl_result_structure(result)
assert result["success"] is True
assert "extracted_content" in result
assert result["extracted_content"] is not None
# Extracted content should be a JSON string representing a list of dicts
try:
extracted_data = json.loads(result["extracted_content"])
assert isinstance(extracted_data, list)
assert len(extracted_data) > 0 # Should find some books
# Check structure of the first extracted item
first_item = extracted_data[0]
assert "title" in first_item
assert "price" in first_item
assert "rating" in first_item
assert "star-rating" in first_item["rating"] # e.g., "star-rating Three"
except (json.JSONDecodeError, AssertionError) as e:
pytest.fail(f"Extracted content parsing or validation failed: {e}\nContent: {result['extracted_content']}")
# 6. Extraction with LLM
async def test_llm_extraction(self, async_client: httpx.AsyncClient):
"""
Test /crawl with LLMExtractionStrategy.
NOTE: Requires the server to have appropriate LLM API keys (e.g., OPENAI_API_KEY)
configured via .llm.env or environment variables.
This test uses the default provider configured in the server's config.yml.
"""
payload = {
"urls": [SIMPLE_HTML_URL],
"browser_config": {"type": "BrowserConfig", "params": {"headless": True}},
"crawler_config": {
"type": "CrawlerRunConfig",
"params": {
"cache_mode": CacheMode.BYPASS.value,
"extraction_strategy": {
"type": "LLMExtractionStrategy",
"params": {
"instruction": "Extract the main title and the author mentioned in the text into JSON.",
# LLMConfig is implicitly defined by server's config.yml and .llm.env
# If you needed to override provider/token PER REQUEST:
"llm_config": {
"type": "LLMConfig",
"params": {
"provider": "openai/gpt-4o", # Example override
"api_token": os.getenv("OPENAI_API_KEY") # Example override
}
},
"schema": { # Optional: Provide a schema for structured output
"type": "dict", # IMPORTANT: Wrap schema dict
"value": {
"title": "Book Info",
"type": "object",
"properties": {
"title": {"type": "string", "description": "The main title of the work"},
"author": {"type": "string", "description": "The author of the work"}
},
"required": ["title", "author"]
}
}
}
}
}
}
}
try:
response = await async_client.post("/crawl", json=payload)
response.raise_for_status() # Will raise if server returns 500 (e.g., bad API key)
data = response.json()
except httpx.HTTPStatusError as e:
# Catch potential server errors (like 500 due to missing/invalid API keys)
pytest.fail(f"LLM extraction request failed: {e}. Response: {e.response.text}. Check server logs and ensure API keys are correctly configured for the server.")
except httpx.RequestError as e:
pytest.fail(f"LLM extraction request failed: {e}.")
assert data["success"] is True
assert len(data["results"]) == 1
result = data["results"][0]
await assert_crawl_result_structure(result)
assert result["success"] is True
assert "extracted_content" in result
assert result["extracted_content"] is not None
# Extracted content should be JSON (because we provided a schema)
try:
extracted_data = json.loads(result["extracted_content"])
print(f"\nLLM Extracted Data: {extracted_data}") # Print for verification
# Handle both dict and list formats (server returns a list)
if isinstance(extracted_data, list):
assert len(extracted_data) > 0
extracted_item = extracted_data[0] # Take first item
assert isinstance(extracted_item, dict)
assert "title" in extracted_item
assert "author" in extracted_item
assert "Moby-Dick" in extracted_item.get("title", "")
assert "Herman Melville" in extracted_item.get("author", "")
else:
assert isinstance(extracted_data, dict)
assert "title" in extracted_data
assert "author" in extracted_data
assert "Moby-Dick" in extracted_data.get("title", "")
assert "Herman Melville" in extracted_data.get("author", "")
except (json.JSONDecodeError, AssertionError) as e:
pytest.fail(f"LLM extracted content parsing or validation failed: {e}\nContent: {result['extracted_content']}")
except Exception as e: # Catch any other unexpected error
pytest.fail(f"An unexpected error occurred during LLM result processing: {e}\nContent: {result['extracted_content']}")
if __name__ == "__main__":
# Define arguments for pytest programmatically
# -v: verbose output
# -s: show print statements immediately (useful for debugging)
# __file__: tells pytest to run tests in the current file
pytest_args = ["-v", "-s", __file__]
# You can add more pytest arguments here if needed, for example:
# '-k test_llm_extraction': Run only the LLM test function
# pytest_args.append("-k test_llm_extraction")
print(f"Running pytest with args: {pytest_args}")
# Execute pytest
exit_code = pytest.main(pytest_args)
print(f"Pytest finished with exit code: {exit_code}")

View File

@@ -0,0 +1,212 @@
import asyncio
import json
from typing import Optional
from urllib.parse import quote
async def get_token(session, email: str = "test@example.com") -> str:
"""Fetch a JWT token from the /token endpoint."""
url = "http://localhost:8000/token"
payload = {"email": email}
print(f"\nFetching token from {url} with email: {email}")
try:
async with session.post(url, json=payload) as response:
status = response.status
data = await response.json()
print(f"Token Response Status: {status}")
print(f"Token Response: {json.dumps(data, indent=2)}")
if status == 200:
return data["access_token"]
else:
raise Exception(f"Failed to get token: {data.get('detail', 'Unknown error')}")
except Exception as e:
print(f"Error fetching token: {str(e)}")
raise
async def test_endpoint(
session,
endpoint: str,
url: str,
token: str,
params: Optional[dict] = None,
expected_status: int = 200
) -> Optional[dict]:
"""Test an endpoint with token and print results."""
params = params or {}
param_str = "&".join(f"{k}={v}" for k, v in params.items())
full_url = f"http://localhost:8000/{endpoint}/{quote(url)}"
if param_str:
full_url += f"?{param_str}"
headers = {"Authorization": f"Bearer {token}"}
print(f"\nTesting: {full_url}")
try:
async with session.get(full_url, headers=headers) as response:
status = response.status
try:
data = await response.json()
except:
data = await response.text()
print(f"Status: {status} (Expected: {expected_status})")
if isinstance(data, dict):
print(f"Response: {json.dumps(data, indent=2)}")
else:
print(f"Response: {data[:500]}...") # First 500 chars
assert status == expected_status, f"Expected {expected_status}, got {status}"
return data
except Exception as e:
print(f"Error: {str(e)}")
return None
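# Example usage (a sketch; endpoint names and params mirror the calls in run_tests below):
#   data = await test_endpoint(session, "md", "example.com", token, {"f": "fit", "c": "0"})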
async def test_stream_crawl(session, token: str):
"""Test the /crawl/stream endpoint with multiple URLs."""
url = "http://localhost:8000/crawl/stream"
payload = {
"urls": [
"https://example.com",
"https://example.com/page1", # Replicated example.com with variation
"https://example.com/page2", # Replicated example.com with variation
"https://example.com/page3", # Replicated example.com with variation
# "https://www.python.org",
# "https://news.ycombinator.com/news"
],
"browser_config": {"headless": True, "viewport": {"width": 1200}},
"crawler_config": {"stream": True, "cache_mode": "bypass"}
}
headers = {"Authorization": f"Bearer {token}"}
print(f"\nTesting Streaming Crawl: {url}")
print(f"Payload: {json.dumps(payload, indent=2)}")
try:
async with session.post(url, json=payload, headers=headers) as response:
status = response.status
print(f"Status: {status} (Expected: 200)")
assert status == 200, f"Expected 200, got {status}"
# Read streaming response line-by-line (NDJSON)
async for line in response.content:
if line:
data = json.loads(line.decode('utf-8').strip())
print(f"Streamed Result: {json.dumps(data, indent=2)}")
except Exception as e:
print(f"Error in streaming crawl test: {str(e)}")
async def run_tests():
import aiohttp
print("Starting API Tests...")
# Test URLs
urls = [
"example.com",
"https://www.python.org",
"https://news.ycombinator.com/news",
"https://github.com/trending"
]
async with aiohttp.ClientSession() as session:
# Fetch token once and reuse it
token = await get_token(session)
if not token:
print("Aborting tests due to token failure!")
return
print("\n=== Testing Crawl Endpoint ===")
crawl_payload = {
"urls": ["https://example.com"],
"browser_config": {"headless": True},
"crawler_config": {"stream": False}
}
async with session.post(
"http://localhost:8000/crawl",
json=crawl_payload,
headers={"Authorization": f"Bearer {token}"}
) as response:
status = response.status
data = await response.json()
print(f"\nCrawl Endpoint Status: {status}")
print(f"Crawl Response: {json.dumps(data, indent=2)}")
print("\n=== Testing Crawl Stream Endpoint ===")
await test_stream_crawl(session, token)
print("\n=== Testing Markdown Endpoint ===")
for url in []:  # NOTE: markdown endpoint tests are currently disabled; replace [] with urls to enable
for filter_type in ["raw", "fit", "bm25", "llm"]:
params = {"f": filter_type}
if filter_type in ["bm25", "llm"]:
params["q"] = "extract main content"
for cache in ["0", "1"]:
params["c"] = cache
await test_endpoint(session, "md", url, token, params)
await asyncio.sleep(1) # Be nice to the server
print("\n=== Testing LLM Endpoint ===")
for url in urls:
# Test basic extraction (direct response now)
result = await test_endpoint(
session,
"llm",
url,
token,
{"q": "Extract title and main content"}
)
# Test with schema (direct response)
schema = {
"type": "object",
"properties": {
"title": {"type": "string"},
"content": {"type": "string"},
"links": {"type": "array", "items": {"type": "string"}}
}
}
result = await test_endpoint(
session,
"llm",
url,
token,
{
"q": "Extract content with links",
"s": json.dumps(schema),
"c": "1" # Test with cache
}
)
await asyncio.sleep(2) # Be nice to the server
print("\n=== Testing Error Cases ===")
# Test invalid URL
await test_endpoint(
session,
"md",
"not_a_real_url",
token,
expected_status=500
)
# Test invalid filter type
await test_endpoint(
session,
"md",
"example.com",
token,
{"f": "invalid"},
expected_status=422
)
# Test LLM without query (should fail according to the server's validation logic)
await test_endpoint(
session,
"llm",
"example.com",
token,
expected_status=400
)
print("\nAll tests completed!")
if __name__ == "__main__":
asyncio.run(run_tests())

View File

@@ -0,0 +1,335 @@
# ==== File: build_dummy_site.py ====
import os
import random
import argparse
from pathlib import Path
from urllib.parse import quote
# --- Configuration ---
NUM_CATEGORIES = 3
NUM_SUBCATEGORIES_PER_CAT = 2 # Results in NUM_CATEGORIES * NUM_SUBCATEGORIES_PER_CAT total L2 categories
NUM_PRODUCTS_PER_SUBCAT = 5 # Products listed on L3 pages
MAX_DEPTH_TARGET = 5 # Explicitly set target depth
# --- Helper Functions ---
def generate_lorem(words=20):
"""Generates simple placeholder text."""
lorem_words = ["lorem", "ipsum", "dolor", "sit", "amet", "consectetur",
"adipiscing", "elit", "sed", "do", "eiusmod", "tempor",
"incididunt", "ut", "labore", "et", "dolore", "magna", "aliqua"]
return " ".join(random.choice(lorem_words) for _ in range(words)).capitalize() + "."
def create_html_page(filepath: Path, title: str, body_content: str, breadcrumbs: list = [], head_extras: str = ""):
"""Creates an HTML file with basic structure and inline CSS."""
os.makedirs(filepath.parent, exist_ok=True)
# Generate breadcrumb HTML using the 'link' provided in the breadcrumbs list
breadcrumb_html = ""
if breadcrumbs:
links_html = " » ".join(f'<a href="{bc["link"]}">{bc["name"]}</a>' for bc in breadcrumbs)
breadcrumb_html = f"<nav class='breadcrumbs'>{links_html} » {title}</nav>"
# Basic CSS for structure identification (kept the same)
css = """
<style>
body {
font-family: sans-serif;
padding: 20px;
background-color: #1e1e1e;
color: #d1d1d1;
}
.container {
max-width: 960px;
margin: auto;
background: #2c2c2c;
padding: 20px;
border-radius: 5px;
box-shadow: 0 2px 5px rgba(0, 0, 0, 0.5);
}
h1, h2 {
color: #ccc;
}
a {
color: #9bcdff;
text-decoration: none;
}
a:hover {
text-decoration: underline;
}
ul {
list-style: none;
padding-left: 0;
}
li {
margin-bottom: 10px;
}
.category-link,
.subcategory-link,
.product-link,
.details-link,
.reviews-link {
display: block;
padding: 8px;
background-color: #3a3a3a;
border-radius: 3px;
}
.product-preview {
border: 1px solid #444;
padding: 10px;
margin-bottom: 10px;
border-radius: 4px;
background-color: #2a2a2a;
}
.product-title {
color: #d1d1d1;
}
.product-price {
font-weight: bold;
color: #85e085;
}
.product-description,
.product-specs,
.product-reviews {
margin-top: 15px;
line-height: 1.6;
}
.product-specs li {
margin-bottom: 5px;
font-size: 0.9em;
}
.spec-name {
font-weight: bold;
}
.breadcrumbs {
margin-bottom: 20px;
font-size: 0.9em;
color: #888;
}
.breadcrumbs a {
color: #9bcdff;
}
</style>
"""
html_content = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{title} - FakeShop</title>
{head_extras}
{css}
</head>
<body>
<div class="container">
{breadcrumb_html}
<h1>{title}</h1>
{body_content}
</div>
</body>
</html>"""
with open(filepath, "w", encoding="utf-8") as f:
f.write(html_content)
# Keep print statement concise for clarity
# print(f"Created: {filepath}")
def generate_site(base_dir: Path, site_name: str = "FakeShop", base_path: str = ""):
"""Generates the dummy website structure."""
base_dir.mkdir(parents=True, exist_ok=True)
# --- Clean and prepare the base path for URL construction ---
# Ensure it starts with '/' if not empty, and remove any trailing '/'
if base_path:
full_base_path = "/" + base_path.strip('/')
else:
full_base_path = "" # Represents the root
print(f"Using base path for links: '{full_base_path}'")
# --- Level 0: Homepage ---
home_body = "<h2>Welcome to FakeShop!</h2><p>Your one-stop shop for imaginary items.</p><h3>Categories:</h3>\n<ul>"
# Define the *actual* link path for the homepage breadcrumb
home_link_path = f"{full_base_path}/index.html"
breadcrumbs_home = [{"name": "Home", "link": home_link_path}] # Base breadcrumb
# Links *within* the page content should remain relative
for i in range(NUM_CATEGORIES):
cat_name = f"Category-{i+1}"
cat_folder_name = quote(cat_name.lower().replace(" ", "-"))
# This path is relative to the current directory (index.html)
cat_relative_page_path = f"{cat_folder_name}/index.html"
home_body += f'<li><a class="category-link" href="{cat_relative_page_path}">{cat_name}</a> - {generate_lorem(10)}</li>'
home_body += "</ul>"
create_html_page(base_dir / "index.html", "Homepage", home_body, []) # No breadcrumbs *on* the homepage itself
# --- Levels 1-5 ---
for i in range(NUM_CATEGORIES):
cat_name = f"Category-{i+1}"
cat_folder_name = quote(cat_name.lower().replace(" ", "-"))
cat_dir = base_dir / cat_folder_name
# This is the *absolute* path for the breadcrumb link
cat_link_path = f"{full_base_path}/{cat_folder_name}/index.html"
# Update breadcrumbs list for this level
breadcrumbs_cat = breadcrumbs_home + [{"name": cat_name, "link": cat_link_path}]
# --- Level 1: Category Page ---
cat_body = f"<p>{generate_lorem(15)} for {cat_name}.</p><h3>Sub-Categories:</h3>\n<ul>"
for j in range(NUM_SUBCATEGORIES_PER_CAT):
subcat_name = f"{cat_name}-Sub-{j+1}"
subcat_folder_name = quote(subcat_name.lower().replace(" ", "-"))
# Path relative to the category page
subcat_relative_page_path = f"{subcat_folder_name}/index.html"
cat_body += f'<li><a class="subcategory-link" href="{subcat_relative_page_path}">{subcat_name}</a> - {generate_lorem(8)}</li>'
cat_body += "</ul>"
# Pass the updated breadcrumbs list
create_html_page(cat_dir / "index.html", cat_name, cat_body, breadcrumbs_home) # Parent breadcrumb needed here
for j in range(NUM_SUBCATEGORIES_PER_CAT):
subcat_name = f"{cat_name}-Sub-{j+1}"
subcat_folder_name = quote(subcat_name.lower().replace(" ", "-"))
subcat_dir = cat_dir / subcat_folder_name
# Absolute path for the breadcrumb link
subcat_link_path = f"{full_base_path}/{cat_folder_name}/{subcat_folder_name}/index.html"
# Update breadcrumbs list for this level
breadcrumbs_subcat = breadcrumbs_cat + [{"name": subcat_name, "link": subcat_link_path}]
# --- Level 2: Sub-Category Page (Product List) ---
subcat_body = f"<p>Explore products in {subcat_name}. {generate_lorem(12)}</p><h3>Products:</h3>\n<ul class='product-list'>"
for k in range(NUM_PRODUCTS_PER_SUBCAT):
prod_id = f"P{i+1}{j+1}{k+1:03d}" # e.g., P11001
prod_name = f"{subcat_name} Product {k+1} ({prod_id})"
# Filename relative to the subcategory page
prod_filename = f"product_{prod_id}.html"
# Absolute path for the breadcrumb link
prod_link_path = f"{full_base_path}/{cat_folder_name}/{subcat_folder_name}/{prod_filename}"
# Preview on list page (link remains relative)
subcat_body += f"""
<li>
<div class="product-preview">
<a class="product-link" href="{prod_filename}"><strong>{prod_name}</strong></a>
<p>{generate_lorem(10)}</p>
<span class="product-price"{random.uniform(10, 500):.2f}</span>
</div>
</li>"""
# --- Level 3: Product Page ---
prod_price = random.uniform(10, 500)
prod_desc = generate_lorem(40)
prod_specs = {f"Spec {s+1}": generate_lorem(3) for s in range(random.randint(3,6))}
prod_reviews_count = random.randint(0, 150)
# Relative filenames for links on this page
details_filename_relative = f"product_{prod_id}_details.html"
reviews_filename_relative = f"product_{prod_id}_reviews.html"
prod_body = f"""
<p class="product-price">Price: £{prod_price:.2f}</p>
<div class="product-description">
<h2>Description</h2>
<p>{prod_desc}</p>
</div>
<div class="product-specs">
<h2>Specifications</h2>
<ul>
{''.join(f'<li><span class="spec-name">{name}</span>: <span class="spec-value">{value}</span></li>' for name, value in prod_specs.items())}
</ul>
</div>
<div class="product-reviews">
<h2>Reviews</h2>
<p>Total Reviews: <span class="review-count">{prod_reviews_count}</span></p>
</div>
<hr>
<p>
<a class="details-link" href="{details_filename_relative}">View More Details</a> |
<a class="reviews-link" href="{reviews_filename_relative}">See All Reviews</a>
</p>
"""
# Update breadcrumbs list for this level
breadcrumbs_prod = breadcrumbs_subcat + [{"name": prod_name, "link": prod_link_path}]
# Pass the updated breadcrumbs list
create_html_page(subcat_dir / prod_filename, prod_name, prod_body, breadcrumbs_subcat) # Parent breadcrumb needed here
# --- Level 4: Product Details Page ---
details_filename = f"product_{prod_id}_details.html" # Actual filename
# Absolute path for the breadcrumb link
details_link_path = f"{full_base_path}/{cat_folder_name}/{subcat_folder_name}/{details_filename}"
details_body = f"<p>This page contains extremely detailed information about {prod_name}.</p>{generate_lorem(100)}"
# Update breadcrumbs list for this level
breadcrumbs_details = breadcrumbs_prod + [{"name": "Details", "link": details_link_path}]
# Pass the updated breadcrumbs list
create_html_page(subcat_dir / details_filename, f"{prod_name} - Details", details_body, breadcrumbs_prod) # Parent breadcrumb needed here
# --- Level 5: Product Reviews Page ---
reviews_filename = f"product_{prod_id}_reviews.html" # Actual filename
# Absolute path for the breadcrumb link
reviews_link_path = f"{full_base_path}/{cat_folder_name}/{subcat_folder_name}/{reviews_filename}"
reviews_body = f"<p>All {prod_reviews_count} reviews for {prod_name} are listed here.</p><ul>"
for r in range(prod_reviews_count):
reviews_body += f"<li>Review {r+1}: {generate_lorem(random.randint(15, 50))}</li>"
reviews_body += "</ul>"
# Update breadcrumbs list for this level
breadcrumbs_reviews = breadcrumbs_prod + [{"name": "Reviews", "link": reviews_link_path}]
# Pass the updated breadcrumbs list
create_html_page(subcat_dir / reviews_filename, f"{prod_name} - Reviews", reviews_body, breadcrumbs_prod) # Parent breadcrumb needed here
subcat_body += "</ul>" # Close product-list ul
# Pass the correct breadcrumbs list for the subcategory index page
create_html_page(subcat_dir / "index.html", subcat_name, subcat_body, breadcrumbs_cat) # Parent breadcrumb needed here
# --- Main Execution ---
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Generate a dummy multi-level retail website.")
parser.add_argument(
"-o", "--output-dir",
type=str,
default="dummy_retail_site",
help="Directory to generate the website in."
)
parser.add_argument(
"-n", "--site-name",
type=str,
default="FakeShop",
help="Name of the fake shop."
)
parser.add_argument(
"-b", "--base-path",
type=str,
default="",
help="Base path for hosting the site (e.g., 'samples/deepcrawl'). Leave empty if hosted at the root."
)
# Optional: Add more args to configure counts if needed
args = parser.parse_args()
output_directory = Path(args.output_dir)
site_name = args.site_name
base_path = args.base_path
print(f"Generating dummy site '{site_name}' in '{output_directory}'...")
# Pass the base_path to the generation function
generate_site(output_directory, site_name, base_path)
print(f"\nCreated {sum(1 for _ in output_directory.rglob('*.html'))} HTML pages.")
print("Dummy site generation complete.")
print(f"To serve locally (example): python -m http.server --directory {output_directory} 8000")
if base_path:
print(f"Access the site at: http://localhost:8000/{base_path.strip('/')}/index.html")
else:
print(f"Access the site at: http://localhost:8000/index.html")

View File

@@ -0,0 +1,56 @@
import asyncio
from crawl4ai import (
AsyncWebCrawler,
CrawlerRunConfig,
HTTPCrawlerConfig,
CacheMode,
DefaultMarkdownGenerator,
PruningContentFilter
)
from crawl4ai.async_crawler_strategy import AsyncHTTPCrawlerStrategy
from crawl4ai.async_logger import AsyncLogger
async def main():
# Initialize HTTP crawler strategy
http_strategy = AsyncHTTPCrawlerStrategy(
browser_config=HTTPCrawlerConfig(
method="GET",
verify_ssl=True,
follow_redirects=True
),
logger=AsyncLogger(verbose=True)
)
# Initialize web crawler with HTTP strategy
async with AsyncWebCrawler(crawler_strategy=http_strategy) as crawler:
crawler_config = CrawlerRunConfig(
cache_mode=CacheMode.BYPASS,
markdown_generator=DefaultMarkdownGenerator(
content_filter=PruningContentFilter(
threshold=0.48,
threshold_type="fixed",
min_word_threshold=0
)
)
)
# Test different URLs
urls = [
"https://example.com",
"https://httpbin.org/get",
"raw://<html><body>Test content</body></html>"
]
for url in urls:
print(f"\n=== Testing {url} ===")
try:
result = await crawler.arun(url=url, config=crawler_config)
print(f"Status: {result.status_code}")
print(f"Raw HTML length: {len(result.html)}")
if hasattr(result, 'markdown'):
print(f"Markdown length: {len(result.markdown.raw_markdown)}")
except Exception as e:
print(f"Error: {e}")
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,46 @@
import asyncio
import time
from crawl4ai import CrawlerRunConfig, AsyncWebCrawler, CacheMode
from crawl4ai.content_scraping_strategy import LXMLWebScrapingStrategy
from crawl4ai.deep_crawling import BFSDeepCrawlStrategy, BestFirstCrawlingStrategy
from crawl4ai.deep_crawling.filters import FilterChain, URLPatternFilter, DomainFilter, ContentTypeFilter, ContentRelevanceFilter
from crawl4ai.deep_crawling.scorers import KeywordRelevanceScorer
# from crawl4ai.deep_crawling import BFSDeepCrawlStrategy, BestFirstCrawlingStrategy
async def main():
"""Example deep crawl of documentation site."""
filter_chain = FilterChain([
URLPatternFilter(patterns=["*2025*"]),
DomainFilter(allowed_domains=["techcrunch.com"]),
ContentRelevanceFilter(query="Use of artificial intelligence in Defence applications", threshold=1),
ContentTypeFilter(allowed_types=["text/html","application/javascript"])
])
config = CrawlerRunConfig(
deep_crawl_strategy = BestFirstCrawlingStrategy(
max_depth=2,
include_external=False,
filter_chain=filter_chain,
url_scorer=KeywordRelevanceScorer(keywords=["anduril", "defence", "AI"]),
),
stream=False,
verbose=True,
cache_mode=CacheMode.BYPASS,
scraping_strategy=LXMLWebScrapingStrategy()
)
async with AsyncWebCrawler() as crawler:
print("Starting deep crawl in streaming mode:")
config.stream = True
start_time = time.perf_counter()
async for result in await crawler.arun(
url="https://techcrunch.com",
config=config
):
print(f"{result.url} (Depth: {result.metadata.get('depth', 0)})")
print(f"Duration: {time.perf_counter() - start_time:.2f} seconds")
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,106 @@
"""
Tests for the content_source parameter in markdown generation.
"""
import unittest
import asyncio
from unittest.mock import patch, MagicMock
from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator, MarkdownGenerationStrategy
from crawl4ai.async_webcrawler import AsyncWebCrawler
from crawl4ai.async_configs import CrawlerRunConfig
from crawl4ai.models import MarkdownGenerationResult
HTML_SAMPLE = """
<html>
<head><title>Test Page</title></head>
<body>
<h1>Test Content</h1>
<p>This is a test paragraph.</p>
<div class="container">
<p>This is content within a container.</p>
</div>
</body>
</html>
"""
class TestContentSourceParameter(unittest.TestCase):
"""Test cases for the content_source parameter in markdown generation."""
def setUp(self):
"""Set up test fixtures."""
self.loop = asyncio.new_event_loop()
asyncio.set_event_loop(self.loop)
def tearDown(self):
"""Tear down test fixtures."""
self.loop.close()
def test_default_content_source(self):
"""Test that the default content_source is 'cleaned_html'."""
# Can't directly instantiate abstract class, so just test DefaultMarkdownGenerator
generator = DefaultMarkdownGenerator()
self.assertEqual(generator.content_source, "cleaned_html")
def test_custom_content_source(self):
"""Test that content_source can be customized."""
generator = DefaultMarkdownGenerator(content_source="fit_html")
self.assertEqual(generator.content_source, "fit_html")
@patch('crawl4ai.markdown_generation_strategy.CustomHTML2Text')
def test_html_processing_using_input_html(self, mock_html2text):
"""Test that generate_markdown uses input_html parameter."""
# Setup mock
mock_instance = MagicMock()
mock_instance.handle.return_value = "# Test Content\n\nThis is a test paragraph."
mock_html2text.return_value = mock_instance
# Create generator and call generate_markdown
generator = DefaultMarkdownGenerator()
result = generator.generate_markdown(input_html="<h1>Test Content</h1><p>This is a test paragraph.</p>")
# Verify input_html was passed to HTML2Text handler
mock_instance.handle.assert_called_once()
# Get the first positional argument
args, _ = mock_instance.handle.call_args
self.assertEqual(args[0], "<h1>Test Content</h1><p>This is a test paragraph.</p>")
# Check result
self.assertIsInstance(result, MarkdownGenerationResult)
self.assertEqual(result.raw_markdown, "# Test Content\n\nThis is a test paragraph.")
def test_html_source_selection_logic(self):
"""Test that the HTML source selection logic works correctly."""
# We'll test the dispatch pattern directly to avoid async complexities
# Create test data
raw_html = "<html><body><h1>Raw HTML</h1></body></html>"
cleaned_html = "<html><body><h1>Cleaned HTML</h1></body></html>"
fit_html = "<html><body><h1>Preprocessed HTML</h1></body></html>"
# Test the dispatch pattern
html_source_selector = {
"raw_html": lambda: raw_html,
"cleaned_html": lambda: cleaned_html,
"fit_html": lambda: fit_html,
}
# Test Case 1: content_source="cleaned_html"
source_lambda = html_source_selector.get("cleaned_html")
self.assertEqual(source_lambda(), cleaned_html)
# Test Case 2: content_source="raw_html"
source_lambda = html_source_selector.get("raw_html")
self.assertEqual(source_lambda(), raw_html)
# Test Case 3: content_source="fit_html"
source_lambda = html_source_selector.get("fit_html")
self.assertEqual(source_lambda(), fit_html)
# Test Case 4: Invalid content_source falls back to cleaned_html
source_lambda = html_source_selector.get("invalid_source", lambda: cleaned_html)
self.assertEqual(source_lambda(), cleaned_html)
if __name__ == '__main__':
unittest.main()

View File

@@ -0,0 +1,17 @@
# example_usage.py
import asyncio
from crawl4ai.crawlers import get_crawler
async def main():
# Get the registered crawler
example_crawler = get_crawler("example_site.content")
# Crawl example.com
result = await example_crawler(url="https://example.com")
print(result)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,46 @@
import asyncio
import time
from crawl4ai import CrawlerRunConfig, AsyncWebCrawler, CacheMode
from crawl4ai.content_scraping_strategy import LXMLWebScrapingStrategy
from crawl4ai.deep_crawling import BFSDeepCrawlStrategy
# from crawl4ai.deep_crawling import BFSDeepCrawlStrategy, BestFirstCrawlingStrategy
async def main():
"""Example deep crawl of documentation site."""
config = CrawlerRunConfig(
deep_crawl_strategy = BFSDeepCrawlStrategy(
max_depth=2,
include_external=False
),
stream=False,
verbose=True,
cache_mode=CacheMode.BYPASS,
scraping_strategy=LXMLWebScrapingStrategy()
)
async with AsyncWebCrawler() as crawler:
start_time = time.perf_counter()
print("\nStarting deep crawl in batch mode:")
results = await crawler.arun(
url="https://docs.crawl4ai.com",
config=config
)
print(f"Crawled {len(results)} pages")
print(f"Example page: {results[0].url}")
print(f"Duration: {time.perf_counter() - start_time:.2f} seconds\n")
print("Starting deep crawl in streaming mode:")
config.stream = True
start_time = time.perf_counter()
async for result in await crawler.arun(
url="https://docs.crawl4ai.com",
config=config
):
print(f"{result.url} (Depth: {result.metadata.get('depth', 0)})")
print(f"Duration: {time.perf_counter() - start_time:.2f} seconds")
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,279 @@
from crawl4ai.deep_crawling.filters import ContentRelevanceFilter, URLPatternFilter, DomainFilter, ContentTypeFilter, SEOFilter
async def test_pattern_filter():
# Test cases as list of tuples instead of dict for multiple patterns
test_cases = [
# Simple suffix patterns (*.html)
("*.html", {
"https://example.com/page.html": True,
"https://example.com/path/doc.html": True,
"https://example.com/page.htm": False,
"https://example.com/page.html?param=1": True,
}),
# Path prefix patterns (/foo/*)
("*/article/*", {
"https://example.com/article/123": True,
"https://example.com/blog/article/456": True,
"https://example.com/articles/789": False,
"https://example.com/article": False,
}),
# Complex patterns
("blog-*-[0-9]", {
"https://example.com/blog-post-1": True,
"https://example.com/blog-test-9": True,
"https://example.com/blog-post": False,
"https://example.com/blog-post-x": False,
}),
# Multiple patterns case
(["*.pdf", "*/download/*"], {
"https://example.com/doc.pdf": True,
"https://example.com/download/file.txt": True,
"https://example.com/path/download/doc": True,
"https://example.com/uploads/file.txt": False,
}),
# Edge cases
("*", {
"https://example.com": True,
"": True,
"http://test.com/path": True,
}),
# Complex regex
(r"^https?://.*\.example\.com/\d+", {
"https://sub.example.com/123": True,
"http://test.example.com/456": True,
"https://example.com/789": False,
"https://sub.example.com/abc": False,
})
]
def run_accuracy_test():
print("\nAccuracy Tests:")
print("-" * 50)
all_passed = True
for patterns, test_urls in test_cases:
filter_obj = URLPatternFilter(patterns)
for url, expected in test_urls.items():
result = filter_obj.apply(url)
if result != expected:
print(f"❌ Failed: Pattern '{patterns}' with URL '{url}'")
print(f" Expected: {expected}, Got: {result}")
all_passed = False
else:
print(f"✅ Passed: Pattern '{patterns}' with URL '{url}'")
return all_passed
# Run tests
print("Running Pattern Filter Tests...")
accuracy_passed = run_accuracy_test()
if accuracy_passed:
print("\n✨ All accuracy tests passed!")
else:
print("\n❌ Some accuracy tests failed!")
async def test_domain_filter():
from itertools import chain
# Test cases
test_cases = [
# Allowed domains
({"allowed": "example.com"}, {
"https://example.com/page": True,
"http://example.com": True,
"https://sub.example.com": False,
"https://other.com": False,
}),
({"allowed": ["example.com", "test.com"]}, {
"https://example.com/page": True,
"https://test.com/home": True,
"https://other.com": False,
}),
# Blocked domains
({"blocked": "malicious.com"}, {
"https://malicious.com": False,
"https://safe.com": True,
"http://malicious.com/login": False,
}),
({"blocked": ["spam.com", "ads.com"]}, {
"https://spam.com": False,
"https://ads.com/banner": False,
"https://example.com": True,
}),
# Allowed and Blocked combination
({"allowed": "example.com", "blocked": "sub.example.com"}, {
"https://example.com": True,
"https://sub.example.com": False,
"https://other.com": False,
}),
]
def run_accuracy_test():
print("\nAccuracy Tests:")
print("-" * 50)
all_passed = True
for params, test_urls in test_cases:
filter_obj = DomainFilter(
allowed_domains=params.get("allowed"),
blocked_domains=params.get("blocked"),
)
for url, expected in test_urls.items():
result = filter_obj.apply(url)
if result != expected:
print(f"\u274C Failed: Params {params} with URL '{url}'")
print(f" Expected: {expected}, Got: {result}")
all_passed = False
else:
print(f"\u2705 Passed: Params {params} with URL '{url}'")
return all_passed
# Run tests
print("Running Domain Filter Tests...")
accuracy_passed = run_accuracy_test()
if accuracy_passed:
print("\n\u2728 All accuracy tests passed!")
else:
print("\n\u274C Some accuracy tests failed!")
async def test_content_relevance_filter():
relevance_filter = ContentRelevanceFilter(
query="What was the cause of american civil war?",
threshold=1
)
test_cases = {
"https://en.wikipedia.org/wiki/Cricket": False,
"https://en.wikipedia.org/wiki/American_Civil_War": True,
}
print("\nRunning Content Relevance Filter Tests...")
print("-" * 50)
all_passed = True
for url, expected in test_cases.items():
result = await relevance_filter.apply(url)
if result != expected:
print(f"\u274C Failed: URL '{url}'")
print(f" Expected: {expected}, Got: {result}")
all_passed = False
else:
print(f"\u2705 Passed: URL '{url}'")
if all_passed:
print("\n\u2728 All content relevance tests passed!")
else:
print("\n\u274C Some content relevance tests failed!")
async def test_content_type_filter():
from itertools import chain
# Test cases
test_cases = [
# Allowed single type
({"allowed": "image/png"}, {
"https://example.com/image.png": True,
"https://example.com/photo.jpg": False,
"https://example.com/document.pdf": False,
}),
# Multiple allowed types
({"allowed": ["image/jpeg", "application/pdf"]}, {
"https://example.com/photo.jpg": True,
"https://example.com/document.pdf": True,
"https://example.com/script.js": False,
}),
# URLs with no file extension should be allowed
({"allowed": "application/json"}, {
"https://example.com/api/data": True,
"https://example.com/data.json": True,
"https://example.com/page.html": False,
}),
# Unknown extensions are treated as application/octet-stream
({"allowed": "application/octet-stream"}, {
"https://example.com/file.unknown": True,
"https://example.com/archive.zip": False,
"https://example.com/software.exe": False,
}),
]
def run_accuracy_test():
print("\nAccuracy Tests:")
print("-" * 50)
all_passed = True
for params, test_urls in test_cases:
filter_obj = ContentTypeFilter(
allowed_types=params.get("allowed"),
)
for url, expected in test_urls.items():
result = filter_obj.apply(url)
if result != expected:
print(f"\u274C Failed: Params {params} with URL '{url}'")
print(f" Expected: {expected}, Got: {result}")
all_passed = False
else:
print(f"\u2705 Passed: Params {params} with URL '{url}'")
return all_passed
# Run tests
print("Running Content Type Filter Tests...")
accuracy_passed = run_accuracy_test()
if accuracy_passed:
print("\n\u2728 All accuracy tests passed!")
else:
print("\n\u274C Some accuracy tests failed!")
async def test_seo_filter():
seo_filter = SEOFilter(threshold=0.5, keywords=["SEO", "search engines", "Optimization"])
test_cases = {
"https://en.wikipedia.org/wiki/Search_engine_optimization": True,
"https://en.wikipedia.org/wiki/Randomness": False,
}
print("\nRunning SEO Filter Tests...")
print("-" * 50)
all_passed = True
for url, expected in test_cases.items():
result = await seo_filter.apply(url)
if result != expected:
print(f"\u274C Failed: URL '{url}'")
print(f" Expected: {expected}, Got: {result}")
all_passed = False
else:
print(f"\u2705 Passed: URL '{url}'")
if all_passed:
print("\n\u2728 All SEO filter tests passed!")
else:
print("\n\u274C Some SEO filter tests failed!")
import asyncio
if __name__ == "__main__":
asyncio.run(test_pattern_filter())
asyncio.run(test_domain_filter())
asyncio.run(test_content_type_filter())
asyncio.run(test_content_relevance_filter())
asyncio.run(test_seo_filter())

View File

@@ -0,0 +1,179 @@
from crawl4ai.deep_crawling.scorers import CompositeScorer, ContentTypeScorer, DomainAuthorityScorer, FreshnessScorer, KeywordRelevanceScorer, PathDepthScorer
def test_scorers():
test_cases = [
# Keyword Scorer Tests
{
"scorer_type": "keyword",
"config": {
"keywords": ["python", "blog"],
"weight": 1.0,
"case_sensitive": False
},
"urls": {
"https://example.com/python-blog": 1.0,
"https://example.com/PYTHON-BLOG": 1.0,
"https://example.com/python-only": 0.5,
"https://example.com/other": 0.0
}
},
# Path Depth Scorer Tests
{
"scorer_type": "path_depth",
"config": {
"optimal_depth": 2,
"weight": 1.0
},
"urls": {
"https://example.com/a/b": 1.0,
"https://example.com/a": 0.5,
"https://example.com/a/b/c": 0.5,
"https://example.com": 0.33333333
}
},
# Content Type Scorer Tests
{
"scorer_type": "content_type",
"config": {
"type_weights": {
".html$": 1.0,
".pdf$": 0.8,
".jpg$": 0.6
},
"weight": 1.0
},
"urls": {
"https://example.com/doc.html": 1.0,
"https://example.com/doc.pdf": 0.8,
"https://example.com/img.jpg": 0.6,
"https://example.com/other.txt": 0.0
}
},
# Freshness Scorer Tests
{
"scorer_type": "freshness",
"config": {
"weight": 1.0, # Remove current_year since original doesn't support it
},
"urls": {
"https://example.com/2024/01/post": 1.0,
"https://example.com/2023/12/post": 0.9,
"https://example.com/2022/post": 0.8,
"https://example.com/no-date": 0.5
}
},
# Domain Authority Scorer Tests
{
"scorer_type": "domain",
"config": {
"domain_weights": {
"python.org": 1.0,
"github.com": 0.8,
"medium.com": 0.6
},
"default_weight": 0.3,
"weight": 1.0
},
"urls": {
"https://python.org/about": 1.0,
"https://github.com/repo": 0.8,
"https://medium.com/post": 0.6,
"https://unknown.com": 0.3
}
}
]
def create_scorer(scorer_type, config):
if scorer_type == "keyword":
return KeywordRelevanceScorer(**config)
elif scorer_type == "path_depth":
return PathDepthScorer(**config)
elif scorer_type == "content_type":
return ContentTypeScorer(**config)
elif scorer_type == "freshness":
return FreshnessScorer(**config, current_year=2024)
elif scorer_type == "domain":
return DomainAuthorityScorer(**config)
def run_accuracy_test():
print("\nAccuracy Tests:")
print("-" * 50)
all_passed = True
for test_case in test_cases:
print(f"\nTesting {test_case['scorer_type']} scorer:")
scorer = create_scorer(
test_case['scorer_type'],
test_case['config']
)
for url, expected in test_case['urls'].items():
score = round(scorer.score(url), 8)
expected = round(expected, 8)
if abs(score - expected) > 0.00001:
print(f"❌ Scorer Failed: URL '{url}'")
print(f" Expected: {expected}, Got: {score}")
all_passed = False
else:
print(f"✅ Scorer Passed: URL '{url}'")
return all_passed
def run_composite_test():
print("\nTesting Composite Scorer:")
print("-" * 50)
# Create test data
test_urls = {
"https://python.org/blog/2024/01/new-release.html":0.86666667,
"https://github.com/repo/old-code.pdf": 0.62,
"https://unknown.com/random": 0.26
}
# Create composite scorers with all types
scorers = []
for test_case in test_cases:
scorer = create_scorer(
test_case['scorer_type'],
test_case['config']
)
scorers.append(scorer)
composite = CompositeScorer(scorers, normalize=True)
all_passed = True
for url, expected in test_urls.items():
score = round(composite.score(url), 8)
if abs(score - expected) > 0.00001:
print(f"❌ Composite Failed: URL '{url}'")
print(f" Expected: {expected}, Got: {score}")
all_passed = False
else:
print(f"✅ Composite Passed: URL '{url}'")
return all_passed
# Run tests
print("Running Scorer Tests...")
accuracy_passed = run_accuracy_test()
composite_passed = run_composite_test()
if accuracy_passed and composite_passed:
print("\n✨ All tests passed!")
# Note: Already have performance tests in run_scorer_performance_test()
else:
print("\n❌ Some tests failed!")
if __name__ == "__main__":
test_scorers()

View File

@@ -0,0 +1,116 @@
from crawl4ai.async_crawler_strategy import AsyncHTTPCrawlerStrategy
from crawl4ai.async_logger import AsyncLogger
from crawl4ai import CrawlerRunConfig, HTTPCrawlerConfig
from crawl4ai.async_crawler_strategy import ConnectionTimeoutError
import asyncio
import os
async def main():
"""Test the AsyncHTTPCrawlerStrategy with various scenarios"""
logger = AsyncLogger(verbose=True)
# Initialize the strategy with default HTTPCrawlerConfig
crawler = AsyncHTTPCrawlerStrategy(
browser_config=HTTPCrawlerConfig(),
logger=logger
)
# Test 1: Basic HTTP GET
print("\n=== Test 1: Basic HTTP GET ===")
result = await crawler.crawl("https://example.com")
print(f"Status: {result.status_code}")
print(f"Content length: {len(result.html)}")
print(f"Headers: {dict(result.response_headers)}")
# Test 2: POST request with JSON
print("\n=== Test 2: POST with JSON ===")
crawler.browser_config = crawler.browser_config.clone(
method="POST",
json={"test": "data"},
headers={"Content-Type": "application/json"}
)
try:
result = await crawler.crawl(
"https://httpbin.org/post",
)
print(f"Status: {result.status_code}")
print(f"Response: {result.html[:200]}...")
except Exception as e:
print(f"Error: {e}")
# Test 3: File handling
crawler.browser_config = HTTPCrawlerConfig()
print("\n=== Test 3: Local file handling ===")
# Create a tmp file with test content
from tempfile import NamedTemporaryFile
with NamedTemporaryFile(delete=False) as f:
f.write(b"<html><body>Test content</body></html>")
f.close()
result = await crawler.crawl(f"file://{f.name}")
print(f"File content: {result.html}")
# Test 4: Raw content
print("\n=== Test 4: Raw content handling ===")
raw_html = "raw://<html><body>Raw test content</body></html>"
result = await crawler.crawl(raw_html)
print(f"Raw content: {result.html}")
# Test 5: Custom hooks
print("\n=== Test 5: Custom hooks ===")
async def before_request(url, kwargs):
print(f"Before request to {url}")
kwargs['headers']['X-Custom'] = 'test'
async def after_request(response):
print(f"After request, status: {response.status_code}")
crawler.set_hook('before_request', before_request)
crawler.set_hook('after_request', after_request)
result = await crawler.crawl("https://example.com")
# Test 6: Error handling
print("\n=== Test 6: Error handling ===")
try:
await crawler.crawl("https://nonexistent.domain.test")
except Exception as e:
print(f"Expected error: {e}")
# Test 7: Redirects
print("\n=== Test 7: Redirect handling ===")
crawler.browser_config = HTTPCrawlerConfig(follow_redirects=True)
result = await crawler.crawl("http://httpbin.org/redirect/1")
print(f"Final URL: {result.redirected_url}")
# Test 8: Custom timeout
print("\n=== Test 8: Custom timeout ===")
try:
await crawler.crawl(
"https://httpbin.org/delay/5",
config=CrawlerRunConfig(page_timeout=2)
)
except ConnectionTimeoutError as e:
print(f"Expected timeout: {e}")
# Test 9: SSL verification
print("\n=== Test 9: SSL verification ===")
crawler.browser_config = HTTPCrawlerConfig(verify_ssl=False)
try:
await crawler.crawl("https://expired.badssl.com/")
print("Connected to invalid SSL site with verification disabled")
except Exception as e:
print(f"SSL error: {e}")
# Test 10: Large file streaming
print("\n=== Test 10: Large file streaming ===")
from tempfile import NamedTemporaryFile
with NamedTemporaryFile(delete=False) as f:
f.write(b"<html><body>" + b"X" * 1024 * 1024 * 10 + b"</body></html>")
f.close()
result = await crawler.crawl("file://" + f.name)
print(f"Large file content length: {len(result.html)}")
os.remove(f.name)
await crawler.close()
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,6 +1,7 @@
import os
import asyncio
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
from crawl4ai import LLMConfig
from crawl4ai.content_filter_strategy import LLMContentFilter
async def test_llm_filter():
@@ -22,8 +23,7 @@ async def test_llm_filter():
# Initialize LLM filter with focused instruction
filter = LLMContentFilter(
provider="openai/gpt-4o",
api_token=os.getenv('OPENAI_API_KEY'),
llm_config=LLMConfig(provider="openai/gpt-4o",api_token=os.getenv('OPENAI_API_KEY')),
instruction="""
Focus on extracting the core educational content about Python classes.
Include:
@@ -43,8 +43,7 @@ async def test_llm_filter():
)
filter = LLMContentFilter(
provider="openai/gpt-4o",
api_token=os.getenv('OPENAI_API_KEY'),
llm_config = LLMConfig(provider="openai/gpt-4o",api_token=os.getenv('OPENAI_API_KEY')),
chunk_token_threshold=2 ** 12 * 2, # 2048 * 2
instruction="""
Extract the main educational content while preserving its original wording and substance completely. Your task is to:

213
tests/general/test_mhtml.py Normal file
View File

@@ -0,0 +1,213 @@
# test_mhtml_capture.py
import pytest
import asyncio
import re # For more robust MHTML checks
# Assuming these can be imported directly from the crawl4ai library
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CrawlResult
# A reliable, simple static HTML page for testing
# Using httpbin as it's designed for testing clients
TEST_URL_SIMPLE = "https://httpbin.org/html"
EXPECTED_CONTENT_SIMPLE = "Herman Melville - Moby-Dick"
# A slightly more complex page that might involve JS (good secondary test)
TEST_URL_JS = "https://quotes.toscrape.com/js/"
EXPECTED_CONTENT_JS = "Quotes to Scrape" # Title of the page, which should be present in MHTML
# Removed the custom event_loop fixture as pytest-asyncio provides a default one.
@pytest.mark.asyncio
async def test_mhtml_capture_when_enabled():
"""
Verify that when CrawlerRunConfig has capture_mhtml=True,
the CrawlResult contains valid MHTML content.
"""
# Create a fresh browser config and crawler instance for this test
browser_config = BrowserConfig(headless=True) # Use headless for testing CI/CD
# --- Key: Enable MHTML capture in the run config ---
run_config = CrawlerRunConfig(capture_mhtml=True)
# Create a fresh crawler instance
crawler = AsyncWebCrawler(config=browser_config)
try:
# Start the browser
await crawler.start()
# Perform the crawl with the MHTML-enabled config
result: CrawlResult = await crawler.arun(TEST_URL_SIMPLE, config=run_config)
# --- Assertions ---
assert result is not None, "Crawler should return a result object"
assert result.success is True, f"Crawling {TEST_URL_SIMPLE} should succeed. Error: {result.error_message}"
# 1. Check if the mhtml attribute exists (will fail if CrawlResult not updated)
assert hasattr(result, 'mhtml'), "CrawlResult object must have an 'mhtml' attribute"
# 2. Check if mhtml is populated
assert result.mhtml is not None, "MHTML content should be captured when enabled"
assert isinstance(result.mhtml, str), "MHTML content should be a string"
assert len(result.mhtml) > 500, "MHTML content seems too short, likely invalid" # Basic sanity check
# 3. Check for MHTML structure indicators (more robust than simple string contains)
# MHTML files are multipart MIME messages
assert re.search(r"Content-Type: multipart/related;", result.mhtml, re.IGNORECASE), \
"MHTML should contain 'Content-Type: multipart/related;'"
# Should contain a boundary definition
assert re.search(r"boundary=\"----MultipartBoundary", result.mhtml), \
"MHTML should contain a multipart boundary"
# Should contain the main HTML part
assert re.search(r"Content-Type: text/html", result.mhtml, re.IGNORECASE), \
"MHTML should contain a 'Content-Type: text/html' part"
# 4. Check if the *actual page content* is within the MHTML string
# This confirms the snapshot captured the rendered page
assert EXPECTED_CONTENT_SIMPLE in result.mhtml, \
f"Expected content '{EXPECTED_CONTENT_SIMPLE}' not found within the captured MHTML"
# 5. Ensure standard HTML is still present and correct
assert result.html is not None, "Standard HTML should still be present"
assert isinstance(result.html, str), "Standard HTML should be a string"
assert EXPECTED_CONTENT_SIMPLE in result.html, \
f"Expected content '{EXPECTED_CONTENT_SIMPLE}' not found within the standard HTML"
finally:
# Important: Ensure browser is completely closed even if assertions fail
await crawler.close()
# Help the garbage collector clean up
crawler = None
@pytest.mark.asyncio
async def test_mhtml_capture_when_disabled_explicitly():
"""
Verify that when CrawlerRunConfig explicitly has capture_mhtml=False,
the CrawlResult.mhtml attribute is None.
"""
# Create a fresh browser config and crawler instance for this test
browser_config = BrowserConfig(headless=True)
# --- Key: Explicitly disable MHTML capture ---
run_config = CrawlerRunConfig(capture_mhtml=False)
# Create a fresh crawler instance
crawler = AsyncWebCrawler(config=browser_config)
try:
# Start the browser
await crawler.start()
result: CrawlResult = await crawler.arun(TEST_URL_SIMPLE, config=run_config)
assert result is not None
assert result.success is True, f"Crawling {TEST_URL_SIMPLE} should succeed. Error: {result.error_message}"
# 1. Check attribute existence (important for TDD start)
assert hasattr(result, 'mhtml'), "CrawlResult object must have an 'mhtml' attribute"
# 2. Check mhtml is None
assert result.mhtml is None, "MHTML content should be None when explicitly disabled"
# 3. Ensure standard HTML is still present
assert result.html is not None
assert EXPECTED_CONTENT_SIMPLE in result.html
finally:
# Important: Ensure browser is completely closed even if assertions fail
await crawler.close()
# Help the garbage collector clean up
crawler = None
@pytest.mark.asyncio
async def test_mhtml_capture_when_disabled_by_default():
"""
Verify that if capture_mhtml is not specified (using its default),
the CrawlResult.mhtml attribute is None.
(This assumes the default value for capture_mhtml in CrawlerRunConfig is False)
"""
# Create a fresh browser config and crawler instance for this test
browser_config = BrowserConfig(headless=True)
# --- Key: Use default run config ---
run_config = CrawlerRunConfig() # Do not specify capture_mhtml
# Create a fresh crawler instance
crawler = AsyncWebCrawler(config=browser_config)
try:
# Start the browser
await crawler.start()
result: CrawlResult = await crawler.arun(TEST_URL_SIMPLE, config=run_config)
assert result is not None
assert result.success is True, f"Crawling {TEST_URL_SIMPLE} should succeed. Error: {result.error_message}"
# 1. Check attribute existence
assert hasattr(result, 'mhtml'), "CrawlResult object must have an 'mhtml' attribute"
# 2. Check mhtml is None (assuming default is False)
assert result.mhtml is None, "MHTML content should be None when using default config (assuming default=False)"
# 3. Ensure standard HTML is still present
assert result.html is not None
assert EXPECTED_CONTENT_SIMPLE in result.html
finally:
# Important: Ensure browser is completely closed even if assertions fail
await crawler.close()
# Help the garbage collector clean up
crawler = None
# Optional: Add a test for a JS-heavy page if needed
@pytest.mark.asyncio
async def test_mhtml_capture_on_js_page_when_enabled():
"""
Verify MHTML capture works on a page requiring JavaScript execution.
"""
# Create a fresh browser config and crawler instance for this test
browser_config = BrowserConfig(headless=True)
run_config = CrawlerRunConfig(
capture_mhtml=True,
# Add a small wait or JS execution if needed for the JS page to fully render
# For quotes.toscrape.com/js/, it renders quickly, but a wait might be safer
# wait_for_timeout=2000 # Example: wait up to 2 seconds
js_code="await new Promise(r => setTimeout(r, 500));" # Small delay after potential load
)
# Create a fresh crawler instance
crawler = AsyncWebCrawler(config=browser_config)
try:
# Start the browser
await crawler.start()
result: CrawlResult = await crawler.arun(TEST_URL_JS, config=run_config)
assert result is not None
assert result.success is True, f"Crawling {TEST_URL_JS} should succeed. Error: {result.error_message}"
assert hasattr(result, 'mhtml'), "CrawlResult object must have an 'mhtml' attribute"
assert result.mhtml is not None, "MHTML content should be captured on JS page when enabled"
assert isinstance(result.mhtml, str), "MHTML content should be a string"
assert len(result.mhtml) > 500, "MHTML content from JS page seems too short"
# Check for MHTML structure
assert re.search(r"Content-Type: multipart/related;", result.mhtml, re.IGNORECASE)
assert re.search(r"Content-Type: text/html", result.mhtml, re.IGNORECASE)
# Check for content rendered by JS within the MHTML
assert EXPECTED_CONTENT_JS in result.mhtml, \
f"Expected JS-rendered content '{EXPECTED_CONTENT_JS}' not found within the captured MHTML"
# Check standard HTML too
assert result.html is not None
assert EXPECTED_CONTENT_JS in result.html, \
f"Expected JS-rendered content '{EXPECTED_CONTENT_JS}' not found within the standard HTML"
finally:
# Important: Ensure browser is completely closed even if assertions fail
await crawler.close()
# Help the garbage collector clean up
crawler = None
if __name__ == "__main__":
# Use pytest for async tests
pytest.main(["-xvs", __file__])

View File

@@ -0,0 +1,185 @@
from crawl4ai.async_webcrawler import AsyncWebCrawler
from crawl4ai.async_configs import CrawlerRunConfig, BrowserConfig
import asyncio
import aiohttp
from aiohttp import web
import tempfile
import shutil
import os, sys, time, json
async def start_test_server():
app = web.Application()
async def basic_page(request):
return web.Response(text="""
<!DOCTYPE html>
<html>
<head>
<title>Network Request Test</title>
</head>
<body>
<h1>Test Page for Network Capture</h1>
<p>This page performs network requests and console logging.</p>
<img src="/image.png" alt="Test Image">
<script>
console.log("Basic console log");
console.error("Error message");
console.warn("Warning message");
// Make some XHR requests
const xhr = new XMLHttpRequest();
xhr.open('GET', '/api/data', true);
xhr.send();
// Make a fetch request
fetch('/api/json')
.then(response => response.json())
.catch(error => console.error('Fetch error:', error));
// Trigger an error
setTimeout(() => {
try {
nonExistentFunction();
} catch (e) {
console.error("Caught error:", e);
}
}, 100);
</script>
</body>
</html>
""", content_type="text/html")
async def image(request):
# Return a small 1x1 transparent PNG
return web.Response(body=bytes.fromhex('89504E470D0A1A0A0000000D49484452000000010000000108060000001F15C4890000000D4944415478DA63FAFFFF3F030079DB00018D959DE70000000049454E44AE426082'), content_type="image/png")
async def api_data(request):
return web.Response(text="sample data")
async def api_json(request):
return web.json_response({"status": "success", "message": "JSON data"})
# Register routes
app.router.add_get('/', basic_page)
app.router.add_get('/image.png', image)
app.router.add_get('/api/data', api_data)
app.router.add_get('/api/json', api_json)
runner = web.AppRunner(app)
await runner.setup()
site = web.TCPSite(runner, 'localhost', 8080)
await site.start()
return runner
async def test_network_console_capture():
print("\n=== Testing Network and Console Capture ===\n")
# Start test server
runner = await start_test_server()
try:
browser_config = BrowserConfig(headless=True)
# Test with capture disabled (default)
print("\n1. Testing with capture disabled (default)...")
async with AsyncWebCrawler(config=browser_config) as crawler:
config = CrawlerRunConfig(
wait_until="networkidle", # Wait for network to be idle
)
result = await crawler.arun(url="http://localhost:8080/", config=config)
assert result.network_requests is None, "Network requests should be None when capture is disabled"
assert result.console_messages is None, "Console messages should be None when capture is disabled"
print("✓ Default config correctly returns None for network_requests and console_messages")
# Test with network capture enabled
print("\n2. Testing with network capture enabled...")
async with AsyncWebCrawler(config=browser_config) as crawler:
config = CrawlerRunConfig(
wait_until="networkidle", # Wait for network to be idle
capture_network_requests=True
)
result = await crawler.arun(url="http://localhost:8080/", config=config)
assert result.network_requests is not None, "Network requests should be captured"
print(f"✓ Captured {len(result.network_requests)} network requests")
# Check if we have both requests and responses
request_count = len([r for r in result.network_requests if r.get("event_type") == "request"])
response_count = len([r for r in result.network_requests if r.get("event_type") == "response"])
print(f" - {request_count} requests, {response_count} responses")
# Check if we captured specific resources
urls = [r.get("url") for r in result.network_requests]
has_image = any("/image.png" in url for url in urls)
has_api_data = any("/api/data" in url for url in urls)
has_api_json = any("/api/json" in url for url in urls)
assert has_image, "Should have captured image request"
assert has_api_data, "Should have captured API data request"
assert has_api_json, "Should have captured API JSON request"
print("✓ Captured expected network requests (image, API endpoints)")
# Test with console capture enabled
print("\n3. Testing with console capture enabled...")
async with AsyncWebCrawler(config=browser_config) as crawler:
config = CrawlerRunConfig(
wait_until="networkidle", # Wait for network to be idle
capture_console_messages=True
)
result = await crawler.arun(url="http://localhost:8080/", config=config)
assert result.console_messages is not None, "Console messages should be captured"
print(f"✓ Captured {len(result.console_messages)} console messages")
# Check if we have different types of console messages
message_types = set(msg.get("type") for msg in result.console_messages if "type" in msg)
print(f" - Message types: {', '.join(message_types)}")
# Print all captured messages for debugging
print(" - Captured messages:")
for msg in result.console_messages:
print(f" * Type: {msg.get('type', 'N/A')}, Text: {msg.get('text', 'N/A')}")
# Look for specific messages
messages = [msg.get("text") for msg in result.console_messages if "text" in msg]
has_basic_log = any("Basic console log" in msg for msg in messages)
has_error_msg = any("Error message" in msg for msg in messages)
has_warning_msg = any("Warning message" in msg for msg in messages)
assert has_basic_log, "Should have captured basic console.log message"
assert has_error_msg, "Should have captured console.error message"
assert has_warning_msg, "Should have captured console.warn message"
print("✓ Captured expected console messages (log, error, warning)")
# Test with both captures enabled
print("\n4. Testing with both network and console capture enabled...")
async with AsyncWebCrawler(config=browser_config) as crawler:
config = CrawlerRunConfig(
wait_until="networkidle", # Wait for network to be idle
capture_network_requests=True,
capture_console_messages=True
)
result = await crawler.arun(url="http://localhost:8080/", config=config)
assert result.network_requests is not None, "Network requests should be captured"
assert result.console_messages is not None, "Console messages should be captured"
print(f"✓ Successfully captured both {len(result.network_requests)} network requests and {len(result.console_messages)} console messages")
finally:
await runner.cleanup()
print("\nTest server shutdown")
async def main():
try:
await test_network_console_capture()
print("\n✅ All tests passed successfully!")
except Exception as e:
print(f"\n❌ Test failed: {str(e)}")
raise
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -10,6 +10,7 @@ import asyncio
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy, JsonXPathExtractionStrategy
from crawl4ai.utils import preprocess_html_for_schema
import json
# Test HTML - A complex job board with companies, departments, and positions

34
tests/hub/test_simple.py Normal file
View File

@@ -0,0 +1,34 @@
# test.py
from crawl4ai import CrawlerHub
import json
async def amazon_example():
if (crawler_cls := CrawlerHub.get("amazon_product")):
crawler = crawler_cls()
print(f"Crawler version: {crawler_cls.meta['version']}")
print(f"Rate limits: {crawler_cls.meta.get('rate_limit', 'Unlimited')}")
print(await crawler.run("https://amazon.com/test"))
else:
print("Crawler not found!")
async def google_example():
# Get crawler dynamically
crawler_cls = CrawlerHub.get("google_search")
crawler = crawler_cls()
# Text search
text_results = await crawler.run(
query="apple inc",
search_type="text",
schema_cache_path="/Users/unclecode/.crawl4ai"
)
print(json.dumps(json.loads(text_results), indent=4))
# Image search
# image_results = await crawler.run(query="apple inc", search_type="image")
# print(image_results)
if __name__ == "__main__":
import asyncio
# asyncio.run(amazon_example())
asyncio.run(google_example())

View File

@@ -0,0 +1,80 @@
import asyncio
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode, AsyncLoggerBase
import os
from datetime import datetime
class AsyncFileLogger(AsyncLoggerBase):
"""
File-only asynchronous logger that writes logs to a specified file.
"""
def __init__(self, log_file: str):
"""
Initialize the file logger.
Args:
log_file: File path for logging
"""
self.log_file = log_file
os.makedirs(os.path.dirname(os.path.abspath(log_file)), exist_ok=True)
def _write_to_file(self, level: str, message: str, tag: str):
"""Write a message to the log file."""
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
with open(self.log_file, "a", encoding="utf-8") as f:
f.write(f"[{timestamp}] [{level}] [{tag}] {message}\n")
def debug(self, message: str, tag: str = "DEBUG", **kwargs):
"""Log a debug message to file."""
self._write_to_file("DEBUG", message, tag)
def info(self, message: str, tag: str = "INFO", **kwargs):
"""Log an info message to file."""
self._write_to_file("INFO", message, tag)
def success(self, message: str, tag: str = "SUCCESS", **kwargs):
"""Log a success message to file."""
self._write_to_file("SUCCESS", message, tag)
def warning(self, message: str, tag: str = "WARNING", **kwargs):
"""Log a warning message to file."""
self._write_to_file("WARNING", message, tag)
def error(self, message: str, tag: str = "ERROR", **kwargs):
"""Log an error message to file."""
self._write_to_file("ERROR", message, tag)
def url_status(self, url: str, success: bool, timing: float, tag: str = "FETCH", url_length: int = 50):
"""Log URL fetch status to file."""
status = "SUCCESS" if success else "FAILED"
message = f"{url[:url_length]}... | Status: {status} | Time: {timing:.2f}s"
self._write_to_file("URL_STATUS", message, tag)
def error_status(self, url: str, error: str, tag: str = "ERROR", url_length: int = 50):
"""Log error status to file."""
message = f"{url[:url_length]}... | Error: {error}"
self._write_to_file("ERROR", message, tag)
async def main():
browser_config = BrowserConfig(headless=True, verbose=True)
crawler = AsyncWebCrawler(config=browser_config, logger=AsyncFileLogger("/Users/unclecode/devs/crawl4ai/.private/tmp/crawl.log"))
await crawler.start()
try:
crawl_config = CrawlerRunConfig(
cache_mode=CacheMode.BYPASS,
)
# Use the crawler multiple times
result = await crawler.arun(
url='https://kidocode.com/',
config=crawl_config
)
if result.success:
print("First crawl - Raw Markdown Length:", len(result.markdown.raw_markdown))
finally:
# Always ensure we close the crawler
await crawler.close()
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,119 @@
# pip install "mcp-sdk[ws]" anyio
import anyio, json
from mcp.client.websocket import websocket_client
from mcp.client.session import ClientSession
async def test_list():
async with websocket_client("ws://localhost:8020/mcp/ws") as (r, w):
async with ClientSession(r, w) as s:
await s.initialize()
print("tools :", [t.name for t in (await s.list_tools()).tools])
print("resources :", [r.name for r in (await s.list_resources()).resources])
print("templates :", [t.name for t in (await s.list_resource_templates()).resource_templates])
async def test_crawl(s: ClientSession) -> None:
"""Hit the @mcp_tool('crawl') endpoint."""
res = await s.call_tool(
"crawl",
{
"urls": ["https://example.com"],
"browser_config": {},
"crawler_config": {},
},
)
print("crawl →", json.loads(res.content[0].text))
async def test_md(s: ClientSession) -> None:
"""Hit the @mcp_tool('md') endpoint."""
res = await s.call_tool(
"md",
{
"url": "https://example.com",
"f": "fit", # or RAW, BM25, LLM
"q": None,
"c": "0",
},
)
result = json.loads(res.content[0].text)
print("md →", result['markdown'][:100], "...")
async def test_screenshot(s: ClientSession):
res = await s.call_tool(
"screenshot",
{
"url": "https://example.com",
"screenshot_wait_for": 1.0,
},
)
png_b64 = json.loads(res.content[0].text)["screenshot"]
print("screenshot →", png_b64[:60], "… (base64)")
async def test_pdf(s: ClientSession):
res = await s.call_tool(
"pdf",
{
"url": "https://example.com",
},
)
pdf_b64 = json.loads(res.content[0].text)["pdf"]
print("pdf →", pdf_b64[:60], "… (base64)")
async def test_execute_js(s: ClientSession):
# click the “More” link on Hacker News front page and wait 1 s
res = await s.call_tool(
"execute_js",
{
"url": "https://news.ycombinator.com/news",
"js_code": [
"await page.click('a.morelink')",
"await page.waitForTimeout(1000)",
],
},
)
crawl_result = json.loads(res.content[0].text)
print("execute_js → status", crawl_result["success"], "| html len:", len(crawl_result["html"]))
async def test_html(s: ClientSession):
# fetch the rendered HTML of the Hacker News front page
res = await s.call_tool(
"html",
{
"url": "https://news.ycombinator.com/news",
},
)
crawl_result = json.loads(res.content[0].text)
print("execute_js → status", crawl_result["success"], "| html len:", len(crawl_result["html"]))
async def test_context(s: ClientSession):
# ask the built-in Crawl4ai context endpoint a library-usage question
res = await s.call_tool(
"ask",
{
"query": "I hv a question about Crawl4ai library, how to extract internal links when crawling a page?"
},
)
crawl_result = json.loads(res.content[0].text)
print("execute_js → status", crawl_result["success"], "| html len:", len(crawl_result["html"]))
async def main() -> None:
async with websocket_client("ws://localhost:11235/mcp/ws") as (r, w):
async with ClientSession(r, w) as s:
await s.initialize() # handshake
tools = (await s.list_tools()).tools
print("tools:", [t.name for t in tools])
# await test_list()
await test_crawl(s)
await test_md(s)
await test_screenshot(s)
await test_pdf(s)
await test_execute_js(s)
await test_html(s)
await test_context(s)
anyio.run(main)

11
tests/mcp/test_mcp_sse.py Normal file
View File

@@ -0,0 +1,11 @@
from mcp.client.sse import sse_client
from mcp.client.session import ClientSession
async def main():
async with sse_client("http://127.0.0.1:8020/mcp") as (r, w):
async with ClientSession(r, w) as sess:
print(await sess.list_tools()) # now works
if __name__ == "__main__":
import asyncio
asyncio.run(main())

315
tests/memory/README.md Normal file
View File

@@ -0,0 +1,315 @@
# Crawl4AI Stress Testing and Benchmarking
This directory contains tools for stress testing Crawl4AI's `arun_many` method and dispatcher system with high volumes of URLs, to evaluate performance, exercise concurrency handling, and detect potential memory issues. It also includes a benchmarking system to track performance over time.
## Quick Start
```bash
# Run a default stress test (small config) and generate a report
# (Assumes run_all.sh is updated to call run_benchmark.py)
./run_all.sh
```
*Note: `run_all.sh` might need to be updated if it directly called the old script.*
## Overview
The stress testing system works by:
1. Generating a local test site with heavy HTML pages (regenerated by default for each test).
2. Starting a local HTTP server to serve these pages.
3. Running Crawl4AI's `arun_many` method against this local site using the `MemoryAdaptiveDispatcher` with configurable concurrency (`max_sessions`); a minimal sketch of this step follows this list.
4. Monitoring performance metrics via the `CrawlerMonitor` and optionally logging memory usage.
5. Optionally generating detailed benchmark reports with visualizations using `benchmark_report.py`.
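As a rough orientation, the core of step 3 looks like the sketch below. This is a minimal illustration only, not the actual `test_stress_sdk.py`: the real script also generates the test site, starts the local server, logs per-chunk metrics, and samples memory. The dispatcher and monitor parameter names follow the crawl4ai dispatcher API as documented at the time of writing and may differ between versions; the `page_{i}.html` URL pattern is a placeholder.
```python
import asyncio

from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
from crawl4ai.async_dispatcher import MemoryAdaptiveDispatcher, CrawlerMonitor, DisplayMode


async def run_stress_sketch(url_count: int = 50, max_sessions: int = 8, port: int = 8000) -> None:
    # URLs served by the locally generated test site (steps 1-2); the path pattern is hypothetical.
    urls = [f"http://localhost:{port}/page_{i}.html" for i in range(url_count)]

    dispatcher = MemoryAdaptiveDispatcher(
        max_session_permit=max_sessions,  # corresponds to --max-sessions
        monitor=CrawlerMonitor(display_mode=DisplayMode.DETAILED),
    )
    run_config = CrawlerRunConfig(cache_mode=CacheMode.BYPASS)

    async with AsyncWebCrawler(config=BrowserConfig(headless=True)) as crawler:
        results = await crawler.arun_many(urls, config=run_config, dispatcher=dispatcher)
        ok = sum(1 for r in results if r.success)
        print(f"{ok}/{len(urls)} URLs crawled successfully")


if __name__ == "__main__":
    asyncio.run(run_stress_sketch())
```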
## Available Tools
- `test_stress_sdk.py` - Main stress testing script utilizing `arun_many` and dispatchers.
- `benchmark_report.py` - Report generator for comparing test results (assumes compatibility with `test_stress_sdk.py` outputs).
- `run_benchmark.py` - Python script with predefined test configurations that orchestrates tests using `test_stress_sdk.py`.
- `run_all.sh` - Simple wrapper script (may need updating).
## Usage Guide
### Using Predefined Configurations (Recommended)
The `run_benchmark.py` script offers the easiest way to run standardized tests:
```bash
# Quick test (50 URLs, 4 max sessions)
python run_benchmark.py quick
# Medium test (500 URLs, 16 max sessions)
python run_benchmark.py medium
# Large test (1000 URLs, 32 max sessions)
python run_benchmark.py large
# Extreme test (2000 URLs, 64 max sessions)
python run_benchmark.py extreme
# Custom configuration
python run_benchmark.py custom --urls 300 --max-sessions 24 --chunk-size 50
# Run 'small' test in streaming mode
python run_benchmark.py small --stream
# Override max_sessions for the 'medium' config
python run_benchmark.py medium --max-sessions 20
# Skip benchmark report generation after the test
python run_benchmark.py small --no-report
# Clean up reports and site files before running
python run_benchmark.py medium --clean
```
#### `run_benchmark.py` Parameters
| Parameter | Default | Description |
| -------------------- | --------------- | --------------------------------------------------------------------------- |
| `config` | *required* | Test configuration: `quick`, `small`, `medium`, `large`, `extreme`, `custom`|
| `--urls` | config-specific | Number of URLs (required for `custom`) |
| `--max-sessions` | config-specific | Max concurrent sessions managed by dispatcher (required for `custom`) |
| `--chunk-size` | config-specific | URLs per batch for non-stream logging (required for `custom`) |
| `--stream` | False | Enable streaming results (disables batch logging) |
| `--monitor-mode` | DETAILED | `DETAILED` or `AGGREGATED` display for the live monitor |
| `--use-rate-limiter` | False | Enable basic rate limiter in the dispatcher |
| `--port` | 8000 | HTTP server port |
| `--no-report` | False | Skip generating comparison report via `benchmark_report.py` |
| `--clean` | False | Clean up reports and site files before running |
| `--keep-server-alive`| False | Keep local HTTP server running after test |
| `--use-existing-site`| False | Use existing site on specified port (no local server start/site gen) |
| `--skip-generation` | False | Use existing site files but start local server |
| `--keep-site` | False | Keep generated site files after test |
#### Predefined Configurations
| Configuration | URLs | Max Sessions | Chunk Size | Description |
| ------------- | ------ | ------------ | ---------- | -------------------------------- |
| `quick` | 50 | 4 | 10 | Quick test for basic validation |
| `small` | 100 | 8 | 20 | Small test for routine checks |
| `medium` | 500 | 16 | 50 | Medium test for thorough checks |
| `large` | 1000 | 32 | 100 | Large test for stress testing |
| `extreme` | 2000 | 64 | 200 | Extreme test for limit testing |
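These presets simply parameterize the `--urls`, `--max-sessions`, and `--chunk-size` flags of `test_stress_sdk.py`. A plausible encoding is a small lookup table like the one below; the names and structure here are illustrative and may not match the actual implementation inside `run_benchmark.py`.
```python
# Illustrative only: run_benchmark.py's real internals may differ.
PRESETS = {
    "quick":   {"urls": 50,   "max_sessions": 4,  "chunk_size": 10},
    "small":   {"urls": 100,  "max_sessions": 8,  "chunk_size": 20},
    "medium":  {"urls": 500,  "max_sessions": 16, "chunk_size": 50},
    "large":   {"urls": 1000, "max_sessions": 32, "chunk_size": 100},
    "extreme": {"urls": 2000, "max_sessions": 64, "chunk_size": 200},
}

def build_sdk_args(name: str, **overrides) -> list[str]:
    """Merge a preset with CLI overrides (e.g. --max-sessions 20) into test_stress_sdk.py arguments."""
    cfg = {**PRESETS[name], **{k: v for k, v in overrides.items() if v is not None}}
    return [
        "--urls", str(cfg["urls"]),
        "--max-sessions", str(cfg["max_sessions"]),
        "--chunk-size", str(cfg["chunk_size"]),
    ]
```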
### Direct Usage of `test_stress_sdk.py`
For fine-grained control or debugging, you can run the stress test script directly:
```bash
# Test with 200 URLs and 32 max concurrent sessions
python test_stress_sdk.py --urls 200 --max-sessions 32 --chunk-size 40
# Clean up previous test data first
python test_stress_sdk.py --clean-reports --clean-site --urls 100 --max-sessions 16 --chunk-size 20
# Change the HTTP server port and use aggregated monitor
python test_stress_sdk.py --port 8088 --urls 100 --max-sessions 16 --monitor-mode AGGREGATED
# Enable streaming mode and use rate limiting
python test_stress_sdk.py --urls 50 --max-sessions 8 --stream --use-rate-limiter
# Change report output location
python test_stress_sdk.py --report-path custom_reports --urls 100 --max-sessions 16
```
#### `test_stress_sdk.py` Parameters
| Parameter | Default | Description |
| -------------------- | ---------- | -------------------------------------------------------------------- |
| `--urls` | 100 | Number of URLs to test |
| `--max-sessions` | 16 | Maximum concurrent crawling sessions managed by the dispatcher |
| `--chunk-size` | 10 | Number of URLs per batch (relevant for non-stream logging) |
| `--stream` | False | Enable streaming results (disables batch logging) |
| `--monitor-mode` | DETAILED | `DETAILED` or `AGGREGATED` display for the live `CrawlerMonitor` |
| `--use-rate-limiter` | False | Enable a basic `RateLimiter` within the dispatcher |
| `--site-path` | "test_site"| Path to store/use the generated test site |
| `--port` | 8000 | Port for the local HTTP server |
| `--report-path` | "reports" | Path to save test result summary (JSON) and memory samples (CSV) |
| `--skip-generation` | False | Use existing test site files but still start local server |
| `--use-existing-site`| False | Use existing site on specified port (no local server/site gen) |
| `--keep-server-alive`| False | Keep local HTTP server running after test completion |
| `--keep-site` | False | Keep the generated test site files after test completion |
| `--clean-reports` | False | Clean up report directory before running |
| `--clean-site` | False | Clean up site directory before/after running (see script logic) |
### Generating Reports Only
If you only want to generate a benchmark report from existing test results (assuming `benchmark_report.py` is compatible):
```bash
# Generate a report from existing test results in ./reports/
python benchmark_report.py
# Limit to the most recent 5 test results
python benchmark_report.py --limit 5
# Specify a custom source directory for test results
python benchmark_report.py --reports-dir alternate_results
```
#### `benchmark_report.py` Parameters (Assumed)
| Parameter | Default | Description |
| --------------- | -------------------- | ----------------------------------------------------------- |
| `--reports-dir` | "reports" | Directory containing `test_stress_sdk.py` result files |
| `--output-dir` | "benchmark_reports" | Directory to save generated HTML reports and charts |
| `--limit` | None (all results) | Limit comparison to N most recent test results |
| `--output-file` | Auto-generated | Custom output filename for the HTML report |
## Understanding the Test Output
### Real-time Progress Display (`CrawlerMonitor`)
When running `test_stress_sdk.py`, the `CrawlerMonitor` provides a live view of the crawling process managed by the dispatcher. The two display modes below map onto the monitor configuration sketched after this list.
- **DETAILED Mode (Default):** Shows individual task status (Queued, Active, Completed, Failed), timings, memory usage per task (if `psutil` is available), overall queue statistics, and memory pressure status (if `psutil` available).
- **AGGREGATED Mode:** Shows summary counts (Queued, Active, Completed, Failed), overall progress percentage, estimated time remaining, average URLs/sec, and memory pressure status.
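Which display you get is determined by the monitor attached to the dispatcher; `--monitor-mode` maps onto something like the sketch below (based on the crawl4ai dispatcher API; exact class and parameter names may vary between versions).
```python
from crawl4ai.async_dispatcher import MemoryAdaptiveDispatcher, CrawlerMonitor, DisplayMode

def make_dispatcher(monitor_mode: str, max_sessions: int) -> MemoryAdaptiveDispatcher:
    """Sketch: translate --monitor-mode DETAILED|AGGREGATED into a monitored dispatcher."""
    mode = DisplayMode.DETAILED if monitor_mode.upper() == "DETAILED" else DisplayMode.AGGREGATED
    return MemoryAdaptiveDispatcher(
        max_session_permit=max_sessions,
        monitor=CrawlerMonitor(display_mode=mode),
    )
```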
### Batch Log Output (Non-Streaming Mode Only)
If running `test_stress_sdk.py` **without** the `--stream` flag, you will *also* see per-batch summary lines printed to the console *after* the monitor display, once each chunk of URLs finishes processing:
```
Batch | Progress | Start Mem | End Mem | URLs/sec | Success/Fail | Time (s) | Status
───────────────────────────────────────────────────────────────────────────────────────────
1 | 10.0% | 50.1 MB | 55.3 MB | 23.8 | 10/0 | 0.42 | Success
2 | 20.0% | 55.3 MB | 60.1 MB | 24.1 | 10/0 | 0.41 | Success
...
```
This display provides chunk-specific metrics:
- **Batch**: The batch number being reported.
- **Progress**: Overall percentage of total URLs processed *after* this batch.
- **Start Mem / End Mem**: Memory usage before and after processing this batch (if tracked).
- **URLs/sec**: Processing speed *for this specific batch*.
- **Success/Fail**: Number of successful and failed URLs *in this batch*.
- **Time (s)**: Wall-clock time taken to process *this batch*.
- **Status**: Color-coded status for the batch outcome.
### Summary Output
After test completion, a final summary is displayed:
```
================================================================================
Test Completed
================================================================================
Test ID: 20250418_103015
Configuration: 100 URLs, 16 max sessions, Chunk: 10, Stream: False, Monitor: DETAILED
Results: 100 successful, 0 failed (100 processed, 100.0% success)
Performance: 5.85 seconds total, 17.09 URLs/second avg
Memory Usage: Start: 50.1 MB, End: 75.3 MB, Max: 78.1 MB, Growth: 25.2 MB
Results summary saved to reports/test_summary_20250418_103015.json
```
### HTML Report Structure (Generated by `benchmark_report.py`)
(This section remains the same, assuming `benchmark_report.py` generates these)
The benchmark report contains several sections:
1. **Summary**: Overview of the latest test results and trends
2. **Performance Comparison**: Charts showing throughput across tests
3. **Memory Usage**: Detailed memory usage graphs for each test
4. **Detailed Results**: Tabular data of all test metrics
5. **Conclusion**: Automated analysis of performance and memory patterns
### Memory Metrics
(This section remains conceptually the same)
Memory growth is the key metric for detecting leaks...
### Performance Metrics
(This section remains conceptually the same, though "URLs per Worker" is less relevant - focus on overall URLs/sec)
Key performance indicators include:
- **URLs per Second**: Higher is better (throughput)
- **Success Rate**: Should be 100% in normal conditions
- **Total Processing Time**: Lower is better
- **Dispatcher Efficiency**: Observe queue lengths and wait times in the monitor (Detailed mode)
### Raw Data Files
Raw data is saved in the `--report-path` directory (default `./reports/`):
- **JSON files** (`test_summary_*.json`): Contains the final summary for each test run.
- **CSV files** (`memory_samples_*.csv`): Contains time-series memory samples taken during the test run.
Example of reading raw data:
```python
import json
import pandas as pd
# Load test summary
test_id = "20250418_103015" # Example ID
with open(f'reports/test_summary_{test_id}.json', 'r') as f:
results = json.load(f)
# Load memory samples
memory_df = pd.read_csv(f'reports/memory_samples_{test_id}.csv')
# Analyze memory_df (e.g., calculate growth, plot)
if not memory_df['memory_info_mb'].isnull().all():
growth = memory_df['memory_info_mb'].iloc[-1] - memory_df['memory_info_mb'].iloc[0]
print(f"Total Memory Growth: {growth:.1f} MB")
else:
print("No valid memory samples found.")
print(f"Avg URLs/sec: {results['urls_processed'] / results['total_time_seconds']:.2f}")
```
## Visualization Dependencies
(This section remains the same)
For full visualization capabilities in the HTML reports generated by `benchmark_report.py`, install additional dependencies...
## Directory Structure
```
benchmarking/ # Or your top-level directory name
├── benchmark_reports/ # Generated HTML reports (by benchmark_report.py)
├── reports/ # Raw test result data (from test_stress_sdk.py)
├── test_site/ # Generated test content (temporary)
├── benchmark_report.py# Report generator
├── run_benchmark.py # Test runner with predefined configs
├── test_stress_sdk.py # Main stress test implementation using arun_many
└── run_all.sh # Simple wrapper script (may need updates)
#└── requirements.txt # Optional: Visualization dependencies for benchmark_report.py
```
## Cleanup
To clean up after testing:
```bash
# Remove the test site content (if not using --keep-site)
rm -rf test_site
# Remove all raw reports and generated benchmark reports
rm -rf reports benchmark_reports
# Or use the --clean flag with run_benchmark.py
python run_benchmark.py medium --clean
```
## Use in CI/CD
(This section remains conceptually the same, just update script names)
These tests can be integrated into CI/CD pipelines:
```bash
# Example CI script
python run_benchmark.py medium --no-report # Run test without interactive report gen
# Check exit code
if [ $? -ne 0 ]; then echo "Stress test failed!"; exit 1; fi
# Optionally, run report generator and check its output/metrics
# python benchmark_report.py
# check_report_metrics.py reports/test_summary_*.json || exit 1
exit 0
```
## Troubleshooting
- **HTTP Server Port Conflict**: Use `--port` with `run_benchmark.py` or `test_stress_sdk.py`.
- **Memory Tracking Issues**: The `SimpleMemoryTracker` uses platform commands (`ps`, `/proc`, `tasklist`); a sketch of this approach follows this list. Ensure these are available and the script has permission. If it consistently fails, memory reporting will be limited.
- **Visualization Missing**: Related to `benchmark_report.py` and its dependencies.
- **Site Generation Issues**: Check permissions for creating `./test_site/`. Use `--skip-generation` if you want to manage the site manually.
- **Testing Against External Site**: Ensure the external site is running and use `--use-existing-site --port <correct_port>`.
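As a concrete illustration of the platform-command approach mentioned under *Memory Tracking Issues* above, a best-effort memory probe on Linux/macOS can be as simple as the sketch below; the actual `SimpleMemoryTracker` may read `/proc` or call `tasklist` on Windows and handle failures differently.
```python
import os
import subprocess

def rss_mb(pid: int | None = None) -> float | None:
    """Best-effort resident set size in MB via `ps` (Linux/macOS only).

    Sketch of the approach described above; SimpleMemoryTracker's real
    implementation may differ. Returns None if the probe fails.
    """
    pid = pid or os.getpid()
    try:
        out = subprocess.run(
            ["ps", "-o", "rss=", "-p", str(pid)],
            capture_output=True, text=True, check=True,
        )
        return int(out.stdout.strip()) / 1024.0  # ps reports RSS in kilobytes
    except (subprocess.SubprocessError, ValueError, FileNotFoundError):
        return None  # memory reporting will be limited, as noted above
```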

887
tests/memory/benchmark_report.py Executable file
View File

@@ -0,0 +1,887 @@
#!/usr/bin/env python3
"""
Benchmark reporting tool for Crawl4AI stress tests.
Generates visual reports and comparisons between test runs.
"""
import os
import json
import glob
import argparse
import sys
from datetime import datetime
from pathlib import Path
from rich.console import Console
from rich.table import Table
from rich.panel import Panel
# Initialize rich console
console = Console()
# Try to import optional visualization dependencies
VISUALIZATION_AVAILABLE = True
try:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import seaborn as sns
except ImportError:
VISUALIZATION_AVAILABLE = False
console.print("[yellow]Warning: Visualization dependencies not found. Install with:[/yellow]")
console.print("[yellow]pip install pandas matplotlib seaborn[/yellow]")
console.print("[yellow]Only text-based reports will be generated.[/yellow]")
# Configure plotting if available
if VISUALIZATION_AVAILABLE:
# Set plot style for dark theme
plt.style.use('dark_background')
sns.set_theme(style="darkgrid")
# Custom color palette based on Nord theme
nord_palette = ["#88c0d0", "#81a1c1", "#a3be8c", "#ebcb8b", "#bf616a", "#b48ead", "#5e81ac"]
sns.set_palette(nord_palette)
class BenchmarkReporter:
"""Generates visual reports and comparisons for Crawl4AI stress tests."""
def __init__(self, reports_dir="reports", output_dir="benchmark_reports"):
"""Initialize the benchmark reporter.
Args:
reports_dir: Directory containing test result files
output_dir: Directory to save generated reports
"""
self.reports_dir = Path(reports_dir)
self.output_dir = Path(output_dir)
self.output_dir.mkdir(parents=True, exist_ok=True)
# Configure matplotlib if available
if VISUALIZATION_AVAILABLE:
# Ensure the matplotlib backend works in headless environments
mpl.use('Agg')
# Set up styling for plots with dark theme
mpl.rcParams['figure.figsize'] = (12, 8)
mpl.rcParams['font.size'] = 12
mpl.rcParams['axes.labelsize'] = 14
mpl.rcParams['axes.titlesize'] = 16
mpl.rcParams['xtick.labelsize'] = 12
mpl.rcParams['ytick.labelsize'] = 12
mpl.rcParams['legend.fontsize'] = 12
mpl.rcParams['figure.facecolor'] = '#1e1e1e'
mpl.rcParams['axes.facecolor'] = '#2e3440'
mpl.rcParams['savefig.facecolor'] = '#1e1e1e'
mpl.rcParams['text.color'] = '#e0e0e0'
mpl.rcParams['axes.labelcolor'] = '#e0e0e0'
mpl.rcParams['xtick.color'] = '#e0e0e0'
mpl.rcParams['ytick.color'] = '#e0e0e0'
mpl.rcParams['grid.color'] = '#444444'
mpl.rcParams['figure.edgecolor'] = '#444444'
def load_test_results(self, limit=None):
"""Load all test results from the reports directory.
Args:
limit: Optional limit on number of most recent tests to load
Returns:
Dictionary mapping test IDs to result data
"""
result_files = glob.glob(str(self.reports_dir / "test_results_*.json"))
# Sort files by modification time (newest first)
result_files.sort(key=os.path.getmtime, reverse=True)
if limit:
result_files = result_files[:limit]
results = {}
for file_path in result_files:
try:
with open(file_path, 'r') as f:
data = json.load(f)
test_id = data.get('test_id')
if test_id:
results[test_id] = data
# Try to load the corresponding memory samples
csv_path = self.reports_dir / f"memory_samples_{test_id}.csv"
if csv_path.exists():
try:
memory_df = pd.read_csv(csv_path)
results[test_id]['memory_samples'] = memory_df
except Exception as e:
console.print(f"[yellow]Warning: Could not load memory samples for {test_id}: {e}[/yellow]")
except Exception as e:
console.print(f"[red]Error loading {file_path}: {e}[/red]")
console.print(f"Loaded {len(results)} test results")
return results
def generate_summary_table(self, results):
"""Generate a summary table of test results.
Args:
results: Dictionary mapping test IDs to result data
Returns:
Rich Table object
"""
table = Table(title="Crawl4AI Stress Test Summary", show_header=True)
# Define columns
table.add_column("Test ID", style="cyan")
table.add_column("Date", style="bright_green")
table.add_column("URLs", justify="right")
table.add_column("Workers", justify="right")
table.add_column("Success %", justify="right")
table.add_column("Time (s)", justify="right")
table.add_column("Mem Growth", justify="right")
table.add_column("URLs/sec", justify="right")
# Add rows
for test_id, data in sorted(results.items(), key=lambda x: x[0], reverse=True):
# Parse timestamp from test_id
try:
date_str = datetime.strptime(test_id, "%Y%m%d_%H%M%S").strftime("%Y-%m-%d %H:%M")
except:
date_str = "Unknown"
# Calculate success percentage
total_urls = data.get('url_count', 0)
successful = data.get('successful_urls', 0)
success_pct = (successful / total_urls * 100) if total_urls > 0 else 0
# Calculate memory growth if available
mem_growth = "N/A"
if 'memory_samples' in data:
samples = data['memory_samples']
if len(samples) >= 2:
# Try to extract numeric values from memory_info strings
try:
first_mem = float(samples.iloc[0]['memory_info'].split()[0])
last_mem = float(samples.iloc[-1]['memory_info'].split()[0])
mem_growth = f"{last_mem - first_mem:.1f} MB"
except:
pass
# Calculate URLs per second
time_taken = data.get('total_time_seconds', 0)
urls_per_sec = total_urls / time_taken if time_taken > 0 else 0
table.add_row(
test_id,
date_str,
str(total_urls),
str(data.get('workers', 'N/A')),
f"{success_pct:.1f}%",
f"{data.get('total_time_seconds', 0):.2f}",
mem_growth,
f"{urls_per_sec:.1f}"
)
return table
def generate_performance_chart(self, results, output_file=None):
"""Generate a performance comparison chart.
Args:
results: Dictionary mapping test IDs to result data
output_file: File path to save the chart
Returns:
Path to the saved chart file or None if visualization is not available
"""
if not VISUALIZATION_AVAILABLE:
console.print("[yellow]Skipping performance chart - visualization dependencies not available[/yellow]")
return None
# Extract relevant data
data = []
for test_id, result in results.items():
urls = result.get('url_count', 0)
workers = result.get('workers', 0)
time_taken = result.get('total_time_seconds', 0)
urls_per_sec = urls / time_taken if time_taken > 0 else 0
# Parse timestamp from test_id for sorting
try:
timestamp = datetime.strptime(test_id, "%Y%m%d_%H%M%S")
data.append({
'test_id': test_id,
'timestamp': timestamp,
'urls': urls,
'workers': workers,
'time_seconds': time_taken,
'urls_per_sec': urls_per_sec
})
except:
console.print(f"[yellow]Warning: Could not parse timestamp from {test_id}[/yellow]")
if not data:
console.print("[yellow]No valid data for performance chart[/yellow]")
return None
# Convert to DataFrame and sort by timestamp
df = pd.DataFrame(data)
df = df.sort_values('timestamp')
# Create the plot
fig, ax1 = plt.subplots(figsize=(12, 6))
# Plot URLs per second as bars with properly set x-axis
x_pos = range(len(df['test_id']))
bars = ax1.bar(x_pos, df['urls_per_sec'], color='#88c0d0', alpha=0.8)
ax1.set_ylabel('URLs per Second', color='#88c0d0')
ax1.tick_params(axis='y', labelcolor='#88c0d0')
# Properly set x-axis labels
ax1.set_xticks(x_pos)
ax1.set_xticklabels(df['test_id'].tolist(), rotation=45, ha='right')
# Add worker count as text on each bar
for i, bar in enumerate(bars):
height = bar.get_height()
workers = df.iloc[i]['workers']
ax1.text(i, height + 0.1,
f'W: {workers}', ha='center', va='bottom', fontsize=9, color='#e0e0e0')
# Add a second y-axis for total URLs
ax2 = ax1.twinx()
ax2.plot(x_pos, df['urls'], '-', color='#bf616a', alpha=0.8, markersize=6, marker='o')
ax2.set_ylabel('Total URLs', color='#bf616a')
ax2.tick_params(axis='y', labelcolor='#bf616a')
# Set title and layout
plt.title('Crawl4AI Performance Benchmarks')
plt.tight_layout()
# Save the figure
if output_file is None:
output_file = self.output_dir / "performance_comparison.png"
plt.savefig(output_file, dpi=100, bbox_inches='tight')
plt.close()
return output_file
def generate_memory_charts(self, results, output_prefix=None):
"""Generate memory usage charts for each test.
Args:
results: Dictionary mapping test IDs to result data
output_prefix: Prefix for output file names
Returns:
List of paths to the saved chart files
"""
if not VISUALIZATION_AVAILABLE:
console.print("[yellow]Skipping memory charts - visualization dependencies not available[/yellow]")
return []
output_files = []
for test_id, result in results.items():
if 'memory_samples' not in result:
continue
memory_df = result['memory_samples']
# Check if we have enough data points
if len(memory_df) < 2:
continue
# Try to extract numeric values from memory_info strings
try:
memory_values = []
for mem_str in memory_df['memory_info']:
# Extract the number from strings like "142.8 MB"
value = float(mem_str.split()[0])
memory_values.append(value)
memory_df['memory_mb'] = memory_values
except Exception as e:
console.print(f"[yellow]Could not parse memory values for {test_id}: {e}[/yellow]")
continue
# Create the plot
plt.figure(figsize=(10, 6))
# Plot memory usage over time
plt.plot(memory_df['elapsed_seconds'], memory_df['memory_mb'],
color='#88c0d0', marker='o', linewidth=2, markersize=4)
# Add annotations for chunk processing
chunk_size = result.get('chunk_size', 0)
url_count = result.get('url_count', 0)
if chunk_size > 0 and url_count > 0:
# Estimate chunk processing times
num_chunks = (url_count + chunk_size - 1) // chunk_size # Ceiling division
total_time = result.get('total_time_seconds', memory_df['elapsed_seconds'].max())
chunk_times = np.linspace(0, total_time, num_chunks + 1)[1:]
for i, time_point in enumerate(chunk_times):
if time_point <= memory_df['elapsed_seconds'].max():
plt.axvline(x=time_point, color='#4c566a', linestyle='--', alpha=0.6)
plt.text(time_point, memory_df['memory_mb'].min(), f'Chunk {i+1}',
rotation=90, verticalalignment='bottom', fontsize=8, color='#e0e0e0')
# Set labels and title
plt.xlabel('Elapsed Time (seconds)', color='#e0e0e0')
plt.ylabel('Memory Usage (MB)', color='#e0e0e0')
plt.title(f'Memory Usage During Test {test_id}\n({url_count} URLs, {result.get("workers", "?")} Workers)',
color='#e0e0e0')
# Add grid and set y-axis to start from zero
plt.grid(True, alpha=0.3, color='#4c566a')
# Add test metadata as text
info_text = (
f"URLs: {url_count}\n"
f"Workers: {result.get('workers', 'N/A')}\n"
f"Chunk Size: {result.get('chunk_size', 'N/A')}\n"
f"Total Time: {result.get('total_time_seconds', 0):.2f}s\n"
)
# Calculate memory growth
if len(memory_df) >= 2:
first_mem = memory_df.iloc[0]['memory_mb']
last_mem = memory_df.iloc[-1]['memory_mb']
growth = last_mem - first_mem
growth_rate = growth / result.get('total_time_seconds', 1)
info_text += f"Memory Growth: {growth:.1f} MB\n"
info_text += f"Growth Rate: {growth_rate:.2f} MB/s"
plt.figtext(0.02, 0.02, info_text, fontsize=9, color='#e0e0e0',
bbox=dict(facecolor='#3b4252', alpha=0.8, edgecolor='#4c566a'))
# Save the figure
if output_prefix is None:
output_file = self.output_dir / f"memory_chart_{test_id}.png"
else:
output_file = Path(f"{output_prefix}_memory_{test_id}.png")
plt.tight_layout()
plt.savefig(output_file, dpi=100, bbox_inches='tight')
plt.close()
output_files.append(output_file)
return output_files
def generate_comparison_report(self, results, title=None, output_file=None):
"""Generate a comprehensive comparison report of multiple test runs.
Args:
results: Dictionary mapping test IDs to result data
title: Optional title for the report
output_file: File path to save the report
Returns:
Path to the saved report file
"""
if not results:
console.print("[yellow]No results to generate comparison report[/yellow]")
return None
if output_file is None:
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_file = self.output_dir / f"comparison_report_{timestamp}.html"
# Create data for the report
rows = []
for test_id, data in results.items():
# Calculate metrics
urls = data.get('url_count', 0)
workers = data.get('workers', 0)
successful = data.get('successful_urls', 0)
failed = data.get('failed_urls', 0)
time_seconds = data.get('total_time_seconds', 0)
# Calculate additional metrics
success_rate = (successful / urls) * 100 if urls > 0 else 0
urls_per_second = urls / time_seconds if time_seconds > 0 else 0
urls_per_worker = urls / workers if workers > 0 else 0
# Calculate memory growth if available
mem_start = None
mem_end = None
mem_growth = None
if 'memory_samples' in data:
samples = data['memory_samples']
if len(samples) >= 2:
try:
first_mem = float(samples.iloc[0]['memory_info'].split()[0])
last_mem = float(samples.iloc[-1]['memory_info'].split()[0])
mem_start = first_mem
mem_end = last_mem
mem_growth = last_mem - first_mem
except:
pass
# Parse timestamp from test_id
try:
timestamp = datetime.strptime(test_id, "%Y%m%d_%H%M%S")
except:
timestamp = None
rows.append({
'test_id': test_id,
'timestamp': timestamp,
'date': timestamp.strftime("%Y-%m-%d %H:%M:%S") if timestamp else "Unknown",
'urls': urls,
'workers': workers,
'chunk_size': data.get('chunk_size', 0),
'successful': successful,
'failed': failed,
'success_rate': success_rate,
'time_seconds': time_seconds,
'urls_per_second': urls_per_second,
'urls_per_worker': urls_per_worker,
'memory_start': mem_start,
'memory_end': mem_end,
'memory_growth': mem_growth
})
# Sort data by timestamp if possible
if VISUALIZATION_AVAILABLE:
# Convert to DataFrame and sort by timestamp
df = pd.DataFrame(rows)
if 'timestamp' in df.columns and not df['timestamp'].isna().all():
df = df.sort_values('timestamp', ascending=False)
else:
# Simple sorting without pandas
rows.sort(key=lambda x: x.get('timestamp', datetime.now()), reverse=True)
df = None
# Generate HTML report
html = []
html.append('<!DOCTYPE html>')
html.append('<html lang="en">')
html.append('<head>')
html.append('<meta charset="UTF-8">')
html.append('<meta name="viewport" content="width=device-width, initial-scale=1.0">')
html.append(f'<title>{title or "Crawl4AI Benchmark Comparison"}</title>')
html.append('<style>')
html.append('''
body {
font-family: Arial, sans-serif;
line-height: 1.6;
margin: 0;
padding: 20px;
max-width: 1200px;
margin: 0 auto;
color: #e0e0e0;
background-color: #1e1e1e;
}
h1, h2, h3 {
color: #81a1c1;
}
table {
border-collapse: collapse;
width: 100%;
margin-bottom: 20px;
}
th, td {
text-align: left;
padding: 12px;
border-bottom: 1px solid #444;
}
th {
background-color: #2e3440;
font-weight: bold;
}
tr:hover {
background-color: #2e3440;
}
a {
color: #88c0d0;
text-decoration: none;
}
a:hover {
text-decoration: underline;
}
.chart-container {
margin: 30px 0;
text-align: center;
background-color: #2e3440;
padding: 20px;
border-radius: 8px;
}
.chart-container img {
max-width: 100%;
height: auto;
border: 1px solid #444;
box-shadow: 0 0 10px rgba(0,0,0,0.3);
}
.card {
border: 1px solid #444;
border-radius: 8px;
padding: 15px;
margin-bottom: 20px;
background-color: #2e3440;
box-shadow: 0 0 10px rgba(0,0,0,0.2);
}
.highlight {
background-color: #3b4252;
font-weight: bold;
}
.status-good {
color: #a3be8c;
}
.status-warning {
color: #ebcb8b;
}
.status-bad {
color: #bf616a;
}
''')
html.append('</style>')
html.append('</head>')
html.append('<body>')
# Header
html.append(f'<h1>{title or "Crawl4AI Benchmark Comparison"}</h1>')
html.append(f'<p>Report generated on {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}</p>')
# Summary section
html.append('<div class="card">')
html.append('<h2>Summary</h2>')
html.append('<p>This report compares the performance of Crawl4AI across multiple test runs.</p>')
# Summary metrics
data_available = (VISUALIZATION_AVAILABLE and df is not None and not df.empty) or (not VISUALIZATION_AVAILABLE and len(rows) > 0)
if data_available:
# Get the latest test data
if VISUALIZATION_AVAILABLE and df is not None and not df.empty:
latest_test = df.iloc[0]
latest_id = latest_test['test_id']
else:
latest_test = rows[0] # First row (already sorted by timestamp)
latest_id = latest_test['test_id']
html.append('<h3>Latest Test Results</h3>')
html.append('<ul>')
html.append(f'<li><strong>Test ID:</strong> {latest_id}</li>')
html.append(f'<li><strong>Date:</strong> {latest_test["date"]}</li>')
html.append(f'<li><strong>URLs:</strong> {latest_test["urls"]}</li>')
html.append(f'<li><strong>Workers:</strong> {latest_test["workers"]}</li>')
html.append(f'<li><strong>Success Rate:</strong> {latest_test["success_rate"]:.1f}%</li>')
html.append(f'<li><strong>Time:</strong> {latest_test["time_seconds"]:.2f} seconds</li>')
html.append(f'<li><strong>Performance:</strong> {latest_test["urls_per_second"]:.1f} URLs/second</li>')
# Check memory growth (handle both pandas and dict mode)
memory_growth_available = False
if VISUALIZATION_AVAILABLE and df is not None:
if pd.notna(latest_test["memory_growth"]):
html.append(f'<li><strong>Memory Growth:</strong> {latest_test["memory_growth"]:.1f} MB</li>')
memory_growth_available = True
else:
if latest_test["memory_growth"] is not None:
html.append(f'<li><strong>Memory Growth:</strong> {latest_test["memory_growth"]:.1f} MB</li>')
memory_growth_available = True
html.append('</ul>')
# If we have more than one test, show trend
if (VISUALIZATION_AVAILABLE and df is not None and len(df) > 1) or (not VISUALIZATION_AVAILABLE and len(rows) > 1):
if VISUALIZATION_AVAILABLE and df is not None:
prev_test = df.iloc[1]
else:
prev_test = rows[1]
# Calculate performance change
perf_change = ((latest_test["urls_per_second"] / prev_test["urls_per_second"]) - 1) * 100 if prev_test["urls_per_second"] > 0 else 0
status_class = ""
if perf_change > 5:
status_class = "status-good"
elif perf_change < -5:
status_class = "status-bad"
html.append('<h3>Performance Trend</h3>')
html.append('<ul>')
html.append(f'<li><strong>Performance Change:</strong> <span class="{status_class}">{perf_change:+.1f}%</span> compared to previous test</li>')
# Memory trend if available
memory_trend_available = False
if VISUALIZATION_AVAILABLE and df is not None:
if pd.notna(latest_test["memory_growth"]) and pd.notna(prev_test["memory_growth"]):
mem_change = latest_test["memory_growth"] - prev_test["memory_growth"]
memory_trend_available = True
else:
if latest_test["memory_growth"] is not None and prev_test["memory_growth"] is not None:
mem_change = latest_test["memory_growth"] - prev_test["memory_growth"]
memory_trend_available = True
if memory_trend_available:
mem_status = ""
if mem_change < -1: # Improved (less growth)
mem_status = "status-good"
elif mem_change > 1: # Worse (more growth)
mem_status = "status-bad"
html.append(f'<li><strong>Memory Trend:</strong> <span class="{mem_status}">{mem_change:+.1f} MB</span> change in memory growth</li>')
html.append('</ul>')
html.append('</div>')
# Generate performance chart if visualization is available
if VISUALIZATION_AVAILABLE:
perf_chart = self.generate_performance_chart(results)
if perf_chart:
html.append('<div class="chart-container">')
html.append('<h2>Performance Comparison</h2>')
html.append(f'<img src="{os.path.relpath(perf_chart, os.path.dirname(output_file))}" alt="Performance Comparison Chart">')
html.append('</div>')
else:
html.append('<div class="chart-container">')
html.append('<h2>Performance Comparison</h2>')
html.append('<p>Charts not available - install visualization dependencies (pandas, matplotlib, seaborn) to enable.</p>')
html.append('</div>')
# Generate memory charts if visualization is available
if VISUALIZATION_AVAILABLE:
memory_charts = self.generate_memory_charts(results)
if memory_charts:
html.append('<div class="chart-container">')
html.append('<h2>Memory Usage</h2>')
for chart in memory_charts:
test_id = chart.stem.split('_')[-1]
html.append(f'<h3>Test {test_id}</h3>')
html.append(f'<img src="{os.path.relpath(chart, os.path.dirname(output_file))}" alt="Memory Chart for {test_id}">')
html.append('</div>')
else:
html.append('<div class="chart-container">')
html.append('<h2>Memory Usage</h2>')
html.append('<p>Charts not available - install visualization dependencies (pandas, matplotlib, seaborn) to enable.</p>')
html.append('</div>')
# Detailed results table
html.append('<h2>Detailed Results</h2>')
# Add the results as an HTML table
html.append('<table>')
# Table headers
html.append('<tr>')
for col in ['Test ID', 'Date', 'URLs', 'Workers', 'Success %', 'Time (s)', 'URLs/sec', 'Mem Growth (MB)']:
html.append(f'<th>{col}</th>')
html.append('</tr>')
# Table rows - handle both pandas DataFrame and list of dicts
if VISUALIZATION_AVAILABLE and df is not None:
# Using pandas DataFrame
for _, row in df.iterrows():
html.append('<tr>')
html.append(f'<td>{row["test_id"]}</td>')
html.append(f'<td>{row["date"]}</td>')
html.append(f'<td>{row["urls"]}</td>')
html.append(f'<td>{row["workers"]}</td>')
html.append(f'<td>{row["success_rate"]:.1f}%</td>')
html.append(f'<td>{row["time_seconds"]:.2f}</td>')
html.append(f'<td>{row["urls_per_second"]:.1f}</td>')
# Memory growth cell
if pd.notna(row["memory_growth"]):
html.append(f'<td>{row["memory_growth"]:.1f}</td>')
else:
html.append('<td>N/A</td>')
html.append('</tr>')
else:
# Using list of dicts (when pandas is not available)
for row in rows:
html.append('<tr>')
html.append(f'<td>{row["test_id"]}</td>')
html.append(f'<td>{row["date"]}</td>')
html.append(f'<td>{row["urls"]}</td>')
html.append(f'<td>{row["workers"]}</td>')
html.append(f'<td>{row["success_rate"]:.1f}%</td>')
html.append(f'<td>{row["time_seconds"]:.2f}</td>')
html.append(f'<td>{row["urls_per_second"]:.1f}</td>')
# Memory growth cell
if row["memory_growth"] is not None:
html.append(f'<td>{row["memory_growth"]:.1f}</td>')
else:
html.append('<td>N/A</td>')
html.append('</tr>')
html.append('</table>')
# Conclusion section
html.append('<div class="card">')
html.append('<h2>Conclusion</h2>')
if VISUALIZATION_AVAILABLE and df is not None and not df.empty:
# Using pandas for statistics (when available)
# Calculate some overall statistics
avg_urls_per_sec = df['urls_per_second'].mean()
max_urls_per_sec = df['urls_per_second'].max()
# Determine if we have a trend
if len(df) > 1:
trend_data = df.sort_values('timestamp')
first_perf = trend_data.iloc[0]['urls_per_second']
last_perf = trend_data.iloc[-1]['urls_per_second']
perf_change = ((last_perf / first_perf) - 1) * 100 if first_perf > 0 else 0
if perf_change > 10:
trend_desc = "significantly improved"
trend_class = "status-good"
elif perf_change > 5:
trend_desc = "improved"
trend_class = "status-good"
elif perf_change < -10:
trend_desc = "significantly decreased"
trend_class = "status-bad"
elif perf_change < -5:
trend_desc = "decreased"
trend_class = "status-bad"
else:
trend_desc = "remained stable"
trend_class = ""
html.append(f'<p>Overall performance has <span class="{trend_class}">{trend_desc}</span> over the test period.</p>')
html.append(f'<p>Average throughput: <strong>{avg_urls_per_sec:.1f}</strong> URLs/second</p>')
html.append(f'<p>Maximum throughput: <strong>{max_urls_per_sec:.1f}</strong> URLs/second</p>')
# Memory leak assessment
if 'memory_growth' in df.columns and not df['memory_growth'].isna().all():
avg_growth = df['memory_growth'].mean()
max_growth = df['memory_growth'].max()
if avg_growth < 5:
leak_assessment = "No significant memory leaks detected"
leak_class = "status-good"
elif avg_growth < 10:
leak_assessment = "Minor memory growth observed"
leak_class = "status-warning"
else:
leak_assessment = "Potential memory leak detected"
leak_class = "status-bad"
html.append(f'<p><span class="{leak_class}">{leak_assessment}</span>. Average memory growth: <strong>{avg_growth:.1f} MB</strong> per test.</p>')
else:
# Manual calculations without pandas
if rows:
# Calculate average and max throughput
total_urls_per_sec = sum(row['urls_per_second'] for row in rows)
avg_urls_per_sec = total_urls_per_sec / len(rows)
max_urls_per_sec = max(row['urls_per_second'] for row in rows)
html.append(f'<p>Average throughput: <strong>{avg_urls_per_sec:.1f}</strong> URLs/second</p>')
html.append(f'<p>Maximum throughput: <strong>{max_urls_per_sec:.1f}</strong> URLs/second</p>')
# Memory assessment (simplified without pandas)
growth_values = [row['memory_growth'] for row in rows if row['memory_growth'] is not None]
if growth_values:
avg_growth = sum(growth_values) / len(growth_values)
if avg_growth < 5:
leak_assessment = "No significant memory leaks detected"
leak_class = "status-good"
elif avg_growth < 10:
leak_assessment = "Minor memory growth observed"
leak_class = "status-warning"
else:
leak_assessment = "Potential memory leak detected"
leak_class = "status-bad"
html.append(f'<p><span class="{leak_class}">{leak_assessment}</span>. Average memory growth: <strong>{avg_growth:.1f} MB</strong> per test.</p>')
else:
html.append('<p>No test data available for analysis.</p>')
html.append('</div>')
# Footer
html.append('<div style="margin-top: 30px; text-align: center; color: #777; font-size: 0.9em;">')
html.append('<p>Generated by Crawl4AI Benchmark Reporter</p>')
html.append('</div>')
html.append('</body>')
html.append('</html>')
# Write the HTML file
with open(output_file, 'w') as f:
f.write('\n'.join(html))
# Print a clickable link for terminals that support it (iTerm, VS Code, etc.)
file_url = f"file://{os.path.abspath(output_file)}"
console.print(f"[green]Comparison report saved to: {output_file}[/green]")
console.print(f"[blue underline]Click to open report: {file_url}[/blue underline]")
return output_file
def run(self, limit=None, output_file=None):
"""Generate a full benchmark report.
Args:
limit: Optional limit on number of most recent tests to include
output_file: Optional output file path
Returns:
Path to the generated report file
"""
# Load test results
results = self.load_test_results(limit=limit)
if not results:
console.print("[yellow]No test results found. Run some tests first.[/yellow]")
return None
# Generate and display summary table
summary_table = self.generate_summary_table(results)
console.print(summary_table)
# Generate comparison report
title = f"Crawl4AI Benchmark Report ({len(results)} test runs)"
report_file = self.generate_comparison_report(results, title=title, output_file=output_file)
if report_file:
console.print(f"[bold green]Report generated successfully: {report_file}[/bold green]")
return report_file
else:
console.print("[bold red]Failed to generate report[/bold red]")
return None
def main():
"""Main entry point for the benchmark reporter."""
parser = argparse.ArgumentParser(description="Generate benchmark reports for Crawl4AI stress tests")
parser.add_argument("--reports-dir", type=str, default="reports",
help="Directory containing test result files")
parser.add_argument("--output-dir", type=str, default="benchmark_reports",
help="Directory to save generated reports")
parser.add_argument("--limit", type=int, default=None,
help="Limit to most recent N test results")
parser.add_argument("--output-file", type=str, default=None,
help="Custom output file path for the report")
args = parser.parse_args()
# Create the benchmark reporter
reporter = BenchmarkReporter(reports_dir=args.reports_dir, output_dir=args.output_dir)
# Generate the report
report_file = reporter.run(limit=args.limit, output_file=args.output_file)
if report_file:
print(f"Report generated at: {report_file}")
return 0
else:
print("Failed to generate report")
return 1
if __name__ == "__main__":
import sys
sys.exit(main())
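For reference, the reporter can also be driven directly from Python instead of via the CLI; a minimal sketch mirroring main() above (directory names are the defaults, and it assumes the module is importable as benchmark_report):
# Programmatic use of the reporter (sketch only)
from benchmark_report import BenchmarkReporter
reporter = BenchmarkReporter(reports_dir="reports", output_dir="benchmark_reports")
report_path = reporter.run(limit=10)  # include only the 10 most recent test runs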

34
tests/memory/cap_test.py Normal file
View File

@@ -0,0 +1,34 @@
#!/usr/bin/env python3
"""
Hammer /crawl with many concurrent requests to prove GLOBAL_SEM works.
"""
import asyncio, httpx, json, uuid, argparse
API = "http://localhost:8020/crawl"
URLS_PER_CALL = 1 # keep it minimal so each arun() == 1 page
CONCURRENT_CALLS = 20 # way above your cap
payload_template = {
"browser_config": {"type": "BrowserConfig", "params": {"headless": True}},
"crawler_config": {
"type": "CrawlerRunConfig",
"params": {"cache_mode": "BYPASS", "verbose": False},
}
}
async def one_call(client):
payload = payload_template.copy()
payload["urls"] = [f"https://httpbin.org/anything/{uuid.uuid4()}"]
r = await client.post(API, json=payload)
r.raise_for_status()
return r.json()["server_peak_memory_mb"]
async def main():
async with httpx.AsyncClient(timeout=60) as client:
tasks = [asyncio.create_task(one_call(client)) for _ in range(CONCURRENT_CALLS)]
mem_usages = await asyncio.gather(*tasks)
print("Calls finished OK, server peaks reported:", mem_usages)
if __name__ == "__main__":
asyncio.run(main())
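Note that argparse and URLS_PER_CALL are imported/defined above but never used. A minimal sketch of how the call count and endpoint could be made configurable; the --calls and --api flag names are hypothetical, not part of the script:
# Hypothetical CLI wrapper (sketch only; flag names are assumptions)
def parse_args():
    parser = argparse.ArgumentParser(description="GLOBAL_SEM cap smoke test")
    parser.add_argument("--calls", type=int, default=CONCURRENT_CALLS,
                        help="number of concurrent /crawl requests to fire")
    parser.add_argument("--api", default=API, help="crawl endpoint URL")
    return parser.parse_args()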

View File

@@ -0,0 +1,4 @@
pandas>=1.5.0
matplotlib>=3.5.0
seaborn>=0.12.0
rich>=12.0.0

259
tests/memory/run_benchmark.py Executable file
View File

@@ -0,0 +1,259 @@
#!/usr/bin/env python3
"""
Run a complete Crawl4AI benchmark test using test_stress_sdk.py and generate a report.
"""
import sys
import os
import glob
import argparse
import subprocess
import time
import pathlib  # needed for pathlib.Path(...).as_uri() when linking reports below
from datetime import datetime
from rich.console import Console
from rich.text import Text
console = Console()
# Updated TEST_CONFIGS to use max_sessions
TEST_CONFIGS = {
"quick": {"urls": 50, "max_sessions": 4, "chunk_size": 10, "description": "Quick test (50 URLs, 4 sessions)"},
"small": {"urls": 100, "max_sessions": 8, "chunk_size": 20, "description": "Small test (100 URLs, 8 sessions)"},
"medium": {"urls": 500, "max_sessions": 16, "chunk_size": 50, "description": "Medium test (500 URLs, 16 sessions)"},
"large": {"urls": 1000, "max_sessions": 32, "chunk_size": 100,"description": "Large test (1000 URLs, 32 sessions)"},
"extreme": {"urls": 2000, "max_sessions": 64, "chunk_size": 200,"description": "Extreme test (2000 URLs, 64 sessions)"},
}
# Arguments to forward directly if present in custom_args
FORWARD_ARGS = {
"urls": "--urls",
"max_sessions": "--max-sessions",
"chunk_size": "--chunk-size",
"port": "--port",
"monitor_mode": "--monitor-mode",
}
# Boolean flags to forward if True
FORWARD_FLAGS = {
"stream": "--stream",
"use_rate_limiter": "--use-rate-limiter",
"keep_server_alive": "--keep-server-alive",
"use_existing_site": "--use-existing-site",
"skip_generation": "--skip-generation",
"keep_site": "--keep-site",
"clean_reports": "--clean-reports", # Note: clean behavior is handled here, but pass flag if needed
"clean_site": "--clean-site", # Note: clean behavior is handled here, but pass flag if needed
}
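As a quick illustration of how these mapping tables are used by run_benchmark() below, the "quick" preset expands to roughly the following command (assuming test_stress_sdk.py accepts the flags named in FORWARD_ARGS):
# Illustrative only -- shows how a preset becomes a command line
def _example_quick_command():
    cfg = TEST_CONFIGS["quick"]
    cmd = ["python", "test_stress_sdk.py"]
    for key, flag in FORWARD_ARGS.items():
        if key in cfg:
            cmd.extend([flag, str(cfg[key])])
    return cmd  # ['python', 'test_stress_sdk.py', '--urls', '50', '--max-sessions', '4', '--chunk-size', '10']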
def run_benchmark(config_name, custom_args=None, compare=True, clean=False):
"""Runs the stress test and optionally the report generator."""
if config_name not in TEST_CONFIGS and config_name != "custom":
console.print(f"[bold red]Unknown configuration: {config_name}[/bold red]")
return False
# Print header
title = "Crawl4AI SDK Benchmark Test"
if config_name != "custom":
title += f" - {TEST_CONFIGS[config_name]['description']}"
else:
# Safely get custom args for title
urls = custom_args.get('urls', '?') if custom_args else '?'
sessions = custom_args.get('max_sessions', '?') if custom_args else '?'
title += f" - Custom ({urls} URLs, {sessions} sessions)"
console.print(f"\n[bold blue]{title}[/bold blue]")
console.print("=" * (len(title) + 4)) # Adjust underline length
console.print("\n[bold white]Preparing test...[/bold white]")
# --- Command Construction ---
# Use the new script name
cmd = ["python", "test_stress_sdk.py"]
# Apply config or custom args
args_to_use = {}
if config_name != "custom":
args_to_use = TEST_CONFIGS[config_name].copy()
# If custom args are provided (e.g., boolean flags), overlay them
if custom_args:
args_to_use.update(custom_args)
elif custom_args: # Custom config
args_to_use = custom_args.copy()
# Add arguments with values
for key, arg_name in FORWARD_ARGS.items():
if key in args_to_use:
cmd.extend([arg_name, str(args_to_use[key])])
# Add boolean flags
for key, flag_name in FORWARD_FLAGS.items():
if args_to_use.get(key, False): # Check if key exists and is True
# Special handling for clean flags - apply locally, don't forward?
# Decide if test_stress_sdk.py also needs --clean flags or if run_benchmark handles it.
# For now, let's assume run_benchmark handles cleaning based on its own --clean flag.
# We'll forward other flags.
if key not in ["clean_reports", "clean_site"]:
cmd.append(flag_name)
# Handle the top-level --clean flag for run_benchmark
if clean:
# Pass clean flags to the stress test script as well, if needed
# This assumes test_stress_sdk.py also uses --clean-reports and --clean-site
cmd.append("--clean-reports")
cmd.append("--clean-site")
console.print("[yellow]Applying --clean: Cleaning reports and site before test.[/yellow]")
# Actual cleaning logic might reside here or be delegated entirely
console.print(f"\n[bold white]Running stress test:[/bold white] {' '.join(cmd)}")
start = time.time()
# Execute the stress test script
# Use Popen to stream output
try:
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, encoding='utf-8', errors='replace')
while True:
line = proc.stdout.readline()
if not line:
break
console.print(line.rstrip()) # Print line by line
proc.wait() # Wait for the process to complete
except FileNotFoundError:
console.print(f"[bold red]Error: Script 'test_stress_sdk.py' not found. Make sure it's in the correct directory.[/bold red]")
return False
except Exception as e:
console.print(f"[bold red]Error running stress test subprocess: {e}[/bold red]")
return False
if proc.returncode != 0:
console.print(f"[bold red]Stress test failed with exit code {proc.returncode}[/bold red]")
return False
duration = time.time() - start
console.print(f"[bold green]Stress test completed in {duration:.1f} seconds[/bold green]")
# --- Report Generation (Optional) ---
if compare:
# Assuming benchmark_report.py exists and works with the generated reports
report_script = "benchmark_report.py" # Keep configurable if needed
report_cmd = ["python", report_script]
console.print(f"\n[bold white]Generating benchmark report: {' '.join(report_cmd)}[/bold white]")
# Run the report command and capture output
try:
report_proc = subprocess.run(report_cmd, capture_output=True, text=True, check=False, encoding='utf-8', errors='replace') # Use check=False to handle potential errors
# Print the captured output from benchmark_report.py
if report_proc.stdout:
console.print("\n" + report_proc.stdout)
if report_proc.stderr:
console.print("[yellow]Report generator stderr:[/yellow]\n" + report_proc.stderr)
if report_proc.returncode != 0:
console.print(f"[bold yellow]Benchmark report generation script '{report_script}' failed with exit code {report_proc.returncode}[/bold yellow]")
# Don't return False here, test itself succeeded
else:
console.print(f"[bold green]Benchmark report script '{report_script}' completed.[/bold green]")
# Find and print clickable links to the reports
# Assuming reports are saved in 'benchmark_reports' by benchmark_report.py
report_dir = "benchmark_reports"
if os.path.isdir(report_dir):
report_files = glob.glob(os.path.join(report_dir, "comparison_report_*.html"))
if report_files:
try:
latest_report = max(report_files, key=os.path.getctime)
report_path = os.path.abspath(latest_report)
report_url = pathlib.Path(report_path).as_uri() # Better way to create file URI
console.print(f"[bold cyan]Click to open report: [link={report_url}]{report_url}[/link][/bold cyan]")
except Exception as e:
console.print(f"[yellow]Could not determine latest report: {e}[/yellow]")
chart_files = glob.glob(os.path.join(report_dir, "memory_chart_*.png"))
if chart_files:
try:
latest_chart = max(chart_files, key=os.path.getctime)
chart_path = os.path.abspath(latest_chart)
chart_url = pathlib.Path(chart_path).as_uri()
console.print(f"[cyan]Memory chart: [link={chart_url}]{chart_url}[/link][/cyan]")
except Exception as e:
console.print(f"[yellow]Could not determine latest chart: {e}[/yellow]")
else:
console.print(f"[yellow]Benchmark report directory '{report_dir}' not found. Cannot link reports.[/yellow]")
except FileNotFoundError:
console.print(f"[bold red]Error: Report script '{report_script}' not found.[/bold red]")
except Exception as e:
console.print(f"[bold red]Error running report generation subprocess: {e}[/bold red]")
# Prompt to exit
console.print("\n[bold green]Benchmark run finished. Press Enter to exit.[/bold green]")
try:
input() # Wait for user input
except EOFError:
pass # Handle case where input is piped or unavailable
return True
def main():
parser = argparse.ArgumentParser(description="Run a Crawl4AI SDK benchmark test and generate a report")
# --- Arguments ---
parser.add_argument("config", choices=list(TEST_CONFIGS) + ["custom"],
help="Test configuration: quick, small, medium, large, extreme, or custom")
# Arguments for 'custom' config or to override presets
parser.add_argument("--urls", type=int, help="Number of URLs")
parser.add_argument("--max-sessions", type=int, help="Max concurrent sessions (replaces --workers)")
parser.add_argument("--chunk-size", type=int, help="URLs per batch (for non-stream logging)")
parser.add_argument("--port", type=int, help="HTTP server port")
parser.add_argument("--monitor-mode", type=str, choices=["DETAILED", "AGGREGATED"], help="Monitor display mode")
# Boolean flags / options
parser.add_argument("--stream", action="store_true", help="Enable streaming results (disables batch logging)")
parser.add_argument("--use-rate-limiter", action="store_true", help="Enable basic rate limiter")
parser.add_argument("--no-report", action="store_true", help="Skip generating comparison report")
parser.add_argument("--clean", action="store_true", help="Clean up reports and site before running")
parser.add_argument("--keep-server-alive", action="store_true", help="Keep HTTP server running after test")
parser.add_argument("--use-existing-site", action="store_true", help="Use existing site on specified port")
parser.add_argument("--skip-generation", action="store_true", help="Use existing site files without regenerating")
parser.add_argument("--keep-site", action="store_true", help="Keep generated site files after test")
# Removed url_level_logging as it's implicitly handled by stream/batch mode now
args = parser.parse_args()
custom_args = {}
# Populate custom_args from explicit command-line args
if args.urls is not None: custom_args["urls"] = args.urls
if args.max_sessions is not None: custom_args["max_sessions"] = args.max_sessions
if args.chunk_size is not None: custom_args["chunk_size"] = args.chunk_size
if args.port is not None: custom_args["port"] = args.port
if args.monitor_mode is not None: custom_args["monitor_mode"] = args.monitor_mode
if args.stream: custom_args["stream"] = True
if args.use_rate_limiter: custom_args["use_rate_limiter"] = True
if args.keep_server_alive: custom_args["keep_server_alive"] = True
if args.use_existing_site: custom_args["use_existing_site"] = True
if args.skip_generation: custom_args["skip_generation"] = True
if args.keep_site: custom_args["keep_site"] = True
# Clean flags are handled by the 'clean' argument passed to run_benchmark
# Validate custom config requirements
if args.config == "custom":
required_custom = ["urls", "max_sessions", "chunk_size"]
missing = [f"--{arg}" for arg in required_custom if arg not in custom_args]
if missing:
console.print(f"[bold red]Error: 'custom' config requires: {', '.join(missing)}[/bold red]")
return 1
success = run_benchmark(
config_name=args.config,
custom_args=custom_args, # Pass all collected custom args
compare=not args.no_report,
clean=args.clean
)
return 0 if success else 1
if __name__ == "__main__":
sys.exit(main())

View File

@@ -0,0 +1,168 @@
"""
Test script for the CrawlerMonitor component.
This script simulates a crawler with multiple tasks to demonstrate the real-time monitoring capabilities.
"""
import time
import uuid
import random
import threading
import sys
import os
# Add the parent directory to the path to import crawl4ai
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")))
from crawl4ai.components.crawler_monitor import CrawlerMonitor
from crawl4ai.models import CrawlStatus
def simulate_crawler_task(monitor, task_id, url, simulate_failure=False):
"""Simulate a crawler task with different states."""
# Task starts in the QUEUED state
wait_time = random.uniform(0.5, 3.0)
time.sleep(wait_time)
# Update to IN_PROGRESS state
monitor.update_task(
task_id=task_id,
status=CrawlStatus.IN_PROGRESS,
start_time=time.time(),
wait_time=wait_time
)
# Simulate task running
process_time = random.uniform(1.0, 5.0)
for i in range(int(process_time * 2)):
# Simulate memory usage changes
memory_usage = random.uniform(5.0, 25.0)
monitor.update_task(
task_id=task_id,
memory_usage=memory_usage,
peak_memory=max(memory_usage, monitor.get_task_stats(task_id).get("peak_memory", 0))
)
time.sleep(0.5)
# Update to COMPLETED or FAILED state
if simulate_failure and random.random() < 0.8: # 80% chance of failure if simulate_failure is True
monitor.update_task(
task_id=task_id,
status=CrawlStatus.FAILED,
end_time=time.time(),
error_message="Simulated failure: Connection timeout",
memory_usage=0.0
)
else:
monitor.update_task(
task_id=task_id,
status=CrawlStatus.COMPLETED,
end_time=time.time(),
memory_usage=0.0
)
def update_queue_stats(monitor, num_queued_tasks):
"""Update queue statistics periodically."""
while monitor.is_running:
queued_tasks = [
task for task_id, task in monitor.get_all_task_stats().items()
if task["status"] == CrawlStatus.QUEUED.name
]
total_queued = len(queued_tasks)
if total_queued > 0:
current_time = time.time()
wait_times = [
current_time - task.get("enqueue_time", current_time)
for task in queued_tasks
]
highest_wait_time = max(wait_times) if wait_times else 0.0
avg_wait_time = sum(wait_times) / len(wait_times) if wait_times else 0.0
else:
highest_wait_time = 0.0
avg_wait_time = 0.0
monitor.update_queue_statistics(
total_queued=total_queued,
highest_wait_time=highest_wait_time,
avg_wait_time=avg_wait_time
)
# Simulate memory pressure based on number of active tasks
active_tasks = len([
task for task_id, task in monitor.get_all_task_stats().items()
if task["status"] == CrawlStatus.IN_PROGRESS.name
])
if active_tasks > 8:
monitor.update_memory_status("CRITICAL")
elif active_tasks > 4:
monitor.update_memory_status("PRESSURE")
else:
monitor.update_memory_status("NORMAL")
time.sleep(1.0)
def test_crawler_monitor():
"""Test the CrawlerMonitor with simulated crawler tasks."""
# Total number of URLs to crawl
total_urls = 50
# Initialize the monitor
monitor = CrawlerMonitor(urls_total=total_urls, refresh_rate=0.5)
# Start the monitor
monitor.start()
# Start thread to update queue statistics
queue_stats_thread = threading.Thread(target=update_queue_stats, args=(monitor, total_urls))
queue_stats_thread.daemon = True
queue_stats_thread.start()
try:
# Create task threads
threads = []
for i in range(total_urls):
task_id = str(uuid.uuid4())
url = f"https://example.com/page{i}"
# Add task to monitor
monitor.add_task(task_id, url)
# Determine if this task should simulate failure
simulate_failure = (i % 10 == 0) # Every 10th task
# Create and start thread for this task
thread = threading.Thread(
target=simulate_crawler_task,
args=(monitor, task_id, url, simulate_failure)
)
thread.daemon = True
threads.append(thread)
# Start threads with delay to simulate tasks being added over time
batch_size = 5
for i in range(0, len(threads), batch_size):
batch = threads[i:i+batch_size]
for thread in batch:
thread.start()
time.sleep(0.5) # Small delay between starting threads
# Wait a bit before starting the next batch
time.sleep(2.0)
# Wait for all threads to complete
for thread in threads:
thread.join()
# Keep monitor running a bit longer to see the final state
time.sleep(5.0)
except KeyboardInterrupt:
print("\nTest interrupted by user")
finally:
# Stop the monitor
monitor.stop()
print("\nCrawler monitor test completed")
if __name__ == "__main__":
test_crawler_monitor()

View File

@@ -0,0 +1,410 @@
import asyncio
import time
import psutil
import logging
import random
from typing import List, Dict
import uuid
import sys
import os
# Import your crawler components
from crawl4ai.models import DisplayMode, CrawlStatus, CrawlResult
from crawl4ai.async_configs import CrawlerRunConfig, BrowserConfig, CacheMode
from crawl4ai import AsyncWebCrawler
from crawl4ai import MemoryAdaptiveDispatcher, CrawlerMonitor
# Global configuration
STREAM = False # Toggle between streaming and non-streaming modes
# Configure logging to file only (to avoid breaking the rich display)
os.makedirs("logs", exist_ok=True)
file_handler = logging.FileHandler("logs/memory_stress_test.log")
file_handler.setFormatter(logging.Formatter('%(asctime)s [%(levelname)s] %(message)s'))
# Root logger - only to file, not console
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)
root_logger.addHandler(file_handler)
# Our test logger also writes to file only
logger = logging.getLogger("memory_stress_test")
logger.setLevel(logging.INFO)
logger.addHandler(file_handler)
logger.propagate = False # Don't propagate to root logger
# Create a memory restrictor to simulate limited memory environment
class MemorySimulator:
def __init__(self, target_percent: float = 85.0, aggressive: bool = False):
"""Simulates memory pressure by allocating memory"""
self.target_percent = target_percent
self.memory_blocks: List[bytearray] = []
self.aggressive = aggressive
def apply_pressure(self, additional_percent: float = 0.0):
"""Fill memory until we reach target percentage"""
current_percent = psutil.virtual_memory().percent
target = self.target_percent + additional_percent
if current_percent >= target:
return # Already at target
logger.info(f"Current memory: {current_percent}%, target: {target}%")
# Calculate how much memory we need to allocate
total_memory = psutil.virtual_memory().total
target_usage = (target / 100.0) * total_memory
current_usage = (current_percent / 100.0) * total_memory
bytes_to_allocate = int(target_usage - current_usage)
if bytes_to_allocate <= 0:
return
# Allocate in smaller chunks to avoid overallocation
if self.aggressive:
# Use larger chunks for faster allocation in aggressive mode
chunk_size = min(bytes_to_allocate, 200 * 1024 * 1024) # 200MB chunks
else:
chunk_size = min(bytes_to_allocate, 50 * 1024 * 1024) # 50MB chunks
try:
logger.info(f"Allocating {chunk_size / (1024 * 1024):.1f}MB to reach target memory usage")
self.memory_blocks.append(bytearray(chunk_size))
time.sleep(0.5) # Give system time to register the allocation
except MemoryError:
logger.warning("Unable to allocate more memory")
def release_pressure(self, percent: float = None):
"""
Release allocated memory
If percent is specified, release that percentage of blocks
"""
if not self.memory_blocks:
return
if percent is None:
# Release all
logger.info(f"Releasing all {len(self.memory_blocks)} memory blocks")
self.memory_blocks.clear()
else:
# Release specified percentage
blocks_to_release = int(len(self.memory_blocks) * (percent / 100.0))
if blocks_to_release > 0:
logger.info(f"Releasing {blocks_to_release} of {len(self.memory_blocks)} memory blocks ({percent}%)")
self.memory_blocks = self.memory_blocks[blocks_to_release:]
def spike_pressure(self, duration: float = 5.0):
"""
Create a temporary spike in memory pressure then release
Useful for forcing requeues
"""
logger.info(f"Creating memory pressure spike for {duration} seconds")
# Save current blocks count
initial_blocks = len(self.memory_blocks)
# Create spike with extra 5%
self.apply_pressure(additional_percent=5.0)
# Schedule release after duration
asyncio.create_task(self._delayed_release(duration, initial_blocks))
async def _delayed_release(self, delay: float, target_blocks: int):
"""Helper for spike_pressure - releases extra blocks after delay"""
await asyncio.sleep(delay)
# Remove blocks added since spike started
if len(self.memory_blocks) > target_blocks:
logger.info(f"Releasing memory spike ({len(self.memory_blocks) - target_blocks} blocks)")
self.memory_blocks = self.memory_blocks[:target_blocks]
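A minimal usage sketch of MemorySimulator; note that spike_pressure() assumes a running asyncio event loop because it schedules the delayed release with asyncio.create_task (the numbers below are illustrative):
# Usage sketch (illustrative values, not part of the test flow)
async def _memory_simulator_demo():
    sim = MemorySimulator(target_percent=80.0)
    sim.apply_pressure()              # allocate until ~80% of system memory is in use
    sim.spike_pressure(duration=5.0)  # temporary +5% spike, auto-released after 5 s
    await asyncio.sleep(6)
    sim.release_pressure(percent=50)  # free half of the allocated blocks
    sim.release_pressure()            # free everything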
# Test statistics collector
class TestResults:
def __init__(self):
self.start_time = time.time()
self.completed_urls: List[str] = []
self.failed_urls: List[str] = []
self.requeued_count = 0
self.memory_warnings = 0
self.max_memory_usage = 0.0
self.max_queue_size = 0
self.max_wait_time = 0.0
self.url_to_attempt: Dict[str, int] = {} # Track retries per URL
def log_summary(self):
duration = time.time() - self.start_time
logger.info("===== TEST SUMMARY =====")
logger.info(f"Stream mode: {'ON' if STREAM else 'OFF'}")
logger.info(f"Total duration: {duration:.1f} seconds")
logger.info(f"Completed URLs: {len(self.completed_urls)}")
logger.info(f"Failed URLs: {len(self.failed_urls)}")
logger.info(f"Requeue events: {self.requeued_count}")
logger.info(f"Memory warnings: {self.memory_warnings}")
logger.info(f"Max memory usage: {self.max_memory_usage:.1f}%")
logger.info(f"Max queue size: {self.max_queue_size}")
logger.info(f"Max wait time: {self.max_wait_time:.1f} seconds")
# Log URLs with multiple attempts
retried_urls = {url: count for url, count in self.url_to_attempt.items() if count > 1}
if retried_urls:
logger.info(f"URLs with retries: {len(retried_urls)}")
# Log the top 5 most retried
top_retries = sorted(retried_urls.items(), key=lambda x: x[1], reverse=True)[:5]
for url, count in top_retries:
logger.info(f" URL {url[-30:]} had {count} attempts")
# Write summary to a separate human-readable file
with open("logs/test_summary.txt", "w") as f:
f.write(f"Stream mode: {'ON' if STREAM else 'OFF'}\n")
f.write(f"Total duration: {duration:.1f} seconds\n")
f.write(f"Completed URLs: {len(self.completed_urls)}\n")
f.write(f"Failed URLs: {len(self.failed_urls)}\n")
f.write(f"Requeue events: {self.requeued_count}\n")
f.write(f"Memory warnings: {self.memory_warnings}\n")
f.write(f"Max memory usage: {self.max_memory_usage:.1f}%\n")
f.write(f"Max queue size: {self.max_queue_size}\n")
f.write(f"Max wait time: {self.max_wait_time:.1f} seconds\n")
# Custom monitor with stats tracking
# Custom monitor that extends CrawlerMonitor with test-specific tracking
class StressTestMonitor(CrawlerMonitor):
def __init__(self, test_results: TestResults, **kwargs):
# Initialize the parent CrawlerMonitor
super().__init__(**kwargs)
self.test_results = test_results
def update_memory_status(self, status: str):
if status != self.memory_status:
logger.info(f"Memory status changed: {self.memory_status} -> {status}")
if "CRITICAL" in status or "PRESSURE" in status:
self.test_results.memory_warnings += 1
# Track peak memory usage in test results
current_memory = psutil.virtual_memory().percent
self.test_results.max_memory_usage = max(self.test_results.max_memory_usage, current_memory)
# Call parent method to update the dashboard
super().update_memory_status(status)
def update_queue_statistics(self, total_queued: int, highest_wait_time: float, avg_wait_time: float):
# Track queue metrics in test results
self.test_results.max_queue_size = max(self.test_results.max_queue_size, total_queued)
self.test_results.max_wait_time = max(self.test_results.max_wait_time, highest_wait_time)
# Call parent method to update the dashboard
super().update_queue_statistics(total_queued, highest_wait_time, avg_wait_time)
def update_task(self, task_id: str, **kwargs):
# Track URL status changes for test results
if task_id in self.stats:
old_status = self.stats[task_id].status
# If this is a requeue event (requeued due to memory pressure)
if 'error_message' in kwargs and 'requeued' in kwargs['error_message']:
if not hasattr(self.stats[task_id], 'counted_requeue') or not self.stats[task_id].counted_requeue:
self.test_results.requeued_count += 1
self.stats[task_id].counted_requeue = True
# Track completion status for test results
if 'status' in kwargs:
new_status = kwargs['status']
if old_status != new_status:
if new_status == CrawlStatus.COMPLETED:
if task_id not in self.test_results.completed_urls:
self.test_results.completed_urls.append(task_id)
elif new_status == CrawlStatus.FAILED:
if task_id not in self.test_results.failed_urls:
self.test_results.failed_urls.append(task_id)
# Call parent method to update the dashboard
super().update_task(task_id, **kwargs)
self.live.update(self._create_table())
# Generate test URLs - use example.com with unique paths to avoid browser caching
def generate_test_urls(count: int) -> List[str]:
urls = []
for i in range(count):
# Add random path and query parameters to create unique URLs
path = f"/path/{uuid.uuid4()}"
query = f"?test={i}&random={random.randint(1, 100000)}"
urls.append(f"https://example.com{path}{query}")
return urls
# Process result callback
async def process_result(result, test_results: TestResults):
# Track attempt counts
if result.url not in test_results.url_to_attempt:
test_results.url_to_attempt[result.url] = 1
else:
test_results.url_to_attempt[result.url] += 1
if "requeued" in result.error_message:
test_results.requeued_count += 1
logger.debug(f"Requeued due to memory pressure: {result.url}")
elif result.success:
test_results.completed_urls.append(result.url)
logger.debug(f"Successfully processed: {result.url}")
else:
test_results.failed_urls.append(result.url)
logger.warning(f"Failed to process: {result.url} - {result.error_message}")
# Process multiple results (used in non-streaming mode)
async def process_results(results, test_results: TestResults):
for result in results:
await process_result(result, test_results)
# Main test function for extreme memory pressure simulation
async def run_memory_stress_test(
url_count: int = 100,
target_memory_percent: float = 92.0, # Push to dangerous levels
chunk_size: int = 20, # Larger chunks for more chaos
aggressive: bool = False,
spikes: bool = True
):
test_results = TestResults()
memory_simulator = MemorySimulator(target_percent=target_memory_percent, aggressive=aggressive)
logger.info(f"Starting stress test with {url_count} URLs in {'STREAM' if STREAM else 'NON-STREAM'} mode")
logger.info(f"Target memory usage: {target_memory_percent}%")
# First, elevate memory usage to create pressure
logger.info("Creating initial memory pressure...")
memory_simulator.apply_pressure()
# Create test URLs in chunks to simulate real-world crawling where URLs are discovered
all_urls = generate_test_urls(url_count)
url_chunks = [all_urls[i:i+chunk_size] for i in range(0, len(all_urls), chunk_size)]
# Set up the crawler components - low memory thresholds to create more requeues
browser_config = BrowserConfig(headless=True, verbose=False)
run_config = CrawlerRunConfig(
cache_mode=CacheMode.BYPASS,
verbose=False,
stream=STREAM # Use the global STREAM variable to set mode
)
# Create monitor with reference to test results
monitor = StressTestMonitor(
test_results=test_results,
display_mode=DisplayMode.DETAILED,
max_visible_rows=20,
total_urls=url_count # Pass total URLs count
)
# Create dispatcher with EXTREME settings - pure survival mode
# These settings are designed to create a memory battleground
dispatcher = MemoryAdaptiveDispatcher(
memory_threshold_percent=63.0, # Start throttling at just 63% memory
critical_threshold_percent=70.0, # Start requeuing at 70% - incredibly aggressive
recovery_threshold_percent=55.0, # Only resume normal ops when plenty of memory available
check_interval=0.1, # Check extremely frequently (100ms)
max_session_permit=20 if aggressive else 10, # Double the concurrent sessions - pure chaos
fairness_timeout=10.0, # Extremely low timeout - rapid priority changes
monitor=monitor
)
# Set up spike schedule if enabled
if spikes:
spike_intervals = []
# Create 3-5 random spike times
num_spikes = random.randint(3, 5)
for _ in range(num_spikes):
# Schedule spikes at random chunks
chunk_index = random.randint(1, len(url_chunks) - 1)
spike_intervals.append(chunk_index)
logger.info(f"Scheduled memory spikes at chunks: {spike_intervals}")
try:
async with AsyncWebCrawler(config=browser_config) as crawler:
# Process URLs in chunks to simulate discovering URLs over time
for chunk_index, url_chunk in enumerate(url_chunks):
logger.info(f"Processing chunk {chunk_index+1}/{len(url_chunks)} ({len(url_chunk)} URLs)")
# Regular pressure increases
if chunk_index % 2 == 0:
logger.info("Increasing memory pressure...")
memory_simulator.apply_pressure()
# Memory spike if scheduled for this chunk
if spikes and chunk_index in spike_intervals:
logger.info(f"⚠️ CREATING MASSIVE MEMORY SPIKE at chunk {chunk_index+1} ⚠️")
# Create a nightmare scenario - multiple overlapping spikes
memory_simulator.spike_pressure(duration=10.0) # 10-second spike
# 50% chance of double-spike (pure evil)
if random.random() < 0.5:
await asyncio.sleep(2.0) # Wait 2 seconds
logger.info("💀 DOUBLE SPIKE - EXTREME MEMORY PRESSURE 💀")
memory_simulator.spike_pressure(duration=8.0) # 8-second overlapping spike
if STREAM:
# Stream mode - process results as they come in
async for result in dispatcher.run_urls_stream(
urls=url_chunk,
crawler=crawler,
config=run_config
):
await process_result(result, test_results)
else:
# Non-stream mode - get all results at once
results = await dispatcher.run_urls(
urls=url_chunk,
crawler=crawler,
config=run_config
)
await process_results(results, test_results)
# Simulate discovering more URLs while others are still processing
await asyncio.sleep(1)
# RARELY release pressure - make the system fight for resources
if chunk_index % 5 == 4: # Less frequent releases
release_percent = random.choice([10, 15, 20]) # Smaller, inconsistent releases
logger.info(f"Releasing {release_percent}% of memory blocks - brief respite")
memory_simulator.release_pressure(percent=release_percent)
except Exception as e:
logger.error(f"Test error: {str(e)}")
raise
finally:
# Release memory pressure
memory_simulator.release_pressure()
# Log final results
test_results.log_summary()
# Check for success criteria
if len(test_results.completed_urls) + len(test_results.failed_urls) < url_count:
logger.error(f"TEST FAILED: Not all URLs were processed. {url_count - len(test_results.completed_urls) - len(test_results.failed_urls)} URLs missing.")
return False
logger.info("TEST PASSED: All URLs were processed without crashing.")
return True
# Command-line entry point
if __name__ == "__main__":
# Parse command line arguments
url_count = int(sys.argv[1]) if len(sys.argv) > 1 else 100
target_memory = float(sys.argv[2]) if len(sys.argv) > 2 else 85.0
# Check if stream mode is specified
if len(sys.argv) > 3:
STREAM = sys.argv[3].lower() in ('true', 'yes', '1', 'stream')
# Check if aggressive mode is specified
aggressive = False
if len(sys.argv) > 4:
aggressive = sys.argv[4].lower() in ('true', 'yes', '1', 'aggressive')
print(f"Starting test with {url_count} URLs, {target_memory}% memory target")
print(f"Stream mode: {STREAM}, Aggressive: {aggressive}")
print("Logs will be written to the logs directory")
print("Live display starting now...")
# Run the test
result = asyncio.run(run_memory_stress_test(
url_count=url_count,
target_memory_percent=target_memory,
aggressive=aggressive
))
# Exit with status code
sys.exit(0 if result else 1)
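For reference, the positional arguments parsed above map to url_count, target memory percent, stream mode, and aggressive mode, in that order. Assuming the script is saved as test_memory_stress.py (the actual filename is not shown in this diff), typical invocations look like:
# Example invocations (derived from the sys.argv handling above; the filename is an assumption)
#   python test_memory_stress.py                      # 100 URLs, 85% memory target, non-stream
#   python test_memory_stress.py 200 90               # 200 URLs, 90% memory target
#   python test_memory_stress.py 200 90 stream        # same, streaming results as they finish
#   python test_memory_stress.py 200 90 1 aggressive  # stream mode plus aggressive allocation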

View File

@@ -0,0 +1,36 @@
#!/usr/bin/env python3
"""
Quick sanity-check for the /config/dump endpoint.
Usage:
python test_config_dump.py [http://localhost:8020]
If the server isn't running, start it first:
uvicorn deploy.docker.server:app --port 8020
"""
import sys, json, textwrap, requests
# BASE = sys.argv[1] if len(sys.argv) > 1 else "http://localhost:8020"
BASE = sys.argv[1] if len(sys.argv) > 1 else "http://localhost:11235"
URL = f"{BASE.rstrip('/')}/config/dump"
CASES = [
# --- CrawlRunConfig variants ---
"CrawlerRunConfig()",
"CrawlerRunConfig(stream=True, cache_mode=CacheMode.BYPASS)",
"CrawlerRunConfig(js_only=True, wait_until='networkidle')",
# --- BrowserConfig variants ---
"BrowserConfig()",
"BrowserConfig(headless=False, extra_args=['--disable-gpu'])",
"BrowserConfig(browser_mode='builtin', proxy='http://1.2.3.4:8080')",
]
for code in CASES:
print("\n=== POST:", code)
resp = requests.post(URL, json={"code": code}, timeout=15)
if resp.ok:
print(json.dumps(resp.json(), indent=2)[:400] + "...")
else:
print("ERROR", resp.status_code, resp.text[:200])

View File

@@ -0,0 +1,520 @@
#!/usr/bin/env python3
"""
Stress test for Crawl4AI's Docker API server (/crawl and /crawl/stream endpoints).
This version targets a running Crawl4AI API server, sending concurrent requests
to test its ability to handle multiple crawl jobs simultaneously.
It uses httpx for async HTTP requests and logs results per batch of requests,
including server-side memory usage reported by the API.
"""
import asyncio
import time
import uuid
import argparse
import json
import sys
import os
import shutil
from typing import List, Dict, Optional, Union, AsyncGenerator, Tuple
import httpx
import pathlib # Import pathlib explicitly
from rich.console import Console
from rich.panel import Panel
from rich.syntax import Syntax
# --- Constants ---
DEFAULT_API_URL = "http://localhost:11235" # Default port
DEFAULT_API_URL = "http://localhost:8020" # Default port
DEFAULT_URL_COUNT = 100
DEFAULT_MAX_CONCURRENT_REQUESTS = 1
DEFAULT_CHUNK_SIZE = 10
DEFAULT_REPORT_PATH = "reports_api"
DEFAULT_STREAM_MODE = True
REQUEST_TIMEOUT = 180.0
# Initialize Rich console
console = Console()
# --- API Health Check (Unchanged) ---
async def check_server_health(client: httpx.AsyncClient, health_endpoint: str = "/health"):
"""Check if the API server is healthy."""
console.print(f"[bold cyan]Checking API server health at {client.base_url}{health_endpoint}...[/]", end="")
try:
response = await client.get(health_endpoint, timeout=10.0)
response.raise_for_status()
health_data = response.json()
version = health_data.get('version', 'N/A')
console.print(f"[bold green] Server OK! Version: {version}[/]")
return True
except (httpx.RequestError, httpx.HTTPStatusError) as e:
console.print(f"\n[bold red]Server health check FAILED:[/]")
console.print(f"Error: {e}")
console.print(f"Is the server running and accessible at {client.base_url}?")
return False
except Exception as e:
console.print(f"\n[bold red]An unexpected error occurred during health check:[/]")
console.print(e)
return False
# --- API Stress Test Class ---
class ApiStressTest:
"""Orchestrates the stress test by sending concurrent requests to the API."""
def __init__(
self,
api_url: str,
url_count: int,
max_concurrent_requests: int,
chunk_size: int,
report_path: str,
stream_mode: bool,
):
self.api_base_url = api_url.rstrip('/')
self.url_count = url_count
self.max_concurrent_requests = max_concurrent_requests
self.chunk_size = chunk_size
self.report_path = pathlib.Path(report_path)
self.report_path.mkdir(parents=True, exist_ok=True)
self.stream_mode = stream_mode
# Ignore repo path and set it to current file path
self.repo_path = pathlib.Path(__file__).parent.resolve()
self.test_id = time.strftime("%Y%m%d_%H%M%S")
self.results_summary = {
"test_id": self.test_id, "api_url": api_url, "url_count": url_count,
"max_concurrent_requests": max_concurrent_requests, "chunk_size": chunk_size,
"stream_mode": stream_mode, "start_time": "", "end_time": "",
"total_time_seconds": 0, "successful_requests": 0, "failed_requests": 0,
"successful_urls": 0, "failed_urls": 0, "total_urls_processed": 0,
"total_api_calls": 0,
"server_memory_metrics": { # To store aggregated server memory info
"batch_mode_avg_delta_mb": None,
"batch_mode_max_delta_mb": None,
"stream_mode_avg_max_snapshot_mb": None,
"stream_mode_max_max_snapshot_mb": None,
"samples": [] # Store individual request memory results
}
}
self.http_client = httpx.AsyncClient(base_url=self.api_base_url, timeout=REQUEST_TIMEOUT, limits=httpx.Limits(max_connections=max_concurrent_requests + 5, max_keepalive_connections=max_concurrent_requests))
async def close_client(self):
"""Close the httpx client."""
await self.http_client.aclose()
async def run(self) -> Dict:
"""Run the API stress test."""
# No client memory tracker needed
urls_to_process = [f"https://httpbin.org/anything/{uuid.uuid4()}" for _ in range(self.url_count)]
url_chunks = [urls_to_process[i:i+self.chunk_size] for i in range(0, len(urls_to_process), self.chunk_size)]
self.results_summary["start_time"] = time.strftime("%Y-%m-%d %H:%M:%S")
start_time = time.time()
console.print(f"\n[bold cyan]Crawl4AI API Stress Test - {self.url_count} URLs, {self.max_concurrent_requests} concurrent requests[/bold cyan]")
console.print(f"[bold cyan]Target API:[/bold cyan] {self.api_base_url}, [bold cyan]Mode:[/bold cyan] {'Streaming' if self.stream_mode else 'Batch'}, [bold cyan]URLs per Request:[/bold cyan] {self.chunk_size}")
# Removed client memory log
semaphore = asyncio.Semaphore(self.max_concurrent_requests)
# Updated Batch logging header
console.print("\n[bold]API Request Batch Progress:[/bold]")
# Adjusted spacing and added Peak
console.print("[bold] Batch | Progress | SrvMem Peak / Δ|Max (MB) | Reqs/sec | S/F URLs | Time (s) | Status [/bold]")
# Adjust separator length if needed, looks okay for now
console.print("" * 95)
# No client memory monitor task needed
tasks = []
total_api_calls = len(url_chunks)
self.results_summary["total_api_calls"] = total_api_calls
try:
for i, chunk in enumerate(url_chunks):
task = asyncio.create_task(self._make_api_request(
chunk=chunk,
batch_idx=i + 1,
total_batches=total_api_calls,
semaphore=semaphore
# No memory tracker passed
))
tasks.append(task)
api_results = await asyncio.gather(*tasks)
# Process aggregated results including server memory
total_successful_requests = sum(1 for r in api_results if r['request_success'])
total_failed_requests = total_api_calls - total_successful_requests
total_successful_urls = sum(r['success_urls'] for r in api_results)
total_failed_urls = sum(r['failed_urls'] for r in api_results)
total_urls_processed = total_successful_urls + total_failed_urls
# Aggregate server memory metrics
valid_samples = [r for r in api_results if r.get('server_delta_or_max_mb') is not None] # Filter results with valid mem data
self.results_summary["server_memory_metrics"]["samples"] = valid_samples # Store raw samples with both peak and delta/max
if valid_samples:
delta_or_max_values = [r['server_delta_or_max_mb'] for r in valid_samples]
if self.stream_mode:
# Stream mode: delta_or_max holds max snapshot
self.results_summary["server_memory_metrics"]["stream_mode_avg_max_snapshot_mb"] = sum(delta_or_max_values) / len(delta_or_max_values)
self.results_summary["server_memory_metrics"]["stream_mode_max_max_snapshot_mb"] = max(delta_or_max_values)
else: # Batch mode
# delta_or_max holds delta
self.results_summary["server_memory_metrics"]["batch_mode_avg_delta_mb"] = sum(delta_or_max_values) / len(delta_or_max_values)
self.results_summary["server_memory_metrics"]["batch_mode_max_delta_mb"] = max(delta_or_max_values)
# Aggregate peak values for batch mode
peak_values = [r['server_peak_memory_mb'] for r in valid_samples if r.get('server_peak_memory_mb') is not None]
if peak_values:
self.results_summary["server_memory_metrics"]["batch_mode_avg_peak_mb"] = sum(peak_values) / len(peak_values)
self.results_summary["server_memory_metrics"]["batch_mode_max_peak_mb"] = max(peak_values)
self.results_summary.update({
"successful_requests": total_successful_requests,
"failed_requests": total_failed_requests,
"successful_urls": total_successful_urls,
"failed_urls": total_failed_urls,
"total_urls_processed": total_urls_processed,
})
except Exception as e:
console.print(f"[bold red]An error occurred during task execution: {e}[/bold red]")
import traceback
traceback.print_exc()
# No finally block needed for monitor task
end_time = time.time()
self.results_summary.update({
"end_time": time.strftime("%Y-%m-%d %H:%M:%S"),
"total_time_seconds": end_time - start_time,
# No client memory report
})
self._save_results()
return self.results_summary
async def _make_api_request(
self,
chunk: List[str],
batch_idx: int,
total_batches: int,
semaphore: asyncio.Semaphore
# No memory tracker
) -> Dict:
"""Makes a single API request for a chunk of URLs, handling concurrency and logging server memory."""
request_success = False
success_urls = 0
failed_urls = 0
status = "Pending"
status_color = "grey"
server_memory_metric = None # Store delta (batch) or max snapshot (stream)
api_call_start_time = time.time()
async with semaphore:
try:
# No client memory sampling
endpoint = "/crawl/stream" if self.stream_mode else "/crawl"
payload = {
"urls": chunk,
"browser_config": {"type": "BrowserConfig", "params": {"headless": True}},
"crawler_config": {
"type": "CrawlerRunConfig",
"params": {"cache_mode": "BYPASS", "stream": self.stream_mode}
}
}
if self.stream_mode:
max_server_mem_snapshot = 0.0 # Track max memory seen in this stream
async with self.http_client.stream("POST", endpoint, json=payload) as response:
initial_status_code = response.status_code
response.raise_for_status()
completed_marker_received = False
async for line in response.aiter_lines():
if line:
try:
data = json.loads(line)
if data.get("status") == "completed":
completed_marker_received = True
break
elif data.get("url"):
if data.get("success"): success_urls += 1
else: failed_urls += 1
# Extract server memory snapshot per result
mem_snapshot = data.get('server_memory_mb')
if mem_snapshot is not None:
max_server_mem_snapshot = max(max_server_mem_snapshot, float(mem_snapshot))
except json.JSONDecodeError:
console.print(f"[Batch {batch_idx}] [red]Stream decode error for line:[/red] {line}")
failed_urls = len(chunk)
break
request_success = completed_marker_received
if not request_success:
failed_urls = len(chunk) - success_urls
server_memory_metric = max_server_mem_snapshot # Use max snapshot for stream logging
else: # Batch mode
response = await self.http_client.post(endpoint, json=payload)
response.raise_for_status()
data = response.json()
# Extract server memory delta from the response
server_memory_metric = data.get('server_memory_delta_mb')
server_peak_mem_mb = data.get('server_peak_memory_mb')
if data.get("success") and "results" in data:
request_success = True
results_list = data.get("results", [])
for result_item in results_list:
if result_item.get("success"): success_urls += 1
else: failed_urls += 1
if len(results_list) != len(chunk):
console.print(f"[Batch {batch_idx}] [yellow]Warning: Result count ({len(results_list)}) doesn't match URL count ({len(chunk)})[/yellow]")
failed_urls = len(chunk) - success_urls
else:
request_success = False
failed_urls = len(chunk)
# Try to get memory from error detail if available
detail = data.get('detail')
if isinstance(detail, str):
try: detail_json = json.loads(detail)
except Exception: detail_json = {}
elif isinstance(detail, dict):
detail_json = detail
else: detail_json = {}
server_peak_mem_mb = detail_json.get('server_peak_memory_mb', None)
server_memory_metric = detail_json.get('server_memory_delta_mb', None)
console.print(f"[Batch {batch_idx}] [red]API request failed:[/red] {detail_json.get('error', 'No details')}")
except httpx.HTTPStatusError as e:
request_success = False
failed_urls = len(chunk)
console.print(f"[Batch {batch_idx}] [bold red]HTTP Error {e.response.status_code}:[/] {e.request.url}")
try:
error_detail = e.response.json()
# Attempt to extract memory info even from error responses
detail_content = error_detail.get('detail', {})
if isinstance(detail_content, str): # Handle if detail is stringified JSON
try: detail_content = json.loads(detail_content)
except Exception: detail_content = {}
server_memory_metric = detail_content.get('server_memory_delta_mb', None)
server_peak_mem_mb = detail_content.get('server_peak_memory_mb', None)
console.print(f"Response: {error_detail}")
except Exception:
console.print(f"Response Text: {e.response.text[:200]}...")
except httpx.RequestError as e:
request_success = False
failed_urls = len(chunk)
console.print(f"[Batch {batch_idx}] [bold red]Request Error:[/bold] {e.request.url} - {e}")
except Exception as e:
request_success = False
failed_urls = len(chunk)
console.print(f"[Batch {batch_idx}] [bold red]Unexpected Error:[/bold] {e}")
import traceback
traceback.print_exc()
finally:
api_call_time = time.time() - api_call_start_time
total_processed_urls = success_urls + failed_urls
if request_success and failed_urls == 0: status_color, status = "green", "Success"
elif request_success and success_urls > 0: status_color, status = "yellow", "Partial"
else: status_color, status = "red", "Failed"
current_total_urls = batch_idx * self.chunk_size
progress_pct = min(100.0, (current_total_urls / self.url_count) * 100)
reqs_per_sec = 1.0 / api_call_time if api_call_time > 0 else float('inf')
# --- New Memory Formatting ---
mem_display = " N/A " # Default
peak_mem_value = None
delta_or_max_value = None
if self.stream_mode:
# server_memory_metric holds max snapshot for stream
if server_memory_metric is not None:
mem_display = f"{server_memory_metric:.1f} (Max)"
delta_or_max_value = server_memory_metric # Store for aggregation
else: # Batch mode - expect peak and delta
# We need to get peak and delta from the API response
peak_mem_value = locals().get('server_peak_mem_mb', None) # Get from response data if available
delta_value = server_memory_metric # server_memory_metric holds delta for batch
if peak_mem_value is not None and delta_value is not None:
mem_display = f"{peak_mem_value:.1f} / {delta_value:+.1f}"
delta_or_max_value = delta_value # Store delta for aggregation
elif peak_mem_value is not None:
mem_display = f"{peak_mem_value:.1f} / N/A"
elif delta_value is not None:
mem_display = f"N/A / {delta_value:+.1f}"
delta_or_max_value = delta_value # Store delta for aggregation
# --- Updated Print Statement with Adjusted Padding ---
console.print(
f" {batch_idx:<5} | {progress_pct:6.1f}% | {mem_display:>24} | {reqs_per_sec:8.1f} | " # Increased width for memory column
f"{success_urls:^7}/{failed_urls:<6} | {api_call_time:8.2f} | [{status_color}]{status:<7}[/{status_color}] " # Added trailing space
)
# --- Updated Return Dictionary ---
return_data = {
"batch_idx": batch_idx,
"request_success": request_success,
"success_urls": success_urls,
"failed_urls": failed_urls,
"time": api_call_time,
# Return both peak (if available) and delta/max
"server_peak_memory_mb": peak_mem_value, # Will be None for stream mode
"server_delta_or_max_mb": delta_or_max_value # Delta for batch, Max for stream
}
# Add back the specific batch mode delta if needed elsewhere, but delta_or_max covers it
# if not self.stream_mode:
# return_data["server_memory_delta_mb"] = delta_value
return return_data
# No _periodic_memory_sample needed
def _save_results(self) -> None:
"""Saves the results summary to a JSON file."""
results_path = self.report_path / f"api_test_summary_{self.test_id}.json"
try:
# No client memory path to convert
with open(results_path, 'w', encoding='utf-8') as f:
json.dump(self.results_summary, f, indent=2, default=str)
except Exception as e:
console.print(f"[bold red]Failed to save results summary: {e}[/bold red]")
# --- run_full_test Function ---
async def run_full_test(args):
"""Runs the full API stress test process."""
client = httpx.AsyncClient(base_url=args.api_url, timeout=REQUEST_TIMEOUT)
if not await check_server_health(client):
console.print("[bold red]Aborting test due to server health check failure.[/]")
await client.aclose()
return
await client.aclose()
test = ApiStressTest(
api_url=args.api_url,
url_count=args.urls,
max_concurrent_requests=args.max_concurrent_requests,
chunk_size=args.chunk_size,
report_path=args.report_path,
stream_mode=args.stream,
)
results = {}
try:
results = await test.run()
finally:
await test.close_client()
if not results:
console.print("[bold red]Test did not produce results.[/bold red]")
return
console.print("\n" + "=" * 80)
console.print("[bold green]API Stress Test Completed[/bold green]")
console.print("=" * 80)
success_rate_reqs = results["successful_requests"] / results["total_api_calls"] * 100 if results["total_api_calls"] > 0 else 0
success_rate_urls = results["successful_urls"] / results["url_count"] * 100 if results["url_count"] > 0 else 0
urls_per_second = results["total_urls_processed"] / results["total_time_seconds"] if results["total_time_seconds"] > 0 else 0
reqs_per_second = results["total_api_calls"] / results["total_time_seconds"] if results["total_time_seconds"] > 0 else 0
console.print(f"[bold cyan]Test ID:[/bold cyan] {results['test_id']}")
console.print(f"[bold cyan]Target API:[/bold cyan] {results['api_url']}")
console.print(f"[bold cyan]Configuration:[/bold cyan] {results['url_count']} URLs, {results['max_concurrent_requests']} concurrent client requests, URLs/Req: {results['chunk_size']}, Stream: {results['stream_mode']}")
console.print(f"[bold cyan]API Requests:[/bold cyan] {results['successful_requests']} successful, {results['failed_requests']} failed ({results['total_api_calls']} total, {success_rate_reqs:.1f}% success)")
console.print(f"[bold cyan]URL Processing:[/bold cyan] {results['successful_urls']} successful, {results['failed_urls']} failed ({results['total_urls_processed']} processed, {success_rate_urls:.1f}% success)")
console.print(f"[bold cyan]Performance:[/bold cyan] {results['total_time_seconds']:.2f}s total | Avg Reqs/sec: {reqs_per_second:.2f} | Avg URLs/sec: {urls_per_second:.2f}")
# Report Server Memory
mem_metrics = results.get("server_memory_metrics", {})
mem_samples = mem_metrics.get("samples", [])
if mem_samples:
num_samples = len(mem_samples)
if results['stream_mode']:
avg_mem = mem_metrics.get("stream_mode_avg_max_snapshot_mb")
max_mem = mem_metrics.get("stream_mode_max_max_snapshot_mb")
avg_str = f"{avg_mem:.1f}" if avg_mem is not None else "N/A"
max_str = f"{max_mem:.1f}" if max_mem is not None else "N/A"
console.print(f"[bold cyan]Server Memory (Stream):[/bold cyan] Avg Max Snapshot: {avg_str} MB | Max Max Snapshot: {max_str} MB (across {num_samples} requests)")
else: # Batch mode
avg_delta = mem_metrics.get("batch_mode_avg_delta_mb")
max_delta = mem_metrics.get("batch_mode_max_delta_mb")
avg_peak = mem_metrics.get("batch_mode_avg_peak_mb")
max_peak = mem_metrics.get("batch_mode_max_peak_mb")
avg_delta_str = f"{avg_delta:.1f}" if avg_delta is not None else "N/A"
max_delta_str = f"{max_delta:.1f}" if max_delta is not None else "N/A"
avg_peak_str = f"{avg_peak:.1f}" if avg_peak is not None else "N/A"
max_peak_str = f"{max_peak:.1f}" if max_peak is not None else "N/A"
console.print(f"[bold cyan]Server Memory (Batch):[/bold cyan] Avg Peak: {avg_peak_str} MB | Max Peak: {max_peak_str} MB | Avg Delta: {avg_delta_str} MB | Max Delta: {max_delta_str} MB (across {num_samples} requests)")
else:
console.print("[bold cyan]Server Memory:[/bold cyan] No memory data reported by server.")
# No client memory report
summary_path = pathlib.Path(args.report_path) / f"api_test_summary_{results['test_id']}.json"
console.print(f"[bold green]Results summary saved to {summary_path}[/bold green]")
if results["failed_requests"] > 0:
console.print(f"\n[bold yellow]Warning: {results['failed_requests']} API requests failed ({100-success_rate_reqs:.1f}% failure rate)[/bold yellow]")
if results["failed_urls"] > 0:
console.print(f"[bold yellow]Warning: {results['failed_urls']} URLs failed to process ({100-success_rate_urls:.1f}% URL failure rate)[/bold yellow]")
if results["total_urls_processed"] < results["url_count"]:
console.print(f"\n[bold red]Error: Only {results['total_urls_processed']} out of {results['url_count']} target URLs were processed![/bold red]")
# --- main Function (Argument parsing mostly unchanged) ---
def main():
"""Main entry point for the script."""
parser = argparse.ArgumentParser(description="Crawl4AI API Server Stress Test")
parser.add_argument("--api-url", type=str, default=DEFAULT_API_URL, help=f"Base URL of the Crawl4AI API server (default: {DEFAULT_API_URL})")
parser.add_argument("--urls", type=int, default=DEFAULT_URL_COUNT, help=f"Total number of unique URLs to process via API calls (default: {DEFAULT_URL_COUNT})")
parser.add_argument("--max-concurrent-requests", type=int, default=DEFAULT_MAX_CONCURRENT_REQUESTS, help=f"Maximum concurrent API requests from this client (default: {DEFAULT_MAX_CONCURRENT_REQUESTS})")
parser.add_argument("--chunk-size", type=int, default=DEFAULT_CHUNK_SIZE, help=f"Number of URLs per API request payload (default: {DEFAULT_CHUNK_SIZE})")
parser.add_argument("--stream", action="store_true", default=DEFAULT_STREAM_MODE, help=f"Use the /crawl/stream endpoint instead of /crawl (default: {DEFAULT_STREAM_MODE})")
parser.add_argument("--report-path", type=str, default=DEFAULT_REPORT_PATH, help=f"Path to save reports and logs (default: {DEFAULT_REPORT_PATH})")
parser.add_argument("--clean-reports", action="store_true", help="Clean up report directory before running")
args = parser.parse_args()
console.print("[bold underline]Crawl4AI API Stress Test Configuration[/bold underline]")
console.print(f"API URL: {args.api_url}")
console.print(f"Total URLs: {args.urls}, Concurrent Client Requests: {args.max_concurrent_requests}, URLs per Request: {args.chunk_size}")
console.print(f"Mode: {'Streaming' if args.stream else 'Batch'}")
console.print(f"Report Path: {args.report_path}")
console.print("-" * 40)
if args.clean_reports: console.print("[cyan]Option: Clean reports before test[/cyan]")
console.print("-" * 40)
if args.clean_reports:
report_dir = pathlib.Path(args.report_path)
if report_dir.exists():
console.print(f"[yellow]Cleaning up reports directory: {args.report_path}[/yellow]")
shutil.rmtree(args.report_path)
report_dir.mkdir(parents=True, exist_ok=True)
try:
asyncio.run(run_full_test(args))
except KeyboardInterrupt:
console.print("\n[bold yellow]Test interrupted by user.[/bold yellow]")
except Exception as e:
console.print(f"\n[bold red]An unexpected error occurred:[/bold red] {e}")
import traceback
traceback.print_exc()
if __name__ == "__main__":
# No need to modify sys.path for SimpleMemoryTracker as it's removed
main()

View File

@@ -0,0 +1,203 @@
"""Lite Crawl4AI API stresstester.
✔ batch or stream mode (single unified path)
✔ global stats + JSON summary
✔ rich table progress
✔ Typer CLI with presets (quick / soak)
Usage examples:
python api_stress_test.py # uses quick preset
python api_stress_test.py soak # 5K URLs stress run
python api_stress_test.py --urls 200 --concurrent 10 --chunk 20
"""
from __future__ import annotations
import asyncio, json, time, uuid, pathlib, statistics
from typing import List, Dict, Optional
import httpx, typer
from rich.console import Console
from rich.table import Table
# ───────────────────────── defaults / presets ──────────────────────────
PRESETS = {
"quick": dict(urls=1, concurrent=1, chunk=1, stream=False),
"debug": dict(urls=10, concurrent=2, chunk=5, stream=False),
"soak": dict(urls=5000, concurrent=20, chunk=50, stream=True),
}
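# Example: `python api_stress_test.py soak` expands (via the Typer CLI below) to
# --urls 5000 --concurrent 20 --chunk 50 --stream; explicitly passed options override the preset values.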
API_HEALTH_ENDPOINT = "/health"
REQUEST_TIMEOUT = 180.0
console = Console()
app = typer.Typer(add_completion=False, rich_markup_mode="rich")
# ───────────────────────── helpers ─────────────────────────────────────
async def _check_health(client: httpx.AsyncClient) -> None:
resp = await client.get(API_HEALTH_ENDPOINT, timeout=10)
resp.raise_for_status()
console.print(f"[green]Server healthy — version {resp.json().get('version','?')}[/]")
async def _iter_results(resp: httpx.Response, stream: bool):
"""Yield result dicts from batch JSON or NDJSON stream."""
if stream:
async for line in resp.aiter_lines():
if not line:
continue
rec = json.loads(line)
if rec.get("status") == "completed":
break
yield rec
else:
data = resp.json()
for rec in data.get("results", []):
yield rec, data # rec + whole payload for memory delta/peak
async def _consume_stream(resp: httpx.Response) -> Dict:
stats = {"success_urls": 0, "failed_urls": 0, "mem_metric": 0.0}
async for line in resp.aiter_lines():
if not line:
continue
rec = json.loads(line)
if rec.get("status") == "completed":
break
if rec.get("success"):
stats["success_urls"] += 1
else:
stats["failed_urls"] += 1
mem = rec.get("server_memory_mb")
if mem is not None:
stats["mem_metric"] = max(stats["mem_metric"], float(mem))
return stats
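# Illustrative NDJSON records the stream consumer above expects; the values are made up and
# only the keys it actually reads ("success", "server_memory_mb", "status") are assumed:
#   {"url": "https://httpbin.org/anything/abc", "success": true, "server_memory_mb": 412.3}
#   {"url": "https://httpbin.org/anything/def", "success": false}
#   {"status": "completed"}   <- terminates the stream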
def _consume_batch(body: Dict) -> Dict:
stats = {"success_urls": 0, "failed_urls": 0}
for rec in body.get("results", []):
if rec.get("success"):
stats["success_urls"] += 1
else:
stats["failed_urls"] += 1
stats["mem_metric"] = body.get("server_memory_delta_mb")
stats["peak"] = body.get("server_peak_memory_mb")
return stats
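# For reference, a batch response body shaped like what _consume_batch reads; the numbers are
# hypothetical and only the keys accessed above are assumed:
#   {"results": [{"success": true}, {"success": false}],
#    "server_memory_delta_mb": 35.2, "server_peak_memory_mb": 910.4}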
async def _fetch_chunk(
client: httpx.AsyncClient,
urls: List[str],
stream: bool,
semaphore: asyncio.Semaphore,
) -> Dict:
endpoint = "/crawl/stream" if stream else "/crawl"
payload = {
"urls": urls,
"browser_config": {"type": "BrowserConfig", "params": {"headless": True}},
"crawler_config": {"type": "CrawlerRunConfig",
"params": {"cache_mode": "BYPASS", "stream": stream}},
}
async with semaphore:
start = time.perf_counter()
if stream:
# ---- streaming request ----
async with client.stream("POST", endpoint, json=payload) as resp:
resp.raise_for_status()
stats = await _consume_stream(resp)
else:
# ---- batch request ----
resp = await client.post(endpoint, json=payload)
resp.raise_for_status()
stats = _consume_batch(resp.json())
stats["elapsed"] = time.perf_counter() - start
return stats
# ───────────────────────── core runner ─────────────────────────────────
async def _run(api: str, urls: int, concurrent: int, chunk: int, stream: bool, report: pathlib.Path):
client = httpx.AsyncClient(base_url=api, timeout=REQUEST_TIMEOUT, limits=httpx.Limits(max_connections=concurrent+5))
await _check_health(client)
url_list = [f"https://httpbin.org/anything/{uuid.uuid4()}" for _ in range(urls)]
chunks = [url_list[i:i+chunk] for i in range(0, len(url_list), chunk)]
sem = asyncio.Semaphore(concurrent)
table = Table(show_header=True, header_style="bold magenta")
table.add_column("Batch", style="dim", width=6)
table.add_column("Success/Fail", width=12)
table.add_column("Mem", width=14)
table.add_column("Time (s)")
agg_success = agg_fail = 0
deltas, peaks = [], []
start = time.perf_counter()
tasks = [asyncio.create_task(_fetch_chunk(client, c, stream, sem)) for c in chunks]
for idx, coro in enumerate(asyncio.as_completed(tasks), 1):
res = await coro
agg_success += res["success_urls"]
agg_fail += res["failed_urls"]
if res["mem_metric"] is not None:
deltas.append(res["mem_metric"])
if res["peak"] is not None:
peaks.append(res["peak"])
mem_txt = f"{res['mem_metric']:.1f}" if res["mem_metric"] is not None else ""
if res["peak"] is not None:
mem_txt = f"{res['peak']:.1f}/{mem_txt}"
table.add_row(str(idx), f"{res['success_urls']}/{res['failed_urls']}", mem_txt, f"{res['elapsed']:.2f}")
console.print(table)
total_time = time.perf_counter() - start
summary = {
"urls": urls,
"concurrent": concurrent,
"chunk": chunk,
"stream": stream,
"success_urls": agg_success,
"failed_urls": agg_fail,
"elapsed_sec": round(total_time, 2),
"avg_mem": round(statistics.mean(deltas), 2) if deltas else None,
"max_mem": max(deltas) if deltas else None,
"avg_peak": round(statistics.mean(peaks), 2) if peaks else None,
"max_peak": max(peaks) if peaks else None,
}
console.print("\n[bold green]Done:[/]" , summary)
report.mkdir(parents=True, exist_ok=True)
path = report / f"api_test_{int(time.time())}.json"
path.write_text(json.dumps(summary, indent=2))
console.print(f"[green]Summary → {path}")
await client.aclose()
# ───────────────────────── Typer CLI ──────────────────────────────────
@app.command()
def main(
preset: str = typer.Argument("quick", help="quick / debug / soak or custom"),
api_url: str = typer.Option("http://localhost:8020", show_default=True),
urls: int = typer.Option(None, help="Total URLs to crawl"),
concurrent: int = typer.Option(None, help="Concurrent API requests"),
chunk: int = typer.Option(None, help="URLs per request"),
stream: bool = typer.Option(None, help="Use /crawl/stream"),
report: pathlib.Path = typer.Option("reports_api", help="Where to save JSON summary"),
):
"""Run a stress test against a running Crawl4AI API server."""
if preset not in PRESETS and any(v is None for v in (urls, concurrent, chunk, stream)):
console.print(f"[red]Unknown preset '{preset}' and custom params missing[/]")
raise typer.Exit(1)
cfg = PRESETS.get(preset, {})
urls = urls or cfg.get("urls")
concurrent = concurrent or cfg.get("concurrent")
chunk = chunk or cfg.get("chunk")
stream = stream if stream is not None else cfg.get("stream", False)
console.print(f"[cyan]API:[/] {api_url} | URLs: {urls} | Concurrency: {concurrent} | Chunk: {chunk} | Stream: {stream}")
asyncio.run(_run(api_url, urls, concurrent, chunk, stream, report))
if __name__ == "__main__":
app()

View File

@@ -0,0 +1,129 @@
"""
Crawl4AI Docker API stress tester.
Examples
--------
python test_stress_docker_api.py --urls 1000 --concurrency 32
python test_stress_docker_api.py --urls 1000 --concurrency 32 --stream
python test_stress_docker_api.py --base-url http://10.0.0.42:11235 --http2
"""
import argparse, asyncio, json, secrets, statistics, time
from typing import List, Tuple
import httpx
from rich.console import Console
from rich.progress import Progress, BarColumn, TimeElapsedColumn, TimeRemainingColumn
from rich.table import Table
console = Console()
# ───────────────────────── helpers ─────────────────────────
def make_fake_urls(n: int) -> List[str]:
base = "https://httpbin.org/anything/"
return [f"{base}{secrets.token_hex(8)}" for _ in range(n)]
async def fire(
client: httpx.AsyncClient, endpoint: str, payload: dict, sem: asyncio.Semaphore
) -> Tuple[bool, float]:
async with sem:
print(f"POST {endpoint} with {len(payload['urls'])} URLs")
t0 = time.perf_counter()
try:
if endpoint.endswith("/stream"):
async with client.stream("POST", endpoint, json=payload) as r:
r.raise_for_status()
async for _ in r.aiter_lines():
pass
else:
r = await client.post(endpoint, json=payload)
r.raise_for_status()
return True, time.perf_counter() - t0
except Exception:
return False, time.perf_counter() - t0
def pct(lat: List[float], p: float) -> str:
"""Return percentile string even for tiny samples."""
if not lat:
return "-"
if len(lat) == 1:
return f"{lat[0]:.2f}s"
lat_sorted = sorted(lat)
k = (p / 100) * (len(lat_sorted) - 1)
lo = int(k)
hi = min(lo + 1, len(lat_sorted) - 1)
frac = k - lo
val = lat_sorted[lo] * (1 - frac) + lat_sorted[hi] * frac
return f"{val:.2f}s"
# ───────────────────────── main ─────────────────────────
def parse_args() -> argparse.Namespace:
p = argparse.ArgumentParser(description="Stress test Crawl4AI Docker API")
p.add_argument("--urls", type=int, default=100, help="number of URLs")
p.add_argument("--concurrency", type=int, default=1, help="max POSTs in flight")
p.add_argument("--chunk-size", type=int, default=50, help="URLs per request")
p.add_argument("--base-url", default="http://localhost:11235", help="API root")
# p.add_argument("--base-url", default="http://localhost:8020", help="API root")
p.add_argument("--stream", action="store_true", help="use /crawl/stream")
p.add_argument("--http2", action="store_true", help="enable HTTP/2")
p.add_argument("--headless", action="store_true", default=True)
return p.parse_args()
async def main() -> None:
args = parse_args()
urls = make_fake_urls(args.urls)
batches = [urls[i : i + args.chunk_size] for i in range(0, len(urls), args.chunk_size)]
endpoint = "/crawl/stream" if args.stream else "/crawl"
sem = asyncio.Semaphore(args.concurrency)
async with httpx.AsyncClient(base_url=args.base_url, http2=args.http2, timeout=None) as client:
with Progress(
"[progress.description]{task.description}",
BarColumn(),
"[progress.percentage]{task.percentage:>3.0f}%",
TimeElapsedColumn(),
TimeRemainingColumn(),
) as progress:
task_id = progress.add_task("[cyan]bombarding…", total=len(batches))
tasks = []
for chunk in batches:
payload = {
"urls": chunk,
"browser_config": {"type": "BrowserConfig", "params": {"headless": args.headless}},
"crawler_config": {"type": "CrawlerRunConfig", "params": {"cache_mode": "BYPASS", "stream": args.stream}},
}
tasks.append(asyncio.create_task(fire(client, endpoint, payload, sem)))
results = []
# Advance the progress bar as each request finishes rather than when it is scheduled
for coro in asyncio.as_completed(tasks):
results.append(await coro)
progress.advance(task_id)
ok_latencies = [dt for ok, dt in results if ok]
err_count = sum(1 for ok, _ in results if not ok)
table = Table(title="Docker API StressTest Summary")
table.add_column("total", justify="right")
table.add_column("errors", justify="right")
table.add_column("p50", justify="right")
table.add_column("p95", justify="right")
table.add_column("max", justify="right")
table.add_row(
str(len(results)),
str(err_count),
pct(ok_latencies, 50),
pct(ok_latencies, 95),
f"{max(ok_latencies):.2f}s" if ok_latencies else "-",
)
console.print(table)
if __name__ == "__main__":
try:
asyncio.run(main())
except KeyboardInterrupt:
console.print("\n[yellow]aborted by user[/]")

View File

@@ -0,0 +1,500 @@
#!/usr/bin/env python3
"""
Stress test for Crawl4AI's arun_many and dispatcher system.
This version uses a local HTTP server and focuses on testing
the SDK's ability to handle multiple URLs concurrently, with per-batch logging.
"""
import asyncio
import os
import time
import pathlib
import random
import secrets
import argparse
import json
import sys
import subprocess
import signal
from typing import List, Dict, Optional, Union, AsyncGenerator
import shutil
from rich.console import Console
# Crawl4AI components
from crawl4ai import (
AsyncWebCrawler,
CrawlerRunConfig,
BrowserConfig,
MemoryAdaptiveDispatcher,
CrawlerMonitor,
DisplayMode,
CrawlResult,
RateLimiter,
CacheMode,
)
# Constants
DEFAULT_SITE_PATH = "test_site"
DEFAULT_PORT = 8000
DEFAULT_MAX_SESSIONS = 16
DEFAULT_URL_COUNT = 1
DEFAULT_CHUNK_SIZE = 1 # Define chunk size for batch logging
DEFAULT_REPORT_PATH = "reports"
DEFAULT_STREAM_MODE = False
DEFAULT_MONITOR_MODE = "DETAILED"
# Initialize Rich console
console = Console()
# --- SiteGenerator Class (Unchanged) ---
class SiteGenerator:
"""Generates a local test site with heavy pages for stress testing."""
def __init__(self, site_path: str = DEFAULT_SITE_PATH, page_count: int = DEFAULT_URL_COUNT):
self.site_path = pathlib.Path(site_path)
self.page_count = page_count
self.images_dir = self.site_path / "images"
self.lorem_words = " ".join("lorem ipsum dolor sit amet " * 100).split()
self.html_template = """<!doctype html>
<html>
<head>
<title>Test Page {page_num}</title>
<meta charset="utf-8">
</head>
<body>
<h1>Test Page {page_num}</h1>
{paragraphs}
{images}
</body>
</html>
"""
def generate_site(self) -> None:
self.site_path.mkdir(parents=True, exist_ok=True)
self.images_dir.mkdir(exist_ok=True)
console.print(f"Generating {self.page_count} test pages...")
for i in range(self.page_count):
paragraphs = "\n".join(f"<p>{' '.join(random.choices(self.lorem_words, k=200))}</p>" for _ in range(5))
images = "\n".join(f'<img src="https://picsum.photos/seed/{secrets.token_hex(8)}/300/200" loading="lazy" alt="Random image {j}"/>' for j in range(3))
page_path = self.site_path / f"page_{i}.html"
page_path.write_text(self.html_template.format(page_num=i, paragraphs=paragraphs, images=images), encoding="utf-8")
if (i + 1) % (self.page_count // 10 or 1) == 0 or i == self.page_count - 1:
console.print(f"Generated {i+1}/{self.page_count} pages")
self._create_index_page()
console.print(f"[bold green]Successfully generated {self.page_count} test pages in [cyan]{self.site_path}[/cyan][/bold green]")
def _create_index_page(self) -> None:
index_content = """<!doctype html><html><head><title>Test Site Index</title><meta charset="utf-8"></head><body><h1>Test Site Index</h1><p>This is an automatically generated site for testing Crawl4AI.</p><div class="page-links">\n"""
for i in range(self.page_count):
index_content += f' <a href="page_{i}.html">Test Page {i}</a><br>\n'
index_content += """ </div></body></html>"""
(self.site_path / "index.html").write_text(index_content, encoding="utf-8")
# --- LocalHttpServer Class (Unchanged) ---
class LocalHttpServer:
"""Manages a local HTTP server for serving test pages."""
def __init__(self, site_path: str = DEFAULT_SITE_PATH, port: int = DEFAULT_PORT):
self.site_path = pathlib.Path(site_path)
self.port = port
self.process = None
def start(self) -> None:
if not self.site_path.exists(): raise FileNotFoundError(f"Site directory {self.site_path} does not exist")
console.print(f"Attempting to start HTTP server in [cyan]{self.site_path}[/cyan] on port {self.port}...")
try:
cmd = ["python", "-m", "http.server", str(self.port)]
creationflags = 0; preexec_fn = None
if sys.platform == 'win32': creationflags = subprocess.CREATE_NEW_PROCESS_GROUP
self.process = subprocess.Popen(cmd, cwd=str(self.site_path), stdout=subprocess.PIPE, stderr=subprocess.PIPE, creationflags=creationflags)
time.sleep(1.5)
if self.is_running(): console.print(f"[bold green]HTTP server started successfully (PID: {self.process.pid})[/bold green]")
else:
console.print("[bold red]Failed to start HTTP server. Checking logs...[/bold red]")
stdout, stderr = self.process.communicate(); print(stdout.decode(errors='ignore')); print(stderr.decode(errors='ignore'))
self.stop(); raise RuntimeError("HTTP server failed to start.")
except Exception as e: console.print(f"[bold red]Error starting HTTP server: {str(e)}[/bold red]"); self.stop(); raise
def stop(self) -> None:
if self.process and self.is_running():
console.print(f"Stopping HTTP server (PID: {self.process.pid})...")
try:
if sys.platform == 'win32': self.process.send_signal(signal.CTRL_BREAK_EVENT); time.sleep(0.5)
self.process.terminate()
try: stdout, stderr = self.process.communicate(timeout=5); console.print("[bold yellow]HTTP server stopped[/bold yellow]")
except subprocess.TimeoutExpired: console.print("[bold red]Server did not terminate gracefully, killing...[/bold red]"); self.process.kill(); stdout, stderr = self.process.communicate(); console.print("[bold yellow]HTTP server killed[/bold yellow]")
except Exception as e: console.print(f"[bold red]Error stopping HTTP server: {str(e)}[/bold red]"); self.process.kill()
finally: self.process = None
elif self.process: console.print("[dim]HTTP server process already stopped.[/dim]"); self.process = None
def is_running(self) -> bool:
if not self.process: return False
return self.process.poll() is None
# --- SimpleMemoryTracker Class (Unchanged) ---
class SimpleMemoryTracker:
"""Basic memory tracker that doesn't rely on psutil."""
def __init__(self, report_path: str = DEFAULT_REPORT_PATH, test_id: Optional[str] = None):
self.report_path = pathlib.Path(report_path); self.report_path.mkdir(parents=True, exist_ok=True)
self.test_id = test_id or time.strftime("%Y%m%d_%H%M%S")
self.start_time = time.time(); self.memory_samples = []; self.pid = os.getpid()
self.csv_path = self.report_path / f"memory_samples_{self.test_id}.csv"
with open(self.csv_path, 'w', encoding='utf-8') as f: f.write("timestamp,elapsed_seconds,memory_info_mb\n")
def sample(self) -> Dict:
try:
memory_mb = self._get_memory_info_mb()
memory_str = f"{memory_mb:.1f} MB" if memory_mb is not None else "Unknown"
timestamp = time.time(); elapsed = timestamp - self.start_time
sample = {"timestamp": timestamp, "elapsed_seconds": elapsed, "memory_mb": memory_mb, "memory_str": memory_str}
self.memory_samples.append(sample)
with open(self.csv_path, 'a', encoding='utf-8') as f: f.write(f"{timestamp},{elapsed:.2f},{memory_mb if memory_mb is not None else ''}\n")
return sample
except Exception as e: return {"memory_mb": None, "memory_str": "Error"}
def _get_memory_info_mb(self) -> Optional[float]:
pid_str = str(self.pid)
try:
if sys.platform == 'darwin': result = subprocess.run(["ps", "-o", "rss=", "-p", pid_str], capture_output=True, text=True, check=True, encoding='utf-8'); return int(result.stdout.strip()) / 1024.0
elif sys.platform == 'linux':
with open(f"/proc/{pid_str}/status", encoding='utf-8') as f:
for line in f:
if line.startswith("VmRSS:"): return int(line.split()[1]) / 1024.0
return None
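# tasklist /fo csv /nh prints e.g. "python.exe","1234","Console","1","55,123 K"; the 5th field is the working-set size in KB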
elif sys.platform == 'win32': result = subprocess.run(["tasklist", "/fi", f"PID eq {pid_str}", "/fo", "csv", "/nh"], capture_output=True, text=True, check=True, encoding='cp850', errors='ignore'); parts = result.stdout.strip().split('","'); return int(parts[4].strip().replace('"', '').replace(' K', '').replace(',', '')) / 1024.0 if len(parts) >= 5 else None
else: return None
except Exception: return None # Catch any failure so memory sampling never aborts the test
def get_report(self) -> Dict:
if not self.memory_samples: return {"error": "No memory samples collected"}
total_time = time.time() - self.start_time; valid_samples = [s['memory_mb'] for s in self.memory_samples if s['memory_mb'] is not None]
start_mem = valid_samples[0] if valid_samples else None; end_mem = valid_samples[-1] if valid_samples else None
max_mem = max(valid_samples) if valid_samples else None; avg_mem = sum(valid_samples) / len(valid_samples) if valid_samples else None
growth = (end_mem - start_mem) if start_mem is not None and end_mem is not None else None
return {"test_id": self.test_id, "total_time_seconds": total_time, "sample_count": len(self.memory_samples), "valid_sample_count": len(valid_samples), "csv_path": str(self.csv_path), "platform": sys.platform, "start_memory_mb": start_mem, "end_memory_mb": end_mem, "max_memory_mb": max_mem, "average_memory_mb": avg_mem, "memory_growth_mb": growth}
# --- CrawlerStressTest Class (Refactored for Per-Batch Logging) ---
class CrawlerStressTest:
"""Orchestrates the stress test using arun_many per chunk and a dispatcher."""
def __init__(
self,
url_count: int = DEFAULT_URL_COUNT,
port: int = DEFAULT_PORT,
max_sessions: int = DEFAULT_MAX_SESSIONS,
chunk_size: int = DEFAULT_CHUNK_SIZE, # Added chunk_size
report_path: str = DEFAULT_REPORT_PATH,
stream_mode: bool = DEFAULT_STREAM_MODE,
monitor_mode: str = DEFAULT_MONITOR_MODE,
use_rate_limiter: bool = False
):
self.url_count = url_count
self.server_port = port
self.max_sessions = max_sessions
self.chunk_size = chunk_size # Store chunk size
self.report_path = pathlib.Path(report_path)
self.report_path.mkdir(parents=True, exist_ok=True)
self.stream_mode = stream_mode
self.monitor_mode = DisplayMode[monitor_mode.upper()]
self.use_rate_limiter = use_rate_limiter
self.test_id = time.strftime("%Y%m%d_%H%M%S")
self.results_summary = {
"test_id": self.test_id, "url_count": url_count, "max_sessions": max_sessions,
"chunk_size": chunk_size, "stream_mode": stream_mode, "monitor_mode": monitor_mode,
"rate_limiter_used": use_rate_limiter, "start_time": "", "end_time": "",
"total_time_seconds": 0, "successful_urls": 0, "failed_urls": 0,
"urls_processed": 0, "chunks_processed": 0
}
async def run(self) -> Dict:
"""Run the stress test and return results."""
memory_tracker = SimpleMemoryTracker(report_path=self.report_path, test_id=self.test_id)
urls = [f"http://localhost:{self.server_port}/page_{i}.html" for i in range(self.url_count)]
# Split URLs into chunks based on self.chunk_size
url_chunks = [urls[i:i+self.chunk_size] for i in range(0, len(urls), self.chunk_size)]
self.results_summary["start_time"] = time.strftime("%Y-%m-%d %H:%M:%S")
start_time = time.time()
config = CrawlerRunConfig(
wait_for_images=False, verbose=False,
stream=self.stream_mode, # Still pass stream mode, affects arun_many return type
cache_mode=CacheMode.BYPASS
)
total_successful_urls = 0
total_failed_urls = 0
total_urls_processed = 0
start_memory_sample = memory_tracker.sample()
start_memory_str = start_memory_sample.get("memory_str", "Unknown")
# monitor = CrawlerMonitor(display_mode=self.monitor_mode, total_urls=self.url_count)
monitor = None
rate_limiter = RateLimiter(base_delay=(0.1, 0.3)) if self.use_rate_limiter else None
dispatcher = MemoryAdaptiveDispatcher(max_session_permit=self.max_sessions, monitor=monitor, rate_limiter=rate_limiter)
console.print(f"\n[bold cyan]Crawl4AI Stress Test - {self.url_count} URLs, {self.max_sessions} max sessions[/bold cyan]")
console.print(f"[bold cyan]Mode:[/bold cyan] {'Streaming' if self.stream_mode else 'Batch'}, [bold cyan]Monitor:[/bold cyan] {self.monitor_mode.name}, [bold cyan]Chunk Size:[/bold cyan] {self.chunk_size}")
console.print(f"[bold cyan]Initial Memory:[/bold cyan] {start_memory_str}")
# Print batch log header only if not streaming
if not self.stream_mode:
console.print("\n[bold]Batch Progress:[/bold] (Monitor below shows overall progress)")
console.print("[bold] Batch | Progress | Start Mem | End Mem | URLs/sec | Success/Fail | Time (s) | Status [/bold]")
console.print("" * 90)
monitor_task = asyncio.create_task(self._periodic_memory_sample(memory_tracker, 2.0))
try:
async with AsyncWebCrawler(
config=BrowserConfig( verbose = False)
) as crawler:
# Process URLs chunk by chunk
for chunk_idx, url_chunk in enumerate(url_chunks):
batch_start_time = time.time()
chunk_success = 0
chunk_failed = 0
# Sample memory before the chunk
start_mem_sample = memory_tracker.sample()
start_mem_str = start_mem_sample.get("memory_str", "Unknown")
# --- Call arun_many for the current chunk ---
try:
# Note: dispatcher/monitor persist across calls
results_gen_or_list: Union[AsyncGenerator[CrawlResult, None], List[CrawlResult]] = \
await crawler.arun_many(
urls=url_chunk,
config=config,
dispatcher=dispatcher # Reuse the same dispatcher
)
if self.stream_mode:
# Process stream results if needed, but batch logging is less relevant
async for result in results_gen_or_list:
total_urls_processed += 1
if result.success: chunk_success += 1
else: chunk_failed += 1
# In stream mode, batch summary isn't as meaningful here
# We could potentially track completion per chunk async, but it's complex
else: # Batch mode
# Process the list of results for this chunk
for result in results_gen_or_list:
total_urls_processed += 1
if result.success: chunk_success += 1
else: chunk_failed += 1
except Exception as e:
console.print(f"[bold red]Error processing chunk {chunk_idx+1}: {e}[/bold red]")
chunk_failed = len(url_chunk) # Assume all failed in the chunk on error
total_urls_processed += len(url_chunk) # Count them as processed (failed)
# --- Log batch results (only if not streaming) ---
if not self.stream_mode:
batch_time = time.time() - batch_start_time
urls_per_sec = len(url_chunk) / batch_time if batch_time > 0 else 0
end_mem_sample = memory_tracker.sample()
end_mem_str = end_mem_sample.get("memory_str", "Unknown")
progress_pct = (total_urls_processed / self.url_count) * 100
if chunk_failed == 0: status_color, status = "green", "Success"
elif chunk_success == 0: status_color, status = "red", "Failed"
else: status_color, status = "yellow", "Partial"
console.print(
f" {chunk_idx+1:<5} | {progress_pct:6.1f}% | {start_mem_str:>9} | {end_mem_str:>9} | {urls_per_sec:8.1f} | "
f"{chunk_success:^7}/{chunk_failed:<6} | {batch_time:8.2f} | [{status_color}]{status:<7}[/{status_color}]"
)
# Accumulate totals
total_successful_urls += chunk_success
total_failed_urls += chunk_failed
self.results_summary["chunks_processed"] += 1
# Optional small delay between starting chunks if needed
# await asyncio.sleep(0.1)
except Exception as e:
console.print(f"[bold red]An error occurred during the main crawl loop: {e}[/bold red]")
finally:
if 'monitor_task' in locals() and not monitor_task.done():
monitor_task.cancel()
try: await monitor_task
except asyncio.CancelledError: pass
end_time = time.time()
self.results_summary.update({
"end_time": time.strftime("%Y-%m-%d %H:%M:%S"),
"total_time_seconds": end_time - start_time,
"successful_urls": total_successful_urls,
"failed_urls": total_failed_urls,
"urls_processed": total_urls_processed,
"memory": memory_tracker.get_report()
})
self._save_results()
return self.results_summary
async def _periodic_memory_sample(self, tracker: SimpleMemoryTracker, interval: float):
"""Background task to sample memory periodically."""
while True:
tracker.sample()
try:
await asyncio.sleep(interval)
except asyncio.CancelledError:
break # Exit loop on cancellation
def _save_results(self) -> None:
results_path = self.report_path / f"test_summary_{self.test_id}.json"
try:
with open(results_path, 'w', encoding='utf-8') as f: json.dump(self.results_summary, f, indent=2, default=str)
# console.print(f"\n[bold green]Results summary saved to {results_path}[/bold green]") # Moved summary print to run_full_test
except Exception as e: console.print(f"[bold red]Failed to save results summary: {e}[/bold red]")
# --- run_full_test Function (Adjusted) ---
async def run_full_test(args):
"""Run the complete test process from site generation to crawling."""
server = None
site_generated = False
# --- Site Generation --- (Same as before)
if not args.use_existing_site and not args.skip_generation:
if os.path.exists(args.site_path): console.print(f"[yellow]Removing existing site directory: {args.site_path}[/yellow]"); shutil.rmtree(args.site_path)
site_generator = SiteGenerator(site_path=args.site_path, page_count=args.urls); site_generator.generate_site(); site_generated = True
elif args.use_existing_site: console.print(f"[cyan]Using existing site assumed to be running on port {args.port}[/cyan]")
elif args.skip_generation:
console.print(f"[cyan]Skipping site generation, using existing directory: {args.site_path}[/cyan]")
if not os.path.exists(args.site_path) or not os.path.isdir(args.site_path): console.print(f"[bold red]Error: Site path '{args.site_path}' does not exist or is not a directory.[/bold red]"); return
# --- Start Local Server --- (Same as before)
server_started = False
if not args.use_existing_site:
server = LocalHttpServer(site_path=args.site_path, port=args.port)
try: server.start(); server_started = True
except Exception as e:
console.print(f"[bold red]Failed to start local server. Aborting test.[/bold red]")
if site_generated and not args.keep_site: console.print(f"[yellow]Cleaning up generated site: {args.site_path}[/yellow]"); shutil.rmtree(args.site_path)
return
try:
# --- Run the Stress Test ---
test = CrawlerStressTest(
url_count=args.urls,
port=args.port,
max_sessions=args.max_sessions,
chunk_size=args.chunk_size, # Pass chunk_size
report_path=args.report_path,
stream_mode=args.stream,
monitor_mode=args.monitor_mode,
use_rate_limiter=args.use_rate_limiter
)
results = await test.run() # Run the test which now handles chunks internally
# --- Print Summary ---
console.print("\n" + "=" * 80)
console.print("[bold green]Test Completed[/bold green]")
console.print("=" * 80)
# (Summary printing logic remains largely the same)
success_rate = results["successful_urls"] / results["url_count"] * 100 if results["url_count"] > 0 else 0
urls_per_second = results["urls_processed"] / results["total_time_seconds"] if results["total_time_seconds"] > 0 else 0
console.print(f"[bold cyan]Test ID:[/bold cyan] {results['test_id']}")
console.print(f"[bold cyan]Configuration:[/bold cyan] {results['url_count']} URLs, {results['max_sessions']} sessions, Chunk: {results['chunk_size']}, Stream: {results['stream_mode']}, Monitor: {results['monitor_mode']}")
console.print(f"[bold cyan]Results:[/bold cyan] {results['successful_urls']} successful, {results['failed_urls']} failed ({results['urls_processed']} processed, {success_rate:.1f}% success)")
console.print(f"[bold cyan]Performance:[/bold cyan] {results['total_time_seconds']:.2f} seconds total, {urls_per_second:.2f} URLs/second avg")
mem_report = results.get("memory", {})
mem_info_str = "Memory tracking data unavailable."
if mem_report and not mem_report.get("error"):
start_mb = mem_report.get('start_memory_mb'); end_mb = mem_report.get('end_memory_mb'); max_mb = mem_report.get('max_memory_mb'); growth_mb = mem_report.get('memory_growth_mb')
mem_parts = []
if start_mb is not None: mem_parts.append(f"Start: {start_mb:.1f} MB")
if end_mb is not None: mem_parts.append(f"End: {end_mb:.1f} MB")
if max_mb is not None: mem_parts.append(f"Max: {max_mb:.1f} MB")
if growth_mb is not None: mem_parts.append(f"Growth: {growth_mb:.1f} MB")
if mem_parts: mem_info_str = ", ".join(mem_parts)
csv_path = mem_report.get('csv_path')
if csv_path: console.print(f"[dim]Memory samples saved to: {csv_path}[/dim]")
console.print(f"[bold cyan]Memory Usage:[/bold cyan] {mem_info_str}")
console.print(f"[bold green]Results summary saved to {results['memory']['csv_path'].replace('memory_samples', 'test_summary').replace('.csv', '.json')}[/bold green]") # Infer summary path
if results["failed_urls"] > 0: console.print(f"\n[bold yellow]Warning: {results['failed_urls']} URLs failed to process ({100-success_rate:.1f}% failure rate)[/bold yellow]")
if results["urls_processed"] < results["url_count"]: console.print(f"\n[bold red]Error: Only {results['urls_processed']} out of {results['url_count']} URLs were processed![/bold red]")
finally:
# --- Stop Server / Cleanup --- (Same as before)
if server_started and server and not args.keep_server_alive: server.stop()
elif server_started and server and args.keep_server_alive:
console.print(f"[bold cyan]Server is kept running on port {args.port}. Press Ctrl+C to stop it.[/bold cyan]")
try: await asyncio.Future() # Keep running indefinitely
except KeyboardInterrupt: console.print("\n[bold yellow]Stopping server due to user interrupt...[/bold yellow]"); server.stop()
if site_generated and not args.keep_site: console.print(f"[yellow]Cleaning up generated site: {args.site_path}[/yellow]"); shutil.rmtree(args.site_path)
elif args.clean_site and os.path.exists(args.site_path): console.print(f"[yellow]Cleaning up site directory as requested: {args.site_path}[/yellow]"); shutil.rmtree(args.site_path)
# --- main Function (Added chunk_size argument) ---
def main():
"""Main entry point for the script."""
parser = argparse.ArgumentParser(description="Crawl4AI SDK High Volume Stress Test using arun_many")
# Test parameters
parser.add_argument("--urls", type=int, default=DEFAULT_URL_COUNT, help=f"Number of URLs to test (default: {DEFAULT_URL_COUNT})")
parser.add_argument("--max-sessions", type=int, default=DEFAULT_MAX_SESSIONS, help=f"Maximum concurrent crawling sessions (default: {DEFAULT_MAX_SESSIONS})")
parser.add_argument("--chunk-size", type=int, default=DEFAULT_CHUNK_SIZE, help=f"Number of URLs per batch for logging (default: {DEFAULT_CHUNK_SIZE})") # Added
parser.add_argument("--stream", action="store_true", default=DEFAULT_STREAM_MODE, help=f"Enable streaming mode (disables batch logging) (default: {DEFAULT_STREAM_MODE})")
parser.add_argument("--monitor-mode", type=str, default=DEFAULT_MONITOR_MODE, choices=["DETAILED", "AGGREGATED"], help=f"Display mode for the live monitor (default: {DEFAULT_MONITOR_MODE})")
parser.add_argument("--use-rate-limiter", action="store_true", default=False, help="Enable a basic rate limiter (default: False)")
# Environment parameters
parser.add_argument("--site-path", type=str, default=DEFAULT_SITE_PATH, help=f"Path to generate/use the test site (default: {DEFAULT_SITE_PATH})")
parser.add_argument("--port", type=int, default=DEFAULT_PORT, help=f"Port for the local HTTP server (default: {DEFAULT_PORT})")
parser.add_argument("--report-path", type=str, default=DEFAULT_REPORT_PATH, help=f"Path to save reports and logs (default: {DEFAULT_REPORT_PATH})")
# Site/Server management
parser.add_argument("--skip-generation", action="store_true", help="Use existing test site folder without regenerating")
parser.add_argument("--use-existing-site", action="store_true", help="Do not generate site or start local server; assume site exists on --port")
parser.add_argument("--keep-server-alive", action="store_true", help="Keep the local HTTP server running after test")
parser.add_argument("--keep-site", action="store_true", help="Keep the generated test site files after test")
parser.add_argument("--clean-reports", action="store_true", help="Clean up report directory before running")
parser.add_argument("--clean-site", action="store_true", help="Clean up site directory before running (if generating) or after")
args = parser.parse_args()
# Display config
console.print("[bold underline]Crawl4AI SDK Stress Test Configuration[/bold underline]")
console.print(f"URLs: {args.urls}, Max Sessions: {args.max_sessions}, Chunk Size: {args.chunk_size}") # Added chunk size
console.print(f"Mode: {'Streaming' if args.stream else 'Batch'}, Monitor: {args.monitor_mode}, Rate Limit: {args.use_rate_limiter}")
console.print(f"Site Path: {args.site_path}, Port: {args.port}, Report Path: {args.report_path}")
console.print("-" * 40)
# (Rest of config display and cleanup logic is the same)
if args.use_existing_site: console.print("[cyan]Mode: Using existing external site/server[/cyan]")
elif args.skip_generation: console.print("[cyan]Mode: Using existing site files, starting local server[/cyan]")
else: console.print("[cyan]Mode: Generating site files, starting local server[/cyan]")
if args.keep_server_alive: console.print("[cyan]Option: Keep server alive after test[/cyan]")
if args.keep_site: console.print("[cyan]Option: Keep site files after test[/cyan]")
if args.clean_reports: console.print("[cyan]Option: Clean reports before test[/cyan]")
if args.clean_site: console.print("[cyan]Option: Clean site directory[/cyan]")
console.print("-" * 40)
if args.clean_reports:
if os.path.exists(args.report_path): console.print(f"[yellow]Cleaning up reports directory: {args.report_path}[/yellow]"); shutil.rmtree(args.report_path)
os.makedirs(args.report_path, exist_ok=True)
if args.clean_site and not args.use_existing_site:
if os.path.exists(args.site_path): console.print(f"[yellow]Cleaning up site directory as requested: {args.site_path}[/yellow]"); shutil.rmtree(args.site_path)
# Run
try: asyncio.run(run_full_test(args))
except KeyboardInterrupt: console.print("\n[bold yellow]Test interrupted by user.[/bold yellow]")
except Exception as e: console.print(f"\n[bold red]An unexpected error occurred:[/bold red] {e}"); import traceback; traceback.print_exc()
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,32 @@
from crawl4ai import BrowserProfiler
import asyncio
if __name__ == "__main__":
# Example usage
profiler = BrowserProfiler()
# Create a new profile
from pathlib import Path
home_dir = Path.home()
profile_path = asyncio.run(profiler.create_profile( str(home_dir / ".crawl4ai/profiles/test-profile")))
print(f"Profile created at: {profile_path}")
# # Launch a standalone browser
# asyncio.run(profiler.launch_standalone_browser())
# # List profiles
# profiles = profiler.list_profiles()
# for profile in profiles:
# print(f"Profile: {profile['name']}, Path: {profile['path']}")
# # Delete a profile
# success = profiler.delete_profile("my-profile")
# if success:
# print("Profile deleted successfully")
# else:
# print("Failed to delete profile")

View File

@@ -19,7 +19,7 @@ async def main():
async with AsyncWebCrawler() as crawler:
result = await crawler.arun(url="https://example.com", config=config)
print(f"Success: {result.success}")
print(f"Markdown length: {len(result.markdown_v2.raw_markdown)}")
print(f"Markdown length: {len(result.markdown.raw_markdown)}")
if __name__ == "__main__":

View File

@@ -1,4 +1,5 @@
import unittest, os
from crawl4ai import LLMConfig
from crawl4ai.web_crawler import WebCrawler
from crawl4ai.chunking_strategy import (
RegexChunking,
@@ -42,7 +43,7 @@ class TestWebCrawler(unittest.TestCase):
word_count_threshold=5,
chunking_strategy=FixedLengthWordChunking(chunk_size=100),
extraction_strategy=LLMExtractionStrategy(
provider="openai/gpt-3.5-turbo", api_token=os.getenv("OPENAI_API_KEY")
llm_config=LLMConfig(provider="openai/gpt-3.5-turbo", api_token=os.getenv("OPENAI_API_KEY"))
),
bypass_cache=True,
)