Improve CDP connection handling: configurable close delay and opt-in connection caching

unclecode
2026-01-31 11:07:26 +00:00
parent 0104db6de2
commit 19b9140c68
4 changed files with 653 additions and 16 deletions

View File

@@ -425,6 +425,15 @@ class BrowserConfig:
the local Playwright client resources. Useful for cloud/server scenarios
where you don't own the remote browser but need to prevent memory leaks
from accumulated Playwright instances. Default: False.
cdp_close_delay (float): Seconds to wait after disconnecting a CDP WebSocket before stopping the
Playwright subprocess. Gives the connection time to fully release. Set to
0 to skip the delay entirely. Only applies when cdp_cleanup_on_close=True.
Default: 1.0.
cache_cdp_connection (bool): When True and using cdp_url, the Playwright subprocess and CDP WebSocket
are cached at the class level and shared across multiple BrowserManager
instances connecting to the same cdp_url. Reference-counted; the connection
is only closed when the last user releases it. Eliminates the overhead of
repeated Playwright/CDP setup and teardown. Default: False.
create_isolated_context (bool): When True and using cdp_url, forces creation of a new browser context
instead of reusing the default context. Essential for concurrent crawls
on the same browser to prevent navigation conflicts. Default: False.
@@ -485,6 +494,8 @@ class BrowserConfig:
browser_context_id: str = None,
target_id: str = None,
cdp_cleanup_on_close: bool = False,
cdp_close_delay: float = 1.0,
cache_cdp_connection: bool = False,
create_isolated_context: bool = False,
use_persistent_context: bool = False,
user_data_dir: str = None,
@@ -529,6 +540,8 @@ class BrowserConfig:
self.browser_context_id = browser_context_id
self.target_id = target_id
self.cdp_cleanup_on_close = cdp_cleanup_on_close
self.cdp_close_delay = cdp_close_delay
self.cache_cdp_connection = cache_cdp_connection
self.create_isolated_context = create_isolated_context
self.use_persistent_context = use_persistent_context
self.user_data_dir = user_data_dir
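
A minimal usage sketch of the new flags, based on the public API exercised by the new test file below; the CDP endpoint and target URL are placeholders, and it assumes a Chrome instance already running with --remote-debugging-port=9222:

import asyncio
from crawl4ai import AsyncWebCrawler
from crawl4ai.async_configs import BrowserConfig

CDP_URL = "http://localhost:9222"  # placeholder endpoint

async def main():
    cfg = BrowserConfig(
        cdp_url=CDP_URL,
        cache_cdp_connection=True,     # share one Playwright/CDP pair across instances
        create_isolated_context=True,  # avoid navigation conflicts between concurrent crawls
        cdp_close_delay=0.0,           # only consulted when cdp_cleanup_on_close=True
    )
    async with AsyncWebCrawler(config=cfg) as crawler:
        result = await crawler.arun(url="https://example.com")
        print(result.success)

asyncio.run(main())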

View File

@@ -139,8 +139,9 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
Close the browser and clean up resources.
"""
await self.browser_manager.close()
-        # Explicitly reset the static Playwright instance
-        BrowserManager._playwright_instance = None
+        # Explicitly reset the static Playwright instance (skip if using cached CDP)
+        if not self.browser_manager._using_cached_cdp:
+            BrowserManager._playwright_instance = None

async def kill_session(self, session_id: str):
"""

View File

@@ -1,6 +1,6 @@
import asyncio
import time
-from typing import List, Optional
+from typing import Dict, List, Optional, Tuple
import os
import sys
import shutil
@@ -559,6 +559,91 @@ async def clone_runtime_state(
class _CDPConnectionCache:
"""
Class-level cache for Playwright + CDP browser connections.
When enabled via BrowserConfig(cache_cdp_connection=True), multiple
BrowserManager instances connecting to the same cdp_url will share
a single Playwright subprocess and CDP WebSocket. Reference-counted;
the connection is closed when the last user releases it.
"""
_cache: Dict[str, Tuple] = {} # cdp_url -> (playwright, browser, ref_count)
_lock: Optional[asyncio.Lock] = None # lazy-init to avoid event loop issues
_lock_loop: Optional[asyncio.AbstractEventLoop] = None
@classmethod
def _get_lock(cls) -> asyncio.Lock:
loop = asyncio.get_running_loop()
if cls._lock is None or cls._lock_loop is not loop:
cls._lock = asyncio.Lock()
cls._lock_loop = loop
return cls._lock
@classmethod
async def acquire(cls, cdp_url: str, use_undetected: bool = False):
"""Get or create a cached (playwright, browser) for this cdp_url."""
async with cls._get_lock():
if cdp_url in cls._cache:
pw, browser, count = cls._cache[cdp_url]
if browser.is_connected():
cls._cache[cdp_url] = (pw, browser, count + 1)
return pw, browser
# Stale connection — clean up and fall through to create new
try:
await pw.stop()
except Exception:
pass
del cls._cache[cdp_url]
# Create new connection
if use_undetected:
from patchright.async_api import async_playwright
else:
from playwright.async_api import async_playwright
pw = await async_playwright().start()
browser = await pw.chromium.connect_over_cdp(cdp_url)
cls._cache[cdp_url] = (pw, browser, 1)
return pw, browser
@classmethod
async def release(cls, cdp_url: str):
"""Decrement ref count; close connection when last user releases."""
async with cls._get_lock():
if cdp_url not in cls._cache:
return
pw, browser, count = cls._cache[cdp_url]
if count <= 1:
try:
await browser.close()
except Exception:
pass
try:
await pw.stop()
except Exception:
pass
del cls._cache[cdp_url]
else:
cls._cache[cdp_url] = (pw, browser, count - 1)
@classmethod
async def close_all(cls):
"""Force-close all cached connections. Call on application shutdown."""
async with cls._get_lock():
for cdp_url in list(cls._cache.keys()):
pw, browser, _ = cls._cache[cdp_url]
try:
await browser.close()
except Exception:
pass
try:
await pw.stop()
except Exception:
pass
cls._cache.clear()
class BrowserManager:
"""
Manages the browser instance and context.
@@ -616,6 +701,7 @@ class BrowserManager:
self.default_context = None
self.managed_browser = None
self.playwright = None
self._using_cached_cdp = False
# Session management
self.sessions = {}
@@ -666,23 +752,35 @@ class BrowserManager:
if self.playwright is not None:
await self.close()
-        if self.use_undetected:
-            from patchright.async_api import async_playwright
-        else:
-            from playwright.async_api import async_playwright
-
-        # Initialize playwright
-        self.playwright = await async_playwright().start()
+        # Use cached CDP connection if enabled and cdp_url is set
+        if self.config.cache_cdp_connection and self.config.cdp_url:
+            self._using_cached_cdp = True
+            self.config.use_managed_browser = True
+            self.playwright, self.browser = await _CDPConnectionCache.acquire(
+                self.config.cdp_url, self.use_undetected
+            )
+        else:
+            self._using_cached_cdp = False
+            if self.use_undetected:
+                from patchright.async_api import async_playwright
+            else:
+                from playwright.async_api import async_playwright
+
+            # Initialize playwright
+            self.playwright = await async_playwright().start()

        if self.config.cdp_url or self.config.use_managed_browser:
            self.config.use_managed_browser = True
-            cdp_url = await self.managed_browser.start() if not self.config.cdp_url else self.config.cdp_url
-
-            # Add CDP endpoint verification before connecting
-            if not await self._verify_cdp_ready(cdp_url):
-                raise Exception(f"CDP endpoint at {cdp_url} is not ready after startup")
-
-            self.browser = await self.playwright.chromium.connect_over_cdp(cdp_url)
+            if not self._using_cached_cdp:
+                cdp_url = await self.managed_browser.start() if not self.config.cdp_url else self.config.cdp_url
+
+                # Add CDP endpoint verification before connecting
+                if not await self._verify_cdp_ready(cdp_url):
+                    raise Exception(f"CDP endpoint at {cdp_url} is not ready after startup")
+
+                self.browser = await self.playwright.chromium.connect_over_cdp(cdp_url)
contexts = self.browser.contexts
# If browser_context_id is provided, we're using a pre-created context
@@ -1409,6 +1507,24 @@ class BrowserManager:
async def close(self):
"""Close all browser resources and clean up."""
# Cached CDP path: only clean up this instance's sessions/contexts,
# then release the shared connection reference.
if self._using_cached_cdp:
session_ids = list(self.sessions.keys())
for session_id in session_ids:
await self.kill_session(session_id)
for ctx in self.contexts_by_config.values():
try:
await ctx.close()
except Exception:
pass
self.contexts_by_config.clear()
await _CDPConnectionCache.release(self.config.cdp_url)
self.browser = None
self.playwright = None
self._using_cached_cdp = False
return
if self.config.cdp_url:
# When using external CDP, we don't own the browser process.
# If cdp_cleanup_on_close is True, properly disconnect from the browser
@@ -1440,7 +1556,8 @@ class BrowserManager:
)
self.browser = None
# Allow time for CDP connection to fully release before another client connects
-            await asyncio.sleep(1.0)
+            if self.config.cdp_close_delay > 0:
+                await asyncio.sleep(self.config.cdp_close_delay)
# Stop Playwright instance to prevent memory leaks
if self.playwright:
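
To make the reference-counting semantics above concrete, here is an illustrative sketch that drives the cache directly; _CDPConnectionCache is an internal class that BrowserManager normally manages for you, so this is for demonstration only and assumes a Chrome listening on port 9222:

import asyncio
from crawl4ai.browser_manager import _CDPConnectionCache

CDP_URL = "http://localhost:9222"  # placeholder endpoint

async def demo():
    # Two acquires for the same URL share one Playwright subprocess and browser
    pw1, browser1 = await _CDPConnectionCache.acquire(CDP_URL)
    pw2, browser2 = await _CDPConnectionCache.acquire(CDP_URL)
    assert browser1 is browser2  # ref_count is now 2

    await _CDPConnectionCache.release(CDP_URL)  # 2 -> 1, connection stays open
    assert browser1.is_connected()

    await _CDPConnectionCache.release(CDP_URL)  # 1 -> 0, connection closed
    await _CDPConnectionCache.close_all()       # safety net at application shutdown

asyncio.run(demo())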

tests/test_cdp_changes.py (new file, 506 lines)
View File

@@ -0,0 +1,506 @@
"""
Tests for CDP connection caching and configurable close delay.
Two test suites:
1. Regression — default behavior unchanged, basic CDP still works
2. Stress — race conditions, parallel crawlers, locking, cache correctness
All tests are real (no mocks). Requires a running Chrome on port 9222:
chrome --headless=new --no-sandbox --remote-debugging-port=9222
"""
import asyncio
import time
import pytest
from crawl4ai import AsyncWebCrawler
from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig
from crawl4ai.browser_manager import _CDPConnectionCache, BrowserManager
CDP_URL = "http://localhost:9222"
TEST_URL = "https://example.com"
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
async def _quick_crawl(browser_cfg: BrowserConfig, url: str = TEST_URL) -> bool:
"""Run a single crawl, return True if the page loaded successfully."""
run_cfg = CrawlerRunConfig(
wait_until="domcontentloaded",
page_timeout=15000,
verbose=False,
)
async with AsyncWebCrawler(config=browser_cfg) as crawler:
result = await crawler.arun(url=url, config=run_cfg)
return result.success and result.status_code == 200
# ===========================================================================
# SUITE 1 — REGRESSION (default behavior, no new flags)
# ===========================================================================
class TestRegression:
"""Verify nothing is broken when the new parameters keep their defaults."""
@pytest.mark.asyncio
async def test_default_cdp_crawl(self):
"""Basic CDP crawl with default settings still works."""
cfg = BrowserConfig(cdp_url=CDP_URL, headless=True)
assert await _quick_crawl(cfg)
@pytest.mark.asyncio
async def test_default_params_values(self):
"""BrowserConfig defaults are backward-compatible."""
cfg = BrowserConfig()
assert cfg.cdp_close_delay == 1.0
assert cfg.cache_cdp_connection is False
@pytest.mark.asyncio
async def test_cdp_cleanup_on_close_still_works(self):
"""cdp_cleanup_on_close=True path (existing feature) still works."""
cfg = BrowserConfig(
cdp_url=CDP_URL,
headless=True,
cdp_cleanup_on_close=True,
)
assert await _quick_crawl(cfg)
@pytest.mark.asyncio
async def test_sequential_crawls_default(self):
"""Two sequential crawls with default settings both succeed."""
cfg = BrowserConfig(cdp_url=CDP_URL, headless=True)
assert await _quick_crawl(cfg)
assert await _quick_crawl(cfg)
@pytest.mark.asyncio
async def test_cdp_close_delay_default_timing(self):
"""Default close delay is ~1s (not 0, not much more)."""
cfg = BrowserConfig(
cdp_url=CDP_URL,
headless=True,
cdp_cleanup_on_close=True,
)
run_cfg = CrawlerRunConfig(
wait_until="domcontentloaded",
page_timeout=15000,
verbose=False,
)
t0 = time.monotonic()
async with AsyncWebCrawler(config=cfg) as crawler:
await crawler.arun(url=TEST_URL, config=run_cfg)
elapsed = time.monotonic() - t0
# The 1s sleep must be present (at least ~0.9s of close overhead)
assert elapsed >= 0.9, f"Close was too fast ({elapsed:.2f}s), sleep may be missing"
# ===========================================================================
# SUITE 2 — STRESS (cache, parallelism, race conditions, locking)
# ===========================================================================
class TestStress:
"""Hammer the CDP cache and configurable delay under pressure."""
# -- cdp_close_delay -------------------------------------------------------
@pytest.mark.asyncio
async def test_close_delay_zero_skips_sleep(self):
"""cdp_close_delay=0 should make close noticeably faster."""
cfg = BrowserConfig(
cdp_url=CDP_URL,
headless=True,
cdp_cleanup_on_close=True,
cdp_close_delay=0,
)
run_cfg = CrawlerRunConfig(
wait_until="domcontentloaded",
page_timeout=15000,
verbose=False,
)
t0 = time.monotonic()
async with AsyncWebCrawler(config=cfg) as crawler:
await crawler.arun(url=TEST_URL, config=run_cfg)
elapsed = time.monotonic() - t0
# Total time includes the page load; without the 1s close sleep it should stay comfortably under 5s
assert elapsed < 5.0, f"Close too slow with delay=0 ({elapsed:.2f}s)"
@pytest.mark.asyncio
async def test_close_delay_custom_value(self):
"""A custom delay value is respected."""
cfg = BrowserConfig(
cdp_url=CDP_URL,
headless=True,
cdp_cleanup_on_close=True,
cdp_close_delay=0.2,
)
run_cfg = CrawlerRunConfig(
wait_until="domcontentloaded",
page_timeout=15000,
verbose=False,
)
t0 = time.monotonic()
async with AsyncWebCrawler(config=cfg) as crawler:
await crawler.arun(url=TEST_URL, config=run_cfg)
elapsed = time.monotonic() - t0
# Should include at least the 0.2s delay
assert elapsed >= 0.2
# -- cache_cdp_connection: basic -----------------------------------------
@pytest.mark.asyncio
async def test_cache_basic_crawl(self):
"""A single crawl with caching enabled works."""
cfg = BrowserConfig(
cdp_url=CDP_URL,
headless=True,
cache_cdp_connection=True,
)
assert await _quick_crawl(cfg)
# Clean up cache after test
await _CDPConnectionCache.close_all()
@pytest.mark.asyncio
async def test_cache_sequential_reuse(self):
"""Two sequential crawlers reuse the same cached connection."""
cfg = BrowserConfig(
cdp_url=CDP_URL,
headless=True,
cache_cdp_connection=True,
)
# First crawl — creates the cache entry
async with AsyncWebCrawler(config=cfg) as crawler:
r1 = await crawler.arun(
url=TEST_URL,
config=CrawlerRunConfig(wait_until="domcontentloaded", page_timeout=15000, verbose=False),
)
assert r1.success
# After the first crawler closed, the ref count went 1 -> 0, so the entry was removed.
# Acquiring again immediately should therefore create a fresh connection.
# The key thing: this must not crash or hang.
async with AsyncWebCrawler(config=cfg) as crawler:
r2 = await crawler.arun(
url=TEST_URL,
config=CrawlerRunConfig(wait_until="domcontentloaded", page_timeout=15000, verbose=False),
)
assert r2.success
await _CDPConnectionCache.close_all()
@pytest.mark.asyncio
async def test_cache_overlapping_instances(self):
"""Two crawlers alive at the same time share the cache (ref_count=2)."""
cfg = BrowserConfig(
cdp_url=CDP_URL,
headless=True,
cache_cdp_connection=True,
)
run_cfg = CrawlerRunConfig(
wait_until="domcontentloaded",
page_timeout=15000,
verbose=False,
)
crawler1 = AsyncWebCrawler(config=cfg)
await crawler1.start()
crawler2 = AsyncWebCrawler(config=cfg)
await crawler2.start()
# Both should work concurrently
r1, r2 = await asyncio.gather(
crawler1.arun(url=TEST_URL, config=run_cfg),
crawler2.arun(url=TEST_URL, config=run_cfg),
)
assert r1.success
assert r2.success
# Close one — connection must survive for the other
await crawler1.close()
# Second crawler should still work after first closed
r3 = await crawler2.arun(url=TEST_URL, config=run_cfg)
assert r3.success
await crawler2.close()
await _CDPConnectionCache.close_all()
# -- cache: ref counting -------------------------------------------------
@pytest.mark.asyncio
async def test_cache_ref_count_lifecycle(self):
"""Verify ref count goes up and comes back down correctly."""
cfg = BrowserConfig(
cdp_url=CDP_URL,
headless=True,
cache_cdp_connection=True,
)
c1 = AsyncWebCrawler(config=cfg)
await c1.start()
# Cache should have ref_count=1
assert CDP_URL in _CDPConnectionCache._cache
_, _, count = _CDPConnectionCache._cache[CDP_URL]
assert count == 1
c2 = AsyncWebCrawler(config=cfg)
await c2.start()
_, _, count = _CDPConnectionCache._cache[CDP_URL]
assert count == 2
await c1.close()
_, _, count = _CDPConnectionCache._cache[CDP_URL]
assert count == 1
await c2.close()
# Last reference released — entry should be gone
assert CDP_URL not in _CDPConnectionCache._cache
# -- cache: speed benefit ------------------------------------------------
@pytest.mark.asyncio
async def test_cache_faster_than_uncached(self):
"""Cached sequential crawls should be faster than uncached."""
run_cfg = CrawlerRunConfig(
wait_until="domcontentloaded",
page_timeout=15000,
verbose=False,
)
# Uncached: two sequential crawls (each does full playwright start/stop)
cfg_no_cache = BrowserConfig(
cdp_url=CDP_URL,
headless=True,
cdp_cleanup_on_close=True,
cdp_close_delay=0.5,
)
t0 = time.monotonic()
for _ in range(2):
async with AsyncWebCrawler(config=cfg_no_cache) as crawler:
await crawler.arun(url=TEST_URL, config=run_cfg)
uncached_time = time.monotonic() - t0
# Cached: two sequential crawls (share playwright/CDP)
cfg_cache = BrowserConfig(
cdp_url=CDP_URL,
headless=True,
cache_cdp_connection=True,
)
t0 = time.monotonic()
for _ in range(2):
async with AsyncWebCrawler(config=cfg_cache) as crawler:
await crawler.arun(url=TEST_URL, config=run_cfg)
cached_time = time.monotonic() - t0
await _CDPConnectionCache.close_all()
# Cached should be faster (the uncached run pays the 0.5s close delay twice)
assert cached_time < uncached_time, (
f"Cached ({cached_time:.2f}s) was not faster than uncached ({uncached_time:.2f}s)"
)
# -- parallel stress -----------------------------------------------------
@pytest.mark.asyncio
async def test_parallel_cached_crawlers(self):
"""Launch 5 crawlers in parallel, all sharing the cache."""
cfg = BrowserConfig(
cdp_url=CDP_URL,
headless=True,
cache_cdp_connection=True,
create_isolated_context=True,
)
run_cfg = CrawlerRunConfig(
wait_until="domcontentloaded",
page_timeout=20000,
verbose=False,
)
async def crawl_one(idx: int):
async with AsyncWebCrawler(config=cfg) as crawler:
result = await crawler.arun(url=TEST_URL, config=run_cfg)
return idx, result.success
results = await asyncio.gather(*[crawl_one(i) for i in range(5)])
for idx, success in results:
assert success, f"Crawler {idx} failed"
await _CDPConnectionCache.close_all()
@pytest.mark.asyncio
async def test_parallel_mixed_cached_uncached(self):
"""Mix of cached and uncached crawlers running in parallel.
Uses create_isolated_context=True because parallel crawlers sharing
a single default context will cause navigation conflicts — this is
the expected pattern for concurrent CDP access.
"""
run_cfg = CrawlerRunConfig(
wait_until="domcontentloaded",
page_timeout=20000,
verbose=False,
)
async def crawl_cached():
cfg = BrowserConfig(
cdp_url=CDP_URL, headless=True,
cache_cdp_connection=True, create_isolated_context=True,
)
async with AsyncWebCrawler(config=cfg) as crawler:
r = await crawler.arun(url=TEST_URL, config=run_cfg)
return "cached", r.success
async def crawl_uncached():
cfg = BrowserConfig(
cdp_url=CDP_URL, headless=True,
create_isolated_context=True,
)
async with AsyncWebCrawler(config=cfg) as crawler:
r = await crawler.arun(url=TEST_URL, config=run_cfg)
return "uncached", r.success
tasks = [crawl_cached(), crawl_uncached(), crawl_cached(), crawl_uncached()]
results = await asyncio.gather(*tasks)
for label, success in results:
assert success, f"{label} crawler failed"
await _CDPConnectionCache.close_all()
@pytest.mark.asyncio
async def test_rapid_open_close_cache(self):
"""Rapidly open and close 10 crawlers sequentially — no leaks/hangs."""
cfg = BrowserConfig(
cdp_url=CDP_URL,
headless=True,
cache_cdp_connection=True,
)
run_cfg = CrawlerRunConfig(
wait_until="domcontentloaded",
page_timeout=15000,
verbose=False,
)
for i in range(10):
async with AsyncWebCrawler(config=cfg) as crawler:
result = await crawler.arun(url=TEST_URL, config=run_cfg)
assert result.success, f"Iteration {i} failed"
await _CDPConnectionCache.close_all()
@pytest.mark.asyncio
async def test_cache_close_all_idempotent(self):
"""Calling close_all() multiple times doesn't crash."""
cfg = BrowserConfig(
cdp_url=CDP_URL,
headless=True,
cache_cdp_connection=True,
)
async with AsyncWebCrawler(config=cfg) as crawler:
r = await crawler.arun(
url=TEST_URL,
config=CrawlerRunConfig(wait_until="domcontentloaded", page_timeout=15000, verbose=False),
)
assert r.success
await _CDPConnectionCache.close_all()
await _CDPConnectionCache.close_all() # second call must not raise
await _CDPConnectionCache.close_all() # third call must not raise
@pytest.mark.asyncio
async def test_stale_connection_recovery(self):
"""If the cached browser disconnects, next acquire recovers."""
cfg = BrowserConfig(
cdp_url=CDP_URL,
headless=True,
cache_cdp_connection=True,
)
run_cfg = CrawlerRunConfig(
wait_until="domcontentloaded",
page_timeout=15000,
verbose=False,
)
# Build up a cache entry
c1 = AsyncWebCrawler(config=cfg)
await c1.start()
# Forcibly disconnect the cached browser to simulate staleness
if CDP_URL in _CDPConnectionCache._cache:
_, browser, _ = _CDPConnectionCache._cache[CDP_URL]
try:
await browser.close()
except Exception:
pass
await c1.close()
# Next crawler should detect stale and create a fresh connection
async with AsyncWebCrawler(config=cfg) as crawler:
result = await crawler.arun(url=TEST_URL, config=run_cfg)
assert result.success
await _CDPConnectionCache.close_all()
@pytest.mark.asyncio
async def test_parallel_start_race(self):
"""Multiple crawlers calling start() simultaneously — lock prevents races."""
cfg = BrowserConfig(
cdp_url=CDP_URL,
headless=True,
cache_cdp_connection=True,
)
crawlers = [AsyncWebCrawler(config=cfg) for _ in range(5)]
# Start all at once — this hammers _CDPConnectionCache.acquire() concurrently
await asyncio.gather(*[c.start() for c in crawlers])
# All should have the same browser reference
browsers = set()
for c in crawlers:
bm = c.crawler_strategy.browser_manager
browsers.add(id(bm.browser))
# With caching, they should all share the same browser object
assert len(browsers) == 1, f"Expected 1 shared browser, got {len(browsers)}"
# Ref count should be 5
_, _, count = _CDPConnectionCache._cache[CDP_URL]
assert count == 5
# Close all
await asyncio.gather(*[c.close() for c in crawlers])
# Cache should be empty
assert CDP_URL not in _CDPConnectionCache._cache
@pytest.mark.asyncio
async def test_parallel_close_race(self):
"""Multiple crawlers closing simultaneously — no double-free."""
cfg = BrowserConfig(
cdp_url=CDP_URL,
headless=True,
cache_cdp_connection=True,
)
crawlers = [AsyncWebCrawler(config=cfg) for _ in range(5)]
await asyncio.gather(*[c.start() for c in crawlers])
# Close all at once — hammers _CDPConnectionCache.release() concurrently
await asyncio.gather(*[c.close() for c in crawlers])
# Cache must be clean
assert CDP_URL not in _CDPConnectionCache._cache
# Must still work after everything is closed
async with AsyncWebCrawler(config=cfg) as crawler:
r = await crawler.arun(
url=TEST_URL,
config=CrawlerRunConfig(wait_until="domcontentloaded", page_timeout=15000, verbose=False),
)
assert r.success
await _CDPConnectionCache.close_all()
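
Assuming pytest and pytest-asyncio are installed (the tests use @pytest.mark.asyncio), a programmatic runner might look like this; the invocation details are illustrative:

# Requires a running Chrome: chrome --headless=new --no-sandbox --remote-debugging-port=9222
import sys
import pytest

sys.exit(pytest.main(["-v", "tests/test_cdp_changes.py"]))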