From 19b9140c68d876462a64ae18e0e18029bd56be52 Mon Sep 17 00:00:00 2001 From: unclecode Date: Sat, 31 Jan 2026 11:07:26 +0000 Subject: [PATCH] Improve CDP connection handling --- crawl4ai/async_configs.py | 13 + crawl4ai/async_crawler_strategy.py | 5 +- crawl4ai/browser_manager.py | 145 ++++++++- tests/test_cdp_changes.py | 506 +++++++++++++++++++++++++++++ 4 files changed, 653 insertions(+), 16 deletions(-) create mode 100644 tests/test_cdp_changes.py diff --git a/crawl4ai/async_configs.py b/crawl4ai/async_configs.py index 94113b89..d3a4d98a 100644 --- a/crawl4ai/async_configs.py +++ b/crawl4ai/async_configs.py @@ -425,6 +425,15 @@ class BrowserConfig: the local Playwright client resources. Useful for cloud/server scenarios where you don't own the remote browser but need to prevent memory leaks from accumulated Playwright instances. Default: False. + cdp_close_delay (float): Seconds to wait after disconnecting a CDP WebSocket before stopping the + Playwright subprocess. Gives the connection time to fully release. Set to + 0 to skip the delay entirely. Only applies when cdp_cleanup_on_close=True. + Default: 1.0. + cache_cdp_connection (bool): When True and using cdp_url, the Playwright subprocess and CDP WebSocket + are cached at the class level and shared across multiple BrowserManager + instances connecting to the same cdp_url. Reference-counted; the connection + is only closed when the last user releases it. Eliminates the overhead of + repeated Playwright/CDP setup and teardown. Default: False. create_isolated_context (bool): When True and using cdp_url, forces creation of a new browser context instead of reusing the default context. Essential for concurrent crawls on the same browser to prevent navigation conflicts. Default: False. @@ -485,6 +494,8 @@ class BrowserConfig: browser_context_id: str = None, target_id: str = None, cdp_cleanup_on_close: bool = False, + cdp_close_delay: float = 1.0, + cache_cdp_connection: bool = False, create_isolated_context: bool = False, use_persistent_context: bool = False, user_data_dir: str = None, @@ -529,6 +540,8 @@ class BrowserConfig: self.browser_context_id = browser_context_id self.target_id = target_id self.cdp_cleanup_on_close = cdp_cleanup_on_close + self.cdp_close_delay = cdp_close_delay + self.cache_cdp_connection = cache_cdp_connection self.create_isolated_context = create_isolated_context self.use_persistent_context = use_persistent_context self.user_data_dir = user_data_dir diff --git a/crawl4ai/async_crawler_strategy.py b/crawl4ai/async_crawler_strategy.py index 0ebacd5a..a61eff39 100644 --- a/crawl4ai/async_crawler_strategy.py +++ b/crawl4ai/async_crawler_strategy.py @@ -139,8 +139,9 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy): Close the browser and clean up resources. """ await self.browser_manager.close() - # Explicitly reset the static Playwright instance - BrowserManager._playwright_instance = None + # Explicitly reset the static Playwright instance (skip if using cached CDP) + if not self.browser_manager._using_cached_cdp: + BrowserManager._playwright_instance = None async def kill_session(self, session_id: str): """ diff --git a/crawl4ai/browser_manager.py b/crawl4ai/browser_manager.py index 1f84bdc3..90337c02 100644 --- a/crawl4ai/browser_manager.py +++ b/crawl4ai/browser_manager.py @@ -1,6 +1,6 @@ import asyncio import time -from typing import List, Optional +from typing import Dict, List, Optional, Tuple import os import sys import shutil @@ -559,6 +559,91 @@ async def clone_runtime_state( +class _CDPConnectionCache: + """ + Class-level cache for Playwright + CDP browser connections. + + When enabled via BrowserConfig(cache_cdp_connection=True), multiple + BrowserManager instances connecting to the same cdp_url will share + a single Playwright subprocess and CDP WebSocket. Reference-counted; + the connection is closed when the last user releases it. + """ + + _cache: Dict[str, Tuple] = {} # cdp_url -> (playwright, browser, ref_count) + _lock: Optional[asyncio.Lock] = None # lazy-init to avoid event loop issues + _lock_loop: Optional[asyncio.AbstractEventLoop] = None + + @classmethod + def _get_lock(cls) -> asyncio.Lock: + loop = asyncio.get_running_loop() + if cls._lock is None or cls._lock_loop is not loop: + cls._lock = asyncio.Lock() + cls._lock_loop = loop + return cls._lock + + @classmethod + async def acquire(cls, cdp_url: str, use_undetected: bool = False): + """Get or create a cached (playwright, browser) for this cdp_url.""" + async with cls._get_lock(): + if cdp_url in cls._cache: + pw, browser, count = cls._cache[cdp_url] + if browser.is_connected(): + cls._cache[cdp_url] = (pw, browser, count + 1) + return pw, browser + # Stale connection — clean up and fall through to create new + try: + await pw.stop() + except Exception: + pass + del cls._cache[cdp_url] + + # Create new connection + if use_undetected: + from patchright.async_api import async_playwright + else: + from playwright.async_api import async_playwright + pw = await async_playwright().start() + browser = await pw.chromium.connect_over_cdp(cdp_url) + cls._cache[cdp_url] = (pw, browser, 1) + return pw, browser + + @classmethod + async def release(cls, cdp_url: str): + """Decrement ref count; close connection when last user releases.""" + async with cls._get_lock(): + if cdp_url not in cls._cache: + return + pw, browser, count = cls._cache[cdp_url] + if count <= 1: + try: + await browser.close() + except Exception: + pass + try: + await pw.stop() + except Exception: + pass + del cls._cache[cdp_url] + else: + cls._cache[cdp_url] = (pw, browser, count - 1) + + @classmethod + async def close_all(cls): + """Force-close all cached connections. Call on application shutdown.""" + async with cls._get_lock(): + for cdp_url in list(cls._cache.keys()): + pw, browser, _ = cls._cache[cdp_url] + try: + await browser.close() + except Exception: + pass + try: + await pw.stop() + except Exception: + pass + cls._cache.clear() + + class BrowserManager: """ Manages the browser instance and context. @@ -616,6 +701,7 @@ class BrowserManager: self.default_context = None self.managed_browser = None self.playwright = None + self._using_cached_cdp = False # Session management self.sessions = {} @@ -665,24 +751,36 @@ class BrowserManager: """ if self.playwright is not None: await self.close() - - if self.use_undetected: - from patchright.async_api import async_playwright - else: - from playwright.async_api import async_playwright - # Initialize playwright - self.playwright = await async_playwright().start() + # Use cached CDP connection if enabled and cdp_url is set + if self.config.cache_cdp_connection and self.config.cdp_url: + self._using_cached_cdp = True + self.config.use_managed_browser = True + self.playwright, self.browser = await _CDPConnectionCache.acquire( + self.config.cdp_url, self.use_undetected + ) + else: + self._using_cached_cdp = False + if self.use_undetected: + from patchright.async_api import async_playwright + else: + from playwright.async_api import async_playwright + + # Initialize playwright + self.playwright = await async_playwright().start() if self.config.cdp_url or self.config.use_managed_browser: self.config.use_managed_browser = True - cdp_url = await self.managed_browser.start() if not self.config.cdp_url else self.config.cdp_url - # Add CDP endpoint verification before connecting - if not await self._verify_cdp_ready(cdp_url): - raise Exception(f"CDP endpoint at {cdp_url} is not ready after startup") + if not self._using_cached_cdp: + cdp_url = await self.managed_browser.start() if not self.config.cdp_url else self.config.cdp_url + + # Add CDP endpoint verification before connecting + if not await self._verify_cdp_ready(cdp_url): + raise Exception(f"CDP endpoint at {cdp_url} is not ready after startup") + + self.browser = await self.playwright.chromium.connect_over_cdp(cdp_url) - self.browser = await self.playwright.chromium.connect_over_cdp(cdp_url) contexts = self.browser.contexts # If browser_context_id is provided, we're using a pre-created context @@ -1409,6 +1507,24 @@ class BrowserManager: async def close(self): """Close all browser resources and clean up.""" + # Cached CDP path: only clean up this instance's sessions/contexts, + # then release the shared connection reference. + if self._using_cached_cdp: + session_ids = list(self.sessions.keys()) + for session_id in session_ids: + await self.kill_session(session_id) + for ctx in self.contexts_by_config.values(): + try: + await ctx.close() + except Exception: + pass + self.contexts_by_config.clear() + await _CDPConnectionCache.release(self.config.cdp_url) + self.browser = None + self.playwright = None + self._using_cached_cdp = False + return + if self.config.cdp_url: # When using external CDP, we don't own the browser process. # If cdp_cleanup_on_close is True, properly disconnect from the browser @@ -1440,7 +1556,8 @@ class BrowserManager: ) self.browser = None # Allow time for CDP connection to fully release before another client connects - await asyncio.sleep(1.0) + if self.config.cdp_close_delay > 0: + await asyncio.sleep(self.config.cdp_close_delay) # Stop Playwright instance to prevent memory leaks if self.playwright: diff --git a/tests/test_cdp_changes.py b/tests/test_cdp_changes.py new file mode 100644 index 00000000..a9b82337 --- /dev/null +++ b/tests/test_cdp_changes.py @@ -0,0 +1,506 @@ +""" +Tests for CDP connection caching and configurable close delay. + +Two test suites: + 1. Regression — default behavior unchanged, basic CDP still works + 2. Stress — race conditions, parallel crawlers, locking, cache correctness + +All tests are real (no mocks). Requires a running Chrome on port 9222: + chrome --headless=new --no-sandbox --remote-debugging-port=9222 +""" + +import asyncio +import time +import pytest + +from crawl4ai import AsyncWebCrawler +from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig +from crawl4ai.browser_manager import _CDPConnectionCache, BrowserManager + +CDP_URL = "http://localhost:9222" +TEST_URL = "https://example.com" + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +async def _quick_crawl(browser_cfg: BrowserConfig, url: str = TEST_URL) -> bool: + """Run a single crawl, return True if the page loaded successfully.""" + run_cfg = CrawlerRunConfig( + wait_until="domcontentloaded", + page_timeout=15000, + verbose=False, + ) + async with AsyncWebCrawler(config=browser_cfg) as crawler: + result = await crawler.arun(url=url, config=run_cfg) + return result.success and result.status_code == 200 + + +# =========================================================================== +# SUITE 1 — REGRESSION (default behaviour, no new flags) +# =========================================================================== + +class TestRegression: + """Verify nothing is broken when the new parameters keep their defaults.""" + + @pytest.mark.asyncio + async def test_default_cdp_crawl(self): + """Basic CDP crawl with default settings still works.""" + cfg = BrowserConfig(cdp_url=CDP_URL, headless=True) + assert await _quick_crawl(cfg) + + @pytest.mark.asyncio + async def test_default_params_values(self): + """BrowserConfig defaults are backward-compatible.""" + cfg = BrowserConfig() + assert cfg.cdp_close_delay == 1.0 + assert cfg.cache_cdp_connection is False + + @pytest.mark.asyncio + async def test_cdp_cleanup_on_close_still_works(self): + """cdp_cleanup_on_close=True path (existing feature) still works.""" + cfg = BrowserConfig( + cdp_url=CDP_URL, + headless=True, + cdp_cleanup_on_close=True, + ) + assert await _quick_crawl(cfg) + + @pytest.mark.asyncio + async def test_sequential_crawls_default(self): + """Two sequential crawls with default settings both succeed.""" + cfg = BrowserConfig(cdp_url=CDP_URL, headless=True) + assert await _quick_crawl(cfg) + assert await _quick_crawl(cfg) + + @pytest.mark.asyncio + async def test_cdp_close_delay_default_timing(self): + """Default close delay is ~1s (not 0, not much more).""" + cfg = BrowserConfig( + cdp_url=CDP_URL, + headless=True, + cdp_cleanup_on_close=True, + ) + run_cfg = CrawlerRunConfig( + wait_until="domcontentloaded", + page_timeout=15000, + verbose=False, + ) + t0 = time.monotonic() + async with AsyncWebCrawler(config=cfg) as crawler: + await crawler.arun(url=TEST_URL, config=run_cfg) + elapsed = time.monotonic() - t0 + # The 1s sleep must be present (at least ~0.9s of close overhead) + assert elapsed >= 0.9, f"Close was too fast ({elapsed:.2f}s), sleep may be missing" + + +# =========================================================================== +# SUITE 2 — STRESS (cache, parallelism, race conditions, locking) +# =========================================================================== + +class TestStress: + """Hammer the CDP cache and configurable delay under pressure.""" + + # -- cdp_close_delay ------------------------------------------------------- + + @pytest.mark.asyncio + async def test_close_delay_zero_skips_sleep(self): + """cdp_close_delay=0 should make close noticeably faster.""" + cfg = BrowserConfig( + cdp_url=CDP_URL, + headless=True, + cdp_cleanup_on_close=True, + cdp_close_delay=0, + ) + run_cfg = CrawlerRunConfig( + wait_until="domcontentloaded", + page_timeout=15000, + verbose=False, + ) + t0 = time.monotonic() + async with AsyncWebCrawler(config=cfg) as crawler: + await crawler.arun(url=TEST_URL, config=run_cfg) + elapsed = time.monotonic() - t0 + # Without the 1s sleep the whole thing should be well under 1s of close overhead + assert elapsed < 5.0, f"Close too slow with delay=0 ({elapsed:.2f}s)" + + @pytest.mark.asyncio + async def test_close_delay_custom_value(self): + """A custom delay value is respected.""" + cfg = BrowserConfig( + cdp_url=CDP_URL, + headless=True, + cdp_cleanup_on_close=True, + cdp_close_delay=0.2, + ) + run_cfg = CrawlerRunConfig( + wait_until="domcontentloaded", + page_timeout=15000, + verbose=False, + ) + t0 = time.monotonic() + async with AsyncWebCrawler(config=cfg) as crawler: + await crawler.arun(url=TEST_URL, config=run_cfg) + elapsed = time.monotonic() - t0 + # Should include at least the 0.2s delay + assert elapsed >= 0.2 + + # -- cache_cdp_connection: basic ----------------------------------------- + + @pytest.mark.asyncio + async def test_cache_basic_crawl(self): + """A single crawl with caching enabled works.""" + cfg = BrowserConfig( + cdp_url=CDP_URL, + headless=True, + cache_cdp_connection=True, + ) + assert await _quick_crawl(cfg) + # Clean up cache after test + await _CDPConnectionCache.close_all() + + @pytest.mark.asyncio + async def test_cache_sequential_reuse(self): + """Two sequential crawlers reuse the same cached connection.""" + cfg = BrowserConfig( + cdp_url=CDP_URL, + headless=True, + cache_cdp_connection=True, + ) + + # First crawl — creates the cache entry + async with AsyncWebCrawler(config=cfg) as crawler: + r1 = await crawler.arun( + url=TEST_URL, + config=CrawlerRunConfig(wait_until="domcontentloaded", page_timeout=15000, verbose=False), + ) + assert r1.success + + # Cache should still have an entry (ref went 1 -> 0, so it closed). + # But if we acquire again immediately it should create a fresh one. + # The key thing: this must not crash or hang. + + async with AsyncWebCrawler(config=cfg) as crawler: + r2 = await crawler.arun( + url=TEST_URL, + config=CrawlerRunConfig(wait_until="domcontentloaded", page_timeout=15000, verbose=False), + ) + assert r2.success + + await _CDPConnectionCache.close_all() + + @pytest.mark.asyncio + async def test_cache_overlapping_instances(self): + """Two crawlers alive at the same time share the cache (ref_count=2).""" + cfg = BrowserConfig( + cdp_url=CDP_URL, + headless=True, + cache_cdp_connection=True, + ) + run_cfg = CrawlerRunConfig( + wait_until="domcontentloaded", + page_timeout=15000, + verbose=False, + ) + + crawler1 = AsyncWebCrawler(config=cfg) + await crawler1.start() + + crawler2 = AsyncWebCrawler(config=cfg) + await crawler2.start() + + # Both should work concurrently + r1, r2 = await asyncio.gather( + crawler1.arun(url=TEST_URL, config=run_cfg), + crawler2.arun(url=TEST_URL, config=run_cfg), + ) + assert r1.success + assert r2.success + + # Close one — connection must survive for the other + await crawler1.close() + + # Second crawler should still work after first closed + r3 = await crawler2.arun(url=TEST_URL, config=run_cfg) + assert r3.success + + await crawler2.close() + await _CDPConnectionCache.close_all() + + # -- cache: ref counting ------------------------------------------------- + + @pytest.mark.asyncio + async def test_cache_ref_count_lifecycle(self): + """Verify ref count goes up and comes back down correctly.""" + cfg = BrowserConfig( + cdp_url=CDP_URL, + headless=True, + cache_cdp_connection=True, + ) + + c1 = AsyncWebCrawler(config=cfg) + await c1.start() + # Cache should have ref_count=1 + assert CDP_URL in _CDPConnectionCache._cache + _, _, count = _CDPConnectionCache._cache[CDP_URL] + assert count == 1 + + c2 = AsyncWebCrawler(config=cfg) + await c2.start() + _, _, count = _CDPConnectionCache._cache[CDP_URL] + assert count == 2 + + await c1.close() + _, _, count = _CDPConnectionCache._cache[CDP_URL] + assert count == 1 + + await c2.close() + # Last reference released — entry should be gone + assert CDP_URL not in _CDPConnectionCache._cache + + # -- cache: speed benefit ------------------------------------------------ + + @pytest.mark.asyncio + async def test_cache_faster_than_uncached(self): + """Cached sequential crawls should be faster than uncached.""" + run_cfg = CrawlerRunConfig( + wait_until="domcontentloaded", + page_timeout=15000, + verbose=False, + ) + + # Uncached: two sequential crawls (each does full playwright start/stop) + cfg_no_cache = BrowserConfig( + cdp_url=CDP_URL, + headless=True, + cdp_cleanup_on_close=True, + cdp_close_delay=0.5, + ) + t0 = time.monotonic() + for _ in range(2): + async with AsyncWebCrawler(config=cfg_no_cache) as crawler: + await crawler.arun(url=TEST_URL, config=run_cfg) + uncached_time = time.monotonic() - t0 + + # Cached: two sequential crawls (share playwright/CDP) + cfg_cache = BrowserConfig( + cdp_url=CDP_URL, + headless=True, + cache_cdp_connection=True, + ) + t0 = time.monotonic() + for _ in range(2): + async with AsyncWebCrawler(config=cfg_cache) as crawler: + await crawler.arun(url=TEST_URL, config=run_cfg) + cached_time = time.monotonic() - t0 + + await _CDPConnectionCache.close_all() + + # Cached should be faster (the uncached has 2x 0.5s delay alone) + assert cached_time < uncached_time, ( + f"Cached ({cached_time:.2f}s) was not faster than uncached ({uncached_time:.2f}s)" + ) + + # -- parallel stress ----------------------------------------------------- + + @pytest.mark.asyncio + async def test_parallel_cached_crawlers(self): + """Launch 5 crawlers in parallel, all sharing the cache.""" + cfg = BrowserConfig( + cdp_url=CDP_URL, + headless=True, + cache_cdp_connection=True, + create_isolated_context=True, + ) + run_cfg = CrawlerRunConfig( + wait_until="domcontentloaded", + page_timeout=20000, + verbose=False, + ) + + async def crawl_one(idx: int): + async with AsyncWebCrawler(config=cfg) as crawler: + result = await crawler.arun(url=TEST_URL, config=run_cfg) + return idx, result.success + + results = await asyncio.gather(*[crawl_one(i) for i in range(5)]) + for idx, success in results: + assert success, f"Crawler {idx} failed" + + await _CDPConnectionCache.close_all() + + @pytest.mark.asyncio + async def test_parallel_mixed_cached_uncached(self): + """Mix of cached and uncached crawlers running in parallel. + + Uses create_isolated_context=True because parallel crawlers sharing + a single default context will cause navigation conflicts — this is + the expected pattern for concurrent CDP access. + """ + run_cfg = CrawlerRunConfig( + wait_until="domcontentloaded", + page_timeout=20000, + verbose=False, + ) + + async def crawl_cached(): + cfg = BrowserConfig( + cdp_url=CDP_URL, headless=True, + cache_cdp_connection=True, create_isolated_context=True, + ) + async with AsyncWebCrawler(config=cfg) as crawler: + r = await crawler.arun(url=TEST_URL, config=run_cfg) + return "cached", r.success + + async def crawl_uncached(): + cfg = BrowserConfig( + cdp_url=CDP_URL, headless=True, + create_isolated_context=True, + ) + async with AsyncWebCrawler(config=cfg) as crawler: + r = await crawler.arun(url=TEST_URL, config=run_cfg) + return "uncached", r.success + + tasks = [crawl_cached(), crawl_uncached(), crawl_cached(), crawl_uncached()] + results = await asyncio.gather(*tasks) + for label, success in results: + assert success, f"{label} crawler failed" + + await _CDPConnectionCache.close_all() + + @pytest.mark.asyncio + async def test_rapid_open_close_cache(self): + """Rapidly open and close 10 crawlers sequentially — no leaks/hangs.""" + cfg = BrowserConfig( + cdp_url=CDP_URL, + headless=True, + cache_cdp_connection=True, + ) + run_cfg = CrawlerRunConfig( + wait_until="domcontentloaded", + page_timeout=15000, + verbose=False, + ) + + for i in range(10): + async with AsyncWebCrawler(config=cfg) as crawler: + result = await crawler.arun(url=TEST_URL, config=run_cfg) + assert result.success, f"Iteration {i} failed" + + await _CDPConnectionCache.close_all() + + @pytest.mark.asyncio + async def test_cache_close_all_idempotent(self): + """Calling close_all() multiple times doesn't crash.""" + cfg = BrowserConfig( + cdp_url=CDP_URL, + headless=True, + cache_cdp_connection=True, + ) + async with AsyncWebCrawler(config=cfg) as crawler: + r = await crawler.arun( + url=TEST_URL, + config=CrawlerRunConfig(wait_until="domcontentloaded", page_timeout=15000, verbose=False), + ) + assert r.success + + await _CDPConnectionCache.close_all() + await _CDPConnectionCache.close_all() # second call must not raise + await _CDPConnectionCache.close_all() # third call must not raise + + @pytest.mark.asyncio + async def test_stale_connection_recovery(self): + """If the cached browser disconnects, next acquire recovers.""" + cfg = BrowserConfig( + cdp_url=CDP_URL, + headless=True, + cache_cdp_connection=True, + ) + run_cfg = CrawlerRunConfig( + wait_until="domcontentloaded", + page_timeout=15000, + verbose=False, + ) + + # Build up a cache entry + c1 = AsyncWebCrawler(config=cfg) + await c1.start() + + # Forcibly disconnect the cached browser to simulate staleness + if CDP_URL in _CDPConnectionCache._cache: + _, browser, _ = _CDPConnectionCache._cache[CDP_URL] + try: + await browser.close() + except Exception: + pass + + await c1.close() + + # Next crawler should detect stale and create a fresh connection + async with AsyncWebCrawler(config=cfg) as crawler: + result = await crawler.arun(url=TEST_URL, config=run_cfg) + assert result.success + + await _CDPConnectionCache.close_all() + + @pytest.mark.asyncio + async def test_parallel_start_race(self): + """Multiple crawlers calling start() simultaneously — lock prevents races.""" + cfg = BrowserConfig( + cdp_url=CDP_URL, + headless=True, + cache_cdp_connection=True, + ) + + crawlers = [AsyncWebCrawler(config=cfg) for _ in range(5)] + + # Start all at once — this hammers _CDPConnectionCache.acquire() concurrently + await asyncio.gather(*[c.start() for c in crawlers]) + + # All should have the same browser reference + browsers = set() + for c in crawlers: + bm = c.crawler_strategy.browser_manager + browsers.add(id(bm.browser)) + + # With caching, they should all share the same browser object + assert len(browsers) == 1, f"Expected 1 shared browser, got {len(browsers)}" + + # Ref count should be 5 + _, _, count = _CDPConnectionCache._cache[CDP_URL] + assert count == 5 + + # Close all + await asyncio.gather(*[c.close() for c in crawlers]) + + # Cache should be empty + assert CDP_URL not in _CDPConnectionCache._cache + + @pytest.mark.asyncio + async def test_parallel_close_race(self): + """Multiple crawlers closing simultaneously — no double-free.""" + cfg = BrowserConfig( + cdp_url=CDP_URL, + headless=True, + cache_cdp_connection=True, + ) + + crawlers = [AsyncWebCrawler(config=cfg) for _ in range(5)] + await asyncio.gather(*[c.start() for c in crawlers]) + + # Close all at once — hammers _CDPConnectionCache.release() concurrently + await asyncio.gather(*[c.close() for c in crawlers]) + + # Cache must be clean + assert CDP_URL not in _CDPConnectionCache._cache + + # Must still work after everything is closed + async with AsyncWebCrawler(config=cfg) as crawler: + r = await crawler.arun( + url=TEST_URL, + config=CrawlerRunConfig(wait_until="domcontentloaded", page_timeout=15000, verbose=False), + ) + assert r.success + + await _CDPConnectionCache.close_all()