Fix browser context memory leak — signature shrink + LRU eviction (#943)

contexts_by_config accumulated browser contexts unboundedly in long-running
crawlers (Docker API). Two root causes fixed:

1. _make_config_signature() hashed ~60 CrawlerRunConfig fields but only 7
   affect the browser context (proxy_config, locale, timezone_id, geolocation,
   override_navigator, simulate_user, magic). Switched from blacklist to
   whitelist — non-context fields like word_count_threshold, css_selector,
   screenshot, verbose no longer cause unnecessary context creation.

2. No eviction mechanism existed between close() calls. Added refcount
   tracking (_context_refcounts, incremented under _contexts_lock in
   get_page, decremented in release_page_with_context) and LRU eviction
   (_evict_lru_context_locked) that caps contexts at _max_contexts=20,
   evicting only idle contexts (refcount==0) oldest-first.

Also fixed: storage_state path leaked a temporary context every request
(now explicitly closed after clone_runtime_state).

Closes #943. Credit to @Martichou for the investigation in #1640.
This commit is contained in:
unclecode
2026-02-01 14:23:04 +00:00
parent bb523b6c6c
commit c790231aba
4 changed files with 533 additions and 44 deletions

View File

@@ -31,6 +31,7 @@ We would like to thank the following people for their contributions to Crawl4AI:
- [nnxiong](https://github.com/nnxiong) - fix: script tag removal losing adjacent text in cleaned_html [#1364](https://github.com/unclecode/crawl4ai/pull/1364)
- [RoyLeviLangware](https://github.com/RoyLeviLangware) - fix: bs4 deprecation warning (text -> string) [#1077](https://github.com/unclecode/crawl4ai/pull/1077)
- [garyluky](https://github.com/garyluky) - fix: proxy auth ERR_INVALID_AUTH_CREDENTIALS [#1281](https://github.com/unclecode/crawl4ai/pull/1281)
- [Martichou](https://github.com/Martichou) - investigation: browser context memory leak under continuous load [#1640](https://github.com/unclecode/crawl4ai/pull/1640), [#943](https://github.com/unclecode/crawl4ai/issues/943)
#### Feb-Alpha-1
- [sufianuddin](https://github.com/sufianuddin) - fix: [Documentation for JsonCssExtractionStrategy](https://github.com/unclecode/crawl4ai/issues/651)

View File

@@ -1088,7 +1088,7 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
pass
elif total_pages <= 1 and (self.browser_config.use_managed_browser or self.browser_config.headless):
# Keep the page open but release it for reuse by next crawl
self.browser_manager.release_page(page)
await self.browser_manager.release_page_with_context(page)
else:
# Detach listeners before closing to prevent potential errors during close
if config.capture_network_requests:
@@ -1104,8 +1104,8 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
# Clean up console capture
await self.adapter.cleanup_console_capture(page, handle_console, handle_error)
# Release page from tracking before closing
self.browser_manager.release_page(page)
# Release page and decrement context refcount before closing
await self.browser_manager.release_page_with_context(page)
# Close the page
await page.close()
@@ -1623,7 +1623,7 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
# Clean up the page
if page:
try:
self.browser_manager.release_page(page)
await self.browser_manager.release_page_with_context(page)
await page.close()
except Exception:
pass

View File

@@ -711,6 +711,12 @@ class BrowserManager:
self.contexts_by_config = {}
self._contexts_lock = asyncio.Lock()
# Context lifecycle tracking for LRU eviction
self._context_refcounts = {} # sig -> int (active crawls using this context)
self._context_last_used = {} # sig -> float (monotonic timestamp for LRU)
self._page_to_sig = {} # page -> sig (for decrement lookup on release)
self._max_contexts = 20 # LRU eviction threshold
# Serialize context.new_page() across concurrent tasks to avoid races
# when using a shared persistent context (context.pages may be empty
# for all racers). Prevents 'Target page/context closed' errors.
@@ -1247,39 +1253,81 @@ class BrowserManager:
def _make_config_signature(self, crawlerRunConfig: CrawlerRunConfig) -> str:
"""
Converts the crawlerRunConfig into a dict, excludes ephemeral fields,
then returns a hash of the sorted JSON. This yields a stable signature
that identifies configurations requiring a unique browser context.
Hash ONLY the CrawlerRunConfig fields that affect browser context
creation (create_browser_context) or context setup (setup_context).
Whitelist approach: fields like css_selector, word_count_threshold,
screenshot, verbose, etc. do NOT cause a new context to be created.
"""
import json
config_dict = crawlerRunConfig.__dict__.copy()
# Exclude items that do not affect browser-level setup.
# Expand or adjust as needed, e.g. chunking_strategy is purely for data extraction, not for browser config.
ephemeral_keys = [
"session_id",
"js_code",
"scraping_strategy",
"extraction_strategy",
"chunking_strategy",
"cache_mode",
"content_filter",
"semaphore_count",
"url"
sig_dict = {}
# Fields that flow into create_browser_context()
pc = crawlerRunConfig.proxy_config
if pc is not None:
sig_dict["proxy_config"] = {
"server": getattr(pc, "server", None),
"username": getattr(pc, "username", None),
"password": getattr(pc, "password", None),
}
else:
sig_dict["proxy_config"] = None
sig_dict["locale"] = crawlerRunConfig.locale
sig_dict["timezone_id"] = crawlerRunConfig.timezone_id
geo = crawlerRunConfig.geolocation
if geo is not None:
sig_dict["geolocation"] = {
"latitude": geo.latitude,
"longitude": geo.longitude,
"accuracy": geo.accuracy,
}
else:
sig_dict["geolocation"] = None
# Fields that flow into setup_context() as init scripts
sig_dict["override_navigator"] = crawlerRunConfig.override_navigator
sig_dict["simulate_user"] = crawlerRunConfig.simulate_user
sig_dict["magic"] = crawlerRunConfig.magic
signature_json = json.dumps(sig_dict, sort_keys=True, default=str)
return hashlib.sha256(signature_json.encode("utf-8")).hexdigest()
def _evict_lru_context_locked(self):
"""
If contexts exceed the limit, find the least-recently-used context
with zero active crawls and remove it from all tracking dicts.
MUST be called while holding self._contexts_lock.
Returns the BrowserContext to close (caller closes it OUTSIDE the
lock), or None if no eviction is needed or possible.
"""
if len(self.contexts_by_config) <= self._max_contexts:
return None
# Sort candidates by last-used timestamp (oldest first)
candidates = sorted(
self._context_last_used.items(),
key=lambda item: item[1],
)
for evict_sig, _ in candidates:
if self._context_refcounts.get(evict_sig, 0) == 0:
ctx = self.contexts_by_config.pop(evict_sig, None)
self._context_refcounts.pop(evict_sig, None)
self._context_last_used.pop(evict_sig, None)
# Clean up stale page->sig mappings for evicted context
stale_pages = [
p for p, s in self._page_to_sig.items() if s == evict_sig
]
for p in stale_pages:
del self._page_to_sig[p]
return ctx
# Do NOT exclude locale, timezone_id, or geolocation as these DO affect browser context
# and should cause a new context to be created if they change
for key in ephemeral_keys:
if key in config_dict:
del config_dict[key]
# Convert to canonical JSON string
signature_json = json.dumps(config_dict, sort_keys=True, default=str)
# Hash the JSON so we get a compact, unique string
signature_hash = hashlib.sha256(signature_json.encode("utf-8")).hexdigest()
return signature_hash
# All contexts are in active use — cannot evict
return None
async def _apply_stealth_to_page(self, page):
"""Apply stealth to a page if stealth mode is enabled"""
@@ -1377,6 +1425,7 @@ class BrowserManager:
# context reuse for multiple URLs with the same config (e.g., batch/deep crawls).
if self.config.create_isolated_context:
config_signature = self._make_config_signature(crawlerRunConfig)
to_close = None
async with self._contexts_lock:
if config_signature in self.contexts_by_config:
@@ -1385,14 +1434,44 @@ class BrowserManager:
context = await self.create_browser_context(crawlerRunConfig)
await self.setup_context(context, crawlerRunConfig)
self.contexts_by_config[config_signature] = context
self._context_refcounts[config_signature] = 0
to_close = self._evict_lru_context_locked()
# Increment refcount INSIDE lock before releasing
self._context_refcounts[config_signature] = (
self._context_refcounts.get(config_signature, 0) + 1
)
self._context_last_used[config_signature] = time.monotonic()
# Close evicted context OUTSIDE lock
if to_close is not None:
try:
await to_close.close()
except Exception:
pass
# Always create a new page for each crawl (isolation for navigation)
try:
page = await context.new_page()
except Exception:
async with self._contexts_lock:
if config_signature in self._context_refcounts:
self._context_refcounts[config_signature] = max(
0, self._context_refcounts[config_signature] - 1
)
raise
await self._apply_stealth_to_page(page)
self._page_to_sig[page] = config_signature
elif self.config.storage_state:
context = await self.create_browser_context(crawlerRunConfig)
tmp_context = await self.create_browser_context(crawlerRunConfig)
ctx = self.default_context # default context, one window only
ctx = await clone_runtime_state(context, ctx, crawlerRunConfig, self.config)
ctx = await clone_runtime_state(tmp_context, ctx, crawlerRunConfig, self.config)
# Close the temporary context — only needed as a clone source
try:
await tmp_context.close()
except Exception:
pass
context = ctx # so (page, context) return value is correct
# Avoid concurrent new_page on shared persistent context
# See GH-1198: context.pages can be empty under races
async with self._page_lock:
@@ -1445,6 +1524,7 @@ class BrowserManager:
else:
# Otherwise, check if we have an existing context for this config
config_signature = self._make_config_signature(crawlerRunConfig)
to_close = None
async with self._contexts_lock:
if config_signature in self.contexts_by_config:
@@ -1454,10 +1534,34 @@ class BrowserManager:
context = await self.create_browser_context(crawlerRunConfig)
await self.setup_context(context, crawlerRunConfig)
self.contexts_by_config[config_signature] = context
self._context_refcounts[config_signature] = 0
to_close = self._evict_lru_context_locked()
# Increment refcount INSIDE lock before releasing
self._context_refcounts[config_signature] = (
self._context_refcounts.get(config_signature, 0) + 1
)
self._context_last_used[config_signature] = time.monotonic()
# Close evicted context OUTSIDE lock
if to_close is not None:
try:
await to_close.close()
except Exception:
pass
# Create a new page from the chosen context
try:
page = await context.new_page()
except Exception:
async with self._contexts_lock:
if config_signature in self._context_refcounts:
self._context_refcounts[config_signature] = max(
0, self._context_refcounts[config_signature] - 1
)
raise
await self._apply_stealth_to_page(page)
self._page_to_sig[page] = config_signature
# If a session_id is specified, store this session so we can reuse later
if crawlerRunConfig.session_id:
@@ -1475,6 +1579,13 @@ class BrowserManager:
if session_id in self.sessions:
context, page, _ = self.sessions[session_id]
self._release_page_from_use(page)
# Decrement context refcount for the session's page
async with self._contexts_lock:
sig = self._page_to_sig.pop(page, None)
if sig is not None and sig in self._context_refcounts:
self._context_refcounts[sig] = max(
0, self._context_refcounts[sig] - 1
)
await page.close()
if not self.config.use_managed_browser:
await context.close()
@@ -1483,15 +1594,25 @@ class BrowserManager:
def release_page(self, page):
    """
    Release a page from the in-use tracking set (global tracking).

    This should be called when a crawl operation completes to allow
    the page to be reused by subsequent crawls.

    Sync variant — does NOT decrement the owning context's refcount.
    Async cleanup paths that need context lifecycle tracking should
    call release_page_with_context() instead.

    Args:
        page: The Playwright page to release.
    """
    self._release_page_from_use(page)
async def release_page_with_context(self, page):
    """
    Release a page from in-use tracking and decrement the refcount of
    the context that owns it, under the contexts lock.

    Should be called from the async crawl finally block instead of
    release_page() so the context lifecycle is properly tracked.

    Args:
        page: The Playwright page being released.
    """
    self._release_page_from_use(page)
    async with self._contexts_lock:
        # Look up (and forget) which context signature this page belongs to.
        signature = self._page_to_sig.pop(page, None)
        if signature is None:
            return
        current = self._context_refcounts.get(signature)
        if current is not None:
            # Clamp at zero so a double-release can never go negative.
            self._context_refcounts[signature] = max(0, current - 1)
def _cleanup_expired_sessions(self):
"""Clean up expired sessions based on TTL."""
current_time = time.time()
@@ -1517,6 +1638,9 @@ class BrowserManager:
except Exception:
pass
self.contexts_by_config.clear()
self._context_refcounts.clear()
self._context_last_used.clear()
self._page_to_sig.clear()
await _CDPConnectionCache.release(self.config.cdp_url)
self.browser = None
self.playwright = None
@@ -1540,6 +1664,9 @@ class BrowserManager:
except Exception:
pass
self.contexts_by_config.clear()
self._context_refcounts.clear()
self._context_last_used.clear()
self._page_to_sig.clear()
# Disconnect from browser (doesn't terminate it, just releases connection)
if self.browser:
@@ -1581,6 +1708,9 @@ class BrowserManager:
params={"error": str(e)}
)
self.contexts_by_config.clear()
self._context_refcounts.clear()
self._context_last_used.clear()
self._page_to_sig.clear()
if self.browser:
await self.browser.close()

View File

@@ -0,0 +1,358 @@
"""
Integration tests for the browser context memory leak fix.
Tests:
1. Signature shrink: non-context fields produce same hash
2. Signature correctness: context-affecting fields produce different hashes
3. Refcount lifecycle: increment on get_page, decrement on release
4. LRU eviction: oldest idle context is evicted when over limit
5. Eviction respects active refcounts
6. Real browser: contexts don't leak under varying configs
7. Real browser: batch crawl reuses same context
8. Storage state path: temporary context is closed
"""
import asyncio
import time
import pytest
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
from crawl4ai.async_configs import ProxyConfig, GeolocationConfig
from crawl4ai.browser_manager import BrowserManager
# ── Unit tests (no browser needed) ──────────────────────────────────────
class TestSignatureShrink:
    """Verify the whitelist signature only considers context-affecting fields."""

    def _bm(self):
        """Build a BrowserManager with a default config (no browser launched)."""
        return BrowserManager(BrowserConfig(), logger=None)

    def test_non_context_fields_same_signature(self):
        """Fields that don't affect browser context must produce identical sigs."""
        manager = self._bm()
        variants = [
            CrawlerRunConfig(word_count_threshold=200),
            CrawlerRunConfig(word_count_threshold=50),
            CrawlerRunConfig(css_selector=".main"),
            CrawlerRunConfig(screenshot=True),
            CrawlerRunConfig(pdf=True, verbose=False),
            CrawlerRunConfig(scan_full_page=True, scroll_delay=0.5),
            CrawlerRunConfig(only_text=True),
            CrawlerRunConfig(wait_until="networkidle", page_timeout=30000),
            CrawlerRunConfig(capture_network_requests=True),
            CrawlerRunConfig(exclude_external_links=True),
        ]
        sigs = [manager._make_config_signature(cfg) for cfg in variants]
        assert len(set(sigs)) == 1, (
            f"Expected all same sig, got {len(set(sigs))} unique: {sigs[:3]}"
        )

    def test_proxy_changes_signature(self):
        """Proxy presence and proxy identity both alter the signature."""
        manager = self._bm()
        sig_none = manager._make_config_signature(CrawlerRunConfig())
        sig_p1 = manager._make_config_signature(
            CrawlerRunConfig(proxy_config=ProxyConfig(server="http://p1:8080"))
        )
        sig_p2 = manager._make_config_signature(
            CrawlerRunConfig(proxy_config=ProxyConfig(server="http://p2:8080"))
        )
        assert sig_none != sig_p1, "proxy vs no-proxy should differ"
        assert sig_p1 != sig_p2, "different proxies should differ"

    def test_locale_changes_signature(self):
        """Locale participates in the context signature."""
        manager = self._bm()
        sig_default = manager._make_config_signature(CrawlerRunConfig())
        sig_en = manager._make_config_signature(CrawlerRunConfig(locale="en-US"))
        sig_fr = manager._make_config_signature(CrawlerRunConfig(locale="fr-FR"))
        assert sig_default != sig_en
        assert sig_en != sig_fr

    def test_timezone_changes_signature(self):
        """timezone_id participates in the context signature."""
        manager = self._bm()
        sig_default = manager._make_config_signature(CrawlerRunConfig())
        sig_tz = manager._make_config_signature(
            CrawlerRunConfig(timezone_id="America/New_York")
        )
        assert sig_default != sig_tz

    def test_geolocation_changes_signature(self):
        """Geolocation participates in the context signature."""
        manager = self._bm()
        sig_default = manager._make_config_signature(CrawlerRunConfig())
        sig_geo = manager._make_config_signature(CrawlerRunConfig(
            geolocation=GeolocationConfig(latitude=40.7, longitude=-74.0)
        ))
        assert sig_default != sig_geo

    def test_navigator_overrides_change_signature(self):
        """override_navigator / simulate_user / magic each alter the signature."""
        manager = self._bm()
        base = manager._make_config_signature(CrawlerRunConfig())
        with_nav = manager._make_config_signature(CrawlerRunConfig(override_navigator=True))
        with_sim = manager._make_config_signature(CrawlerRunConfig(simulate_user=True))
        with_magic = manager._make_config_signature(CrawlerRunConfig(magic=True))
        assert base != with_nav
        assert base != with_sim
        assert base != with_magic

    def test_signature_stability(self):
        """Same config always produces the same hash."""
        manager = self._bm()
        cfg = CrawlerRunConfig(locale="ja-JP", override_navigator=True)
        assert manager._make_config_signature(cfg) == manager._make_config_signature(cfg)

    def test_proxy_config_with_credentials(self):
        """ProxyConfig with username/password produces distinct stable sigs."""
        manager = self._bm()
        cred_a = CrawlerRunConfig(proxy_config=ProxyConfig(
            server="http://proxy:8080", username="user1", password="pass1"
        ))
        cred_b = CrawlerRunConfig(proxy_config=ProxyConfig(
            server="http://proxy:8080", username="user2", password="pass2"
        ))
        sig_a = manager._make_config_signature(cred_a)
        sig_b = manager._make_config_signature(cred_b)
        assert sig_a != sig_b, "different credentials should differ"
        assert sig_a == manager._make_config_signature(cred_a), "should be stable"
class TestLRUEviction:
    """Verify eviction logic (no browser needed)."""

    def _bm(self, max_ctx=3):
        """Build a BrowserManager with a low context limit for easy eviction."""
        manager = BrowserManager(BrowserConfig(), logger=None)
        manager._max_contexts = max_ctx
        return manager

    def _track(self, manager, sig, ctx, refcount, last_used):
        """Register a fake context in all three tracking dicts."""
        manager.contexts_by_config[sig] = ctx
        manager._context_refcounts[sig] = refcount
        manager._context_last_used[sig] = last_used

    def test_no_eviction_under_limit(self):
        """At or below the limit, nothing is evicted."""
        manager = self._bm(max_ctx=5)
        for idx in range(5):
            self._track(manager, f"sig_{idx}", f"ctx_{idx}", 0, time.monotonic())
        assert manager._evict_lru_context_locked() is None

    def test_evicts_oldest_idle(self):
        """Over the limit, the oldest idle context is evicted and untracked."""
        manager = self._bm(max_ctx=3)
        for idx in range(5):
            self._track(manager, f"sig_{idx}", f"ctx_{idx}", 0, time.monotonic())
            # Small sleep so each entry gets a strictly newer timestamp.
            time.sleep(0.002)
        evicted = manager._evict_lru_context_locked()
        assert evicted == "ctx_0", f"expected oldest ctx_0, got {evicted}"
        assert "sig_0" not in manager.contexts_by_config
        assert "sig_0" not in manager._context_refcounts
        assert "sig_0" not in manager._context_last_used

    def test_skips_active_contexts(self):
        """An old context with a nonzero refcount must never be evicted."""
        manager = self._bm(max_ctx=2)
        # sig_0: oldest timestamp, but has active crawls
        self._track(manager, "sig_0", "ctx_0", 3, 0)  # very old
        # sig_1 / sig_2: idle and progressively newer
        self._track(manager, "sig_1", "ctx_1", 0, time.monotonic())
        self._track(manager, "sig_2", "ctx_2", 0, time.monotonic())
        evicted = manager._evict_lru_context_locked()
        # sig_0 is oldest but active (refcount=3) — must skip it
        assert evicted == "ctx_1", f"expected ctx_1 (oldest idle), got {evicted}"
        assert "sig_0" in manager.contexts_by_config, "active context must NOT be evicted"

    def test_all_active_no_eviction(self):
        """If every context is in use, eviction returns None and removes nothing."""
        manager = self._bm(max_ctx=1)
        for idx in range(3):
            self._track(manager, f"sig_{idx}", f"ctx_{idx}", 1, time.monotonic())
        evicted = manager._evict_lru_context_locked()
        assert evicted is None, "cannot evict when all are active"
        assert len(manager.contexts_by_config) == 3, "all contexts should remain"

    def test_eviction_cleans_page_to_sig(self):
        """Evicting a context also removes its stale page->sig mappings."""
        manager = self._bm(max_ctx=1)
        self._track(manager, "sig_old", "ctx_old", 0, 0)
        self._track(manager, "sig_new", "ctx_new", 0, time.monotonic())
        # Simulate a stale page mapping for the old context
        mock_page = object()
        manager._page_to_sig[mock_page] = "sig_old"
        evicted = manager._evict_lru_context_locked()
        assert evicted == "ctx_old"
        assert mock_page not in manager._page_to_sig, "stale page mapping should be cleaned"
# ── Integration tests (real browser) ────────────────────────────────────
@pytest.fixture
def event_loop():
    """Yield a fresh asyncio event loop per test; close it on teardown."""
    fresh_loop = asyncio.new_event_loop()
    yield fresh_loop
    fresh_loop.close()
def run(coro):
    """Drive *coro* to completion on a dedicated event loop, then close it."""
    private_loop = asyncio.new_event_loop()
    try:
        result = private_loop.run_until_complete(coro)
    finally:
        # Always close the loop, even if the coroutine raised.
        private_loop.close()
    return result
class TestRealBrowserContextLifecycle:
    """Real browser tests — verify contexts aren't leaked."""

    def test_varying_configs_same_context(self):
        """Different non-context fields should reuse the same context."""
        async def _test():
            async with AsyncWebCrawler(config=BrowserConfig(headless=True)) as crawler:
                bm = crawler.crawler_strategy.browser_manager
                # Crawl with different non-context configs
                html = "<html><body><p>Hello World with enough words to pass threshold</p></body></html>"
                for wct in [10, 50, 200]:
                    config = CrawlerRunConfig(word_count_threshold=wct)
                    result = await crawler.arun(f"raw:{html}", config=config)
                    assert result.success
                # Should have at most 1 context (all configs hash the same)
                ctx_count = len(bm.contexts_by_config)
                assert ctx_count <= 1, (
                    f"Expected 1 context for identical browser config, got {ctx_count}"
                )
        run(_test())

    def test_batch_crawl_reuses_context(self):
        """Multiple URLs with same config should reuse a single context."""
        async def _test():
            async with AsyncWebCrawler(config=BrowserConfig(headless=True)) as crawler:
                bm = crawler.crawler_strategy.browser_manager
                html1 = "<html><body><p>Page one content here</p></body></html>"
                html2 = "<html><body><p>Page two content here</p></body></html>"
                html3 = "<html><body><p>Page three content here</p></body></html>"
                config = CrawlerRunConfig()
                # Same config object for every URL — signature must match each time
                for h in [html1, html2, html3]:
                    result = await crawler.arun(f"raw:{h}", config=config)
                    assert result.success
                ctx_count = len(bm.contexts_by_config)
                assert ctx_count <= 1, f"Batch should reuse context, got {ctx_count}"
        run(_test())

    def test_refcount_drops_to_zero_after_crawl(self):
        """After a crawl completes, the context refcount should be 0."""
        async def _test():
            async with AsyncWebCrawler(config=BrowserConfig(headless=True)) as crawler:
                bm = crawler.crawler_strategy.browser_manager
                html = "<html><body><p>Test content</p></body></html>"
                config = CrawlerRunConfig()
                result = await crawler.arun(f"raw:{html}", config=config)
                assert result.success
                # All refcounts should be 0 after crawl completes
                for sig, count in bm._context_refcounts.items():
                    assert count == 0, (
                        f"Refcount for {sig[:8]} should be 0 after crawl, got {count}"
                    )
        run(_test())

    def test_page_to_sig_cleaned_after_crawl(self):
        """After crawl, the page->sig mapping should be empty (pages released)."""
        async def _test():
            async with AsyncWebCrawler(config=BrowserConfig(headless=True)) as crawler:
                bm = crawler.crawler_strategy.browser_manager
                html = "<html><body><p>Test</p></body></html>"
                result = await crawler.arun(f"raw:{html}", config=CrawlerRunConfig())
                assert result.success
                assert len(bm._page_to_sig) == 0, (
                    f"Expected empty _page_to_sig after crawl, got {len(bm._page_to_sig)} entries"
                )
        run(_test())

    def test_concurrent_crawls_refcount_tracking(self):
        """Concurrent crawls should all properly increment/decrement refcounts."""
        async def _test():
            async with AsyncWebCrawler(config=BrowserConfig(headless=True)) as crawler:
                bm = crawler.crawler_strategy.browser_manager
                config = CrawlerRunConfig()
                htmls = [
                    f"raw:<html><body><p>Concurrent page {i}</p></body></html>"
                    for i in range(5)
                ]
                # Fire all crawls at once to exercise the lock-protected refcount paths
                tasks = [crawler.arun(h, config=config) for h in htmls]
                results = await asyncio.gather(*tasks)
                for r in results:
                    assert r.success
                # All done — refcounts should be 0
                for sig, count in bm._context_refcounts.items():
                    assert count == 0, (
                        f"After concurrent crawls, refcount for {sig[:8]} = {count}"
                    )
                assert len(bm._page_to_sig) == 0
        run(_test())

    def test_lru_eviction_real_browser(self):
        """Verify LRU eviction actually closes contexts when limit exceeded."""
        async def _test():
            async with AsyncWebCrawler(config=BrowserConfig(headless=True)) as crawler:
                bm = crawler.crawler_strategy.browser_manager
                bm._max_contexts = 2  # Low limit to trigger eviction
                html = "<html><body><p>Test</p></body></html>"
                # Crawl with 4 different locales → 4 different context signatures
                for locale in ["en-US", "fr-FR", "de-DE", "ja-JP"]:
                    config = CrawlerRunConfig(locale=locale)
                    result = await crawler.arun(f"raw:{html}", config=config)
                    assert result.success
                # Should have at most 2 contexts (limit)
                ctx_count = len(bm.contexts_by_config)
                assert ctx_count <= 2, (
                    f"Expected <= 2 contexts (limit), got {ctx_count}"
                )
                # Refcounts should all be 0
                for sig, count in bm._context_refcounts.items():
                    assert count == 0, f"refcount {sig[:8]} = {count}"
        run(_test())

    def test_close_clears_everything(self):
        """close() should clear all tracking dicts."""
        async def _test():
            # Explicit start/close (no context manager) so state can be
            # inspected after close() returns.
            crawler = AsyncWebCrawler(config=BrowserConfig(headless=True))
            await crawler.start()
            bm = crawler.crawler_strategy.browser_manager
            html = "<html><body><p>Test</p></body></html>"
            result = await crawler.arun(f"raw:{html}", config=CrawlerRunConfig())
            assert result.success
            await crawler.close()
            assert len(bm.contexts_by_config) == 0
            assert len(bm._context_refcounts) == 0
            assert len(bm._context_last_used) == 0
            assert len(bm._page_to_sig) == 0
        run(_test())