""" Regression tests for PR #1640 — memory leak / hang under high concurrency with max_pages_before_recycle enabled. Tests three bugs that were fixed: Bug 1: Race condition — release_page_with_context() runs BEFORE _maybe_bump_browser_version() adds the sig to _pending_cleanup. FIX: Don't add refcount-0 sigs to pending; clean them up immediately. Bug 2: The finally block in _crawl_web can fail before calling release_page_with_context(), leaking the refcount permanently. FIX: Call release_page_with_context() FIRST in the finally block. Bug 3: Accumulated pending_cleanup entries hit _max_pending_browsers cap, blocking ALL get_page() calls → system-wide deadlock. FIX: 30s timeout on safety cap wait + force-clean stuck entries. Exit code 0 = all tests pass. Exit code 1 = regression found. """ import asyncio import sys import os import time sys.path.insert(0, os.path.dirname(__file__)) from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig from crawl4ai.browser_manager import BrowserManager PASS = 0 FAIL = 0 def check(name, condition): global PASS, FAIL if condition: PASS += 1 print(f" PASS: {name}") else: FAIL += 1 print(f" FAIL: {name}") async def test_bug1_multi_config_race(): """ Bug 1 fix: idle sigs (refcount=0) must NOT be added to _pending_cleanup. They should be cleaned up immediately during the version bump. """ print("\n" + "="*70) print("TEST: Bug 1 — idle sig must not get stuck in _pending_cleanup") print("="*70) config = BrowserConfig( headless=True, extra_args=['--no-sandbox', '--disable-gpu'], max_pages_before_recycle=3, ) bm = BrowserManager(config) await bm.start() try: config_a = CrawlerRunConfig(magic=True, cache_mode="bypass") config_b = CrawlerRunConfig(magic=False, cache_mode="bypass") # Use config A, then release → refcount 0 page_a, _ = await bm.get_page(config_a) sig_a = bm._page_to_sig.get(page_a) await bm.release_page_with_context(page_a) await page_a.close() print(f" sig_a refcount after release: {bm._context_refcounts.get(sig_a)}") # Use config B twice → pages_served hits threshold → version bump page_b1, _ = await bm.get_page(config_b) page_b2, _ = await bm.get_page(config_b) sig_b = bm._page_to_sig.get(page_b1) # At this point the version should have bumped (3 pages served >= threshold 3) print(f" _browser_version: {bm._browser_version}") print(f" _pending_cleanup sigs: {list(bm._pending_cleanup.keys())}") # sig_a (refcount=0) must NOT be in _pending_cleanup check("sig_a NOT in _pending_cleanup", sig_a not in bm._pending_cleanup) # sig_a should have been cleaned up from _context_refcounts check("sig_a cleaned from _context_refcounts", sig_a not in bm._context_refcounts) # sig_b (refcount>0) SHOULD be in _pending_cleanup (it will drain naturally) check("sig_b IS in _pending_cleanup (active, will drain)", sig_b in bm._pending_cleanup) # Release B pages → sig_b drains → cleaned up await bm.release_page_with_context(page_b1) await page_b1.close() await bm.release_page_with_context(page_b2) await page_b2.close() check("sig_b cleaned after release", sig_b not in bm._pending_cleanup) check("_pending_cleanup is empty", len(bm._pending_cleanup) == 0) finally: await bm.close() async def test_bug2_release_always_called(): """ Bug 2 fix: release_page_with_context() must be called even when the browser is in a bad state. The fix moves release_page_with_context() to the FIRST line of the finally block in _crawl_web, wrapped in try/except. Here we verify that release_page_with_context itself works even after browser crash, and that the fixed finally block pattern always decrements the refcount. """ print("\n" + "="*70) print("TEST: Bug 2 — release_page_with_context must work after browser crash") print("="*70) config = BrowserConfig( headless=True, extra_args=['--no-sandbox', '--disable-gpu'], max_pages_before_recycle=5, ) bm = BrowserManager(config) await bm.start() try: crawl_config = CrawlerRunConfig(magic=True, cache_mode="bypass") page, ctx = await bm.get_page(crawl_config) sig = bm._page_to_sig.get(page) print(f" sig refcount before crash: {bm._context_refcounts.get(sig)}") check("refcount is 1 before crash", bm._context_refcounts.get(sig) == 1) # Simulate browser crash if bm.browser: await bm.browser.close() bm.browser = None # The FIX: call release_page_with_context even after crash # (simulating what the fixed finally block does) try: await bm.release_page_with_context(page) except Exception: pass refcount_after = bm._context_refcounts.get(sig, 0) print(f" sig refcount after crash + release: {refcount_after}") check("refcount decremented to 0 after crash + release", refcount_after == 0) check("page removed from _page_to_sig", page not in bm._page_to_sig) finally: bm.browser = None bm.contexts_by_config.clear() bm._context_refcounts.clear() bm._context_last_used.clear() bm._page_to_sig.clear() if bm.playwright: await bm.playwright.stop() async def test_bug3_safety_cap_timeout(): """ Bug 3 fix: the safety cap wait must have a timeout. When stuck entries accumulate, the timeout fires and force-cleans entries with refcount 0, preventing permanent deadlock. """ print("\n" + "="*70) print("TEST: Bug 3 — safety cap wait must not block forever") print("="*70) config = BrowserConfig( headless=True, extra_args=['--no-sandbox', '--disable-gpu'], max_pages_before_recycle=2, ) bm = BrowserManager(config) await bm.start() try: crawl_config = CrawlerRunConfig(magic=True, cache_mode="bypass") # Inject stuck entries WITH refcount 0 (simulating leaked refcounts # that were later force-decremented or never properly tracked) print(f" Safety cap: {bm._max_pending_browsers}") for i in range(bm._max_pending_browsers): fake_sig = f"stuck_sig_{i}" bm._pending_cleanup[fake_sig] = {"version": i, "done": asyncio.Event()} # refcount 0 = stuck (no future release will clean these up) bm._context_refcounts[fake_sig] = 0 print(f" Injected {len(bm._pending_cleanup)} stuck entries (refcount=0)") bm._pages_served = bm.config.max_pages_before_recycle # The fix: get_page should NOT block forever. # The 30s timeout will fire, force-clean stuck entries, and proceed. # We use a 35s test timeout to allow the 30s internal timeout to fire. print(f" Calling get_page() — should unblock after ~30s timeout...") start = time.monotonic() try: page, ctx = await asyncio.wait_for( bm.get_page(crawl_config), timeout=35.0 ) elapsed = time.monotonic() - start print(f" get_page() returned after {elapsed:.1f}s") check("get_page() did NOT deadlock (returned within 35s)", True) check("stuck entries were force-cleaned", len(bm._pending_cleanup) < bm._max_pending_browsers) await bm.release_page_with_context(page) await page.close() except asyncio.TimeoutError: elapsed = time.monotonic() - start print(f" get_page() STILL blocked after {elapsed:.1f}s") check("get_page() did NOT deadlock", False) finally: bm._pending_cleanup.clear() bm._context_refcounts.clear() await bm.close() async def test_real_concurrent_crawl(): """ Integration test: run many concurrent crawls with recycling and verify no stuck entries or deadlocks. """ print("\n" + "="*70) print("TEST: Real concurrent crawls with recycling") print("="*70) config = BrowserConfig( headless=True, extra_args=['--no-sandbox', '--disable-gpu'], max_pages_before_recycle=10, ) bm = BrowserManager(config) await bm.start() TOTAL = 80 CONCURRENT = 8 completed = 0 errors = 0 sem = asyncio.Semaphore(CONCURRENT) async def do_crawl(i): nonlocal completed, errors async with sem: try: crawl_config = CrawlerRunConfig(magic=True, cache_mode="bypass") page, ctx = await asyncio.wait_for( bm.get_page(crawl_config), timeout=30.0 ) try: await page.goto("https://example.com", timeout=15000) except Exception: pass # Use the FIXED finally pattern: release first, then close try: await bm.release_page_with_context(page) except Exception: pass try: await page.close() except Exception: pass completed += 1 if completed % 20 == 0: print(f" [{completed}/{TOTAL}] version={bm._browser_version} " f"pending={len(bm._pending_cleanup)} " f"pages_served={bm._pages_served}") except asyncio.TimeoutError: errors += 1 print(f" [{i}] TIMEOUT in get_page()!") except Exception as e: errors += 1 if errors <= 3: print(f" [{i}] Error: {e}") start = time.monotonic() tasks = [asyncio.create_task(do_crawl(i)) for i in range(TOTAL)] await asyncio.gather(*tasks) elapsed = time.monotonic() - start print(f"\n Results: {completed}/{TOTAL} completed, {errors} errors, {elapsed:.1f}s") stuck = [s for s in bm._pending_cleanup if bm._context_refcounts.get(s, 0) == 0] check(f"all {TOTAL} crawls completed", completed == TOTAL) check("no errors", errors == 0) check("no stuck entries in _pending_cleanup", len(stuck) == 0) check("no timeouts (no deadlock)", errors == 0) await bm.close() async def test_multi_config_concurrent(): """ Integration test: concurrent crawls with DIFFERENT configs to exercise the multi-sig path that triggered Bug 1. """ print("\n" + "="*70) print("TEST: Multi-config concurrent crawls") print("="*70) config = BrowserConfig( headless=True, extra_args=['--no-sandbox', '--disable-gpu'], max_pages_before_recycle=5, ) bm = BrowserManager(config) await bm.start() TOTAL = 40 CONCURRENT = 6 completed = 0 errors = 0 sem = asyncio.Semaphore(CONCURRENT) configs = [ CrawlerRunConfig(magic=True, cache_mode="bypass"), CrawlerRunConfig(magic=False, cache_mode="bypass"), CrawlerRunConfig(magic=True, simulate_user=True, cache_mode="bypass"), ] async def do_crawl(i): nonlocal completed, errors async with sem: try: crawl_config = configs[i % len(configs)] page, ctx = await asyncio.wait_for( bm.get_page(crawl_config), timeout=30.0 ) try: await page.goto("https://example.com", timeout=15000) except Exception: pass try: await bm.release_page_with_context(page) except Exception: pass try: await page.close() except Exception: pass completed += 1 except asyncio.TimeoutError: errors += 1 print(f" [{i}] TIMEOUT!") print(f" pending={len(bm._pending_cleanup)}") except Exception as e: errors += 1 if errors <= 3: print(f" [{i}] Error: {e}") start = time.monotonic() tasks = [asyncio.create_task(do_crawl(i)) for i in range(TOTAL)] await asyncio.gather(*tasks) elapsed = time.monotonic() - start stuck = [s for s in bm._pending_cleanup if bm._context_refcounts.get(s, 0) == 0] print(f"\n Results: {completed}/{TOTAL}, {errors} errors, {elapsed:.1f}s") print(f" Final: version={bm._browser_version} pending={len(bm._pending_cleanup)} stuck={len(stuck)}") check(f"all {TOTAL} multi-config crawls completed", completed == TOTAL) check("no stuck entries", len(stuck) == 0) check("no timeouts", errors == 0) await bm.close() async def main(): print("="*70) print("PR #1640 Regression Tests") print("="*70) await test_bug2_release_always_called() await test_bug1_multi_config_race() await test_bug3_safety_cap_timeout() await test_real_concurrent_crawl() await test_multi_config_concurrent() print("\n" + "="*70) if FAIL == 0: print(f"ALL {PASS} CHECKS PASSED") else: print(f"FAILED: {FAIL} checks failed, {PASS} passed") print("="*70) sys.exit(1 if FAIL > 0 else 0) if __name__ == "__main__": asyncio.run(main())