fix(browser_manager): serialize new_page on persistent context to avoid races ref #1198

- Add _page_lock and guarded creation; handle empty context.pages safely
- Prevents BrowserContext.new_page “Target page/context closed” during concurrent arun_many
2025-08-11 18:55:43 +08:00
parent a5bcac4c9d
commit 96c4b0de67
2 changed files with 63 additions and 2 deletions
--- a/tests/general/test_persistent_context.py
+++ b/tests/general/test_persistent_context.py
@@ -0,0 +1,43 @@
+import asyncio
+import os
+from crawl4ai.async_webcrawler import AsyncWebCrawler
+from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig, CacheMode
+
+# Simple concurrency test for persistent context page creation
+# Usage: python tests/general/test_persistent_context.py
+
+URLS = [
+    # "https://example.com",
+    "https://httpbin.org/html",
+    "https://www.python.org/",
+    "https://www.rust-lang.org/",
+]
+
+async def main():  # Crawl every URL in URLS through one persistent-context browser and print a summary line per result.
+    profile_dir = os.path.join(os.path.expanduser("~"), ".crawl4ai", "profiles", "test-persistent-profile")
+    os.makedirs(profile_dir, exist_ok=True)  # create the profile directory if it does not exist yet
+
+    browser_config = BrowserConfig(
+        browser_type="chromium",
+        headless=True,
+        use_persistent_context=True,  # the mode this script is meant to exercise
+        user_data_dir=profile_dir,  # profile directory prepared above
+        use_managed_browser=True,
+        verbose=True,
+    )
+
+    run_cfg = CrawlerRunConfig(
+        cache_mode=CacheMode.BYPASS,  # presumably forces a real fetch instead of a cached result -- TODO confirm
+        stream=False,  # NOTE(review): results are iterated as a plain collection below; confirm this is the non-streaming mode
+        verbose=True,
+    )
+
+    async with AsyncWebCrawler(config=browser_config) as crawler:
+        results = await crawler.arun_many(URLS, config=run_cfg)  # batch crawl; the commit message names arun_many as the concurrent path
+        for r in results:
+            print(r.url, r.success, len(r.markdown.raw_markdown) if r.markdown else 0)  # markdown length, or 0 when r.markdown is falsy
+        # r = await crawler.arun(url=URLS[0], config=run_cfg)
+        # print(r.url, r.success, len(r.markdown.raw_markdown) if r.markdown else 0)
+
+if __name__ == "__main__":
+    asyncio.run(main())