From 21e6c418be4f58c3bf309d0257cb61dbdecca94e Mon Sep 17 00:00:00 2001
From: unclecode
Date: Mon, 26 Jan 2026 13:06:31 +0000
Subject: [PATCH] Fix: Keep storage_state.json in profile shrink

- Add storage_state.json to all KEEP_PATTERNS levels
- This file contains unencrypted cookies in Playwright format
- Critical for cross-machine profile portability (local -> cloud)
---
 crawl4ai/browser_profiler.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/crawl4ai/browser_profiler.py b/crawl4ai/browser_profiler.py
index fd26f389..c944a596 100644
--- a/crawl4ai/browser_profiler.py
+++ b/crawl4ai/browser_profiler.py
@@ -37,21 +37,23 @@ class ShrinkLevel(str, Enum):
 
 # Whitelist: what to KEEP at each level (everything else gets deleted)
 # Note: "Cookies" can be at root (older Chrome) or in Network/ (Chrome 96+)
+# storage_state.json is Playwright's portable cookie format (unencrypted)
+# It MUST be kept in all levels for cross-machine profile portability
 KEEP_PATTERNS: Dict[ShrinkLevel, Set[str]] = {
     ShrinkLevel.NONE: {"*"},
     ShrinkLevel.LIGHT: {
         "Network", "Cookies", "Local Storage", "Session Storage", "IndexedDB",
         "Preferences", "Secure Preferences", "Login Data", "Login Data For Account",
         "Web Data", "History", "History-journal", "Visited Links", "Bookmarks",
-        "TransportSecurity", "Trust Tokens",
+        "TransportSecurity", "Trust Tokens", "storage_state.json",
     },
     ShrinkLevel.MEDIUM: {
         "Network", "Cookies", "Local Storage", "Session Storage", "IndexedDB",
         "Preferences", "Secure Preferences", "Login Data", "Login Data For Account",
-        "Web Data", "TransportSecurity",
+        "Web Data", "TransportSecurity", "storage_state.json",
     },
-    ShrinkLevel.AGGRESSIVE: {"Network", "Cookies", "Local Storage", "IndexedDB", "Preferences"},
-    ShrinkLevel.MINIMAL: {"Network", "Cookies", "Local Storage"},
+    ShrinkLevel.AGGRESSIVE: {"Network", "Cookies", "Local Storage", "IndexedDB", "Preferences", "storage_state.json"},
+    ShrinkLevel.MINIMAL: {"Network", "Cookies", "Local Storage", "storage_state.json"},
 }
 
 