Files
crawl4ai/tests/browser/test_profile_shrink.py
2026-01-24 08:02:52 +00:00

1054 lines
39 KiB
Python

"""
Tests for profile shrinking functionality.
Test approach:
1. Unit tests for core shrink logic with mock file structures
2. Integration tests with real Playwright browser to verify auth preservation
3. Edge case handling (empty profiles, missing profiles, permission errors)
"""
import pytest
import shutil
from pathlib import Path
from unittest.mock import patch, MagicMock
from crawl4ai.browser_profiler import (
ShrinkLevel,
KEEP_PATTERNS,
shrink_profile,
_get_size,
_format_size,
BrowserProfiler,
)
class TestShrinkLevel:
"""Test ShrinkLevel enum."""
def test_enum_values(self):
assert ShrinkLevel.NONE.value == "none"
assert ShrinkLevel.LIGHT.value == "light"
assert ShrinkLevel.MEDIUM.value == "medium"
assert ShrinkLevel.AGGRESSIVE.value == "aggressive"
assert ShrinkLevel.MINIMAL.value == "minimal"
def test_enum_from_string(self):
assert ShrinkLevel("aggressive") == ShrinkLevel.AGGRESSIVE
assert ShrinkLevel("minimal") == ShrinkLevel.MINIMAL
def test_keep_patterns_defined_for_all_levels(self):
for level in ShrinkLevel:
assert level in KEEP_PATTERNS
class TestHelperFunctions:
"""Test helper functions."""
def test_format_size_bytes(self):
assert _format_size(500) == "500.0 B"
def test_format_size_kilobytes(self):
assert _format_size(2048) == "2.0 KB"
def test_format_size_megabytes(self):
assert _format_size(5 * 1024 * 1024) == "5.0 MB"
def test_format_size_gigabytes(self):
assert _format_size(3 * 1024 * 1024 * 1024) == "3.0 GB"
def test_get_size_file(self, tmp_path):
test_file = tmp_path / "test.txt"
test_file.write_text("x" * 100)
assert _get_size(test_file) == 100
def test_get_size_directory(self, tmp_path):
(tmp_path / "file1.txt").write_text("a" * 50)
(tmp_path / "file2.txt").write_text("b" * 50)
subdir = tmp_path / "subdir"
subdir.mkdir()
(subdir / "file3.txt").write_text("c" * 100)
assert _get_size(tmp_path) == 200
def test_get_size_empty_directory(self, tmp_path):
assert _get_size(tmp_path) == 0
class TestShrinkProfile:
"""Test shrink_profile function."""
@pytest.fixture
def mock_profile(self, tmp_path):
"""Create a mock Chrome profile structure."""
profile = tmp_path / "test_profile"
profile.mkdir()
# Essential auth directories (should be kept)
(profile / "Network").mkdir()
(profile / "Network" / "Cookies").write_bytes(b"x" * 1000)
(profile / "Local Storage").mkdir()
(profile / "Local Storage" / "leveldb").mkdir()
(profile / "Local Storage" / "leveldb" / "data").write_bytes(b"y" * 500)
(profile / "IndexedDB").mkdir()
(profile / "IndexedDB" / "db").write_bytes(b"z" * 300)
(profile / "Preferences").write_text('{"profile": {}}')
# Cache directories (should be removed)
(profile / "Cache").mkdir()
(profile / "Cache" / "data_0").write_bytes(b"0" * 10000)
(profile / "Cache" / "data_1").write_bytes(b"1" * 10000)
(profile / "Code Cache").mkdir()
(profile / "Code Cache" / "js").mkdir()
(profile / "Code Cache" / "js" / "bytecode").write_bytes(b"c" * 5000)
(profile / "GPUCache").mkdir()
(profile / "GPUCache" / "data").write_bytes(b"g" * 2000)
(profile / "Service Worker").mkdir()
(profile / "Service Worker" / "CacheStorage").mkdir()
(profile / "Service Worker" / "CacheStorage" / "cache").write_bytes(b"s" * 50000)
# History and other files (removed at MEDIUM+)
(profile / "History").write_bytes(b"h" * 1000)
(profile / "Favicons").write_bytes(b"f" * 500)
(profile / "Visited Links").write_bytes(b"v" * 200)
return str(profile)
def test_shrink_none_keeps_everything(self, mock_profile):
result = shrink_profile(mock_profile, ShrinkLevel.NONE)
assert result["removed"] == []
assert result["kept"] == []
assert result["bytes_freed"] == 0
def test_shrink_aggressive_removes_caches(self, mock_profile):
result = shrink_profile(mock_profile, ShrinkLevel.AGGRESSIVE)
# Auth data kept
assert "Network" in result["kept"]
assert "Local Storage" in result["kept"]
assert "IndexedDB" in result["kept"]
assert "Preferences" in result["kept"]
# Caches removed
assert "Cache" in result["removed"]
assert "Code Cache" in result["removed"]
assert "GPUCache" in result["removed"]
assert "Service Worker" in result["removed"]
# Verify bytes freed > 0
assert result["bytes_freed"] > 0
assert result["size_after"] < result["size_before"]
def test_shrink_minimal_keeps_only_essential(self, mock_profile):
result = shrink_profile(mock_profile, ShrinkLevel.MINIMAL)
# Only Network and Local Storage kept
assert set(result["kept"]) == {"Network", "Local Storage"}
# IndexedDB and Preferences removed at MINIMAL
assert "IndexedDB" in result["removed"]
assert "Preferences" in result["removed"]
def test_shrink_light_keeps_history(self, mock_profile):
result = shrink_profile(mock_profile, ShrinkLevel.LIGHT)
# History kept at LIGHT level
assert "History" in result["kept"]
# Caches still removed
assert "Cache" in result["removed"]
def test_shrink_medium_removes_history(self, mock_profile):
result = shrink_profile(mock_profile, ShrinkLevel.MEDIUM)
# History removed at MEDIUM
assert "History" in result["removed"]
assert "Favicons" in result["removed"]
# Auth still kept
assert "Network" in result["kept"]
def test_shrink_dry_run_no_changes(self, mock_profile):
size_before = _get_size(Path(mock_profile))
result = shrink_profile(mock_profile, ShrinkLevel.AGGRESSIVE, dry_run=True)
size_after = _get_size(Path(mock_profile))
assert size_before == size_after
assert result["size_after"] is None
assert len(result["removed"]) > 0 # Still reports what would be removed
def test_shrink_nonexistent_profile_raises(self):
with pytest.raises(ValueError, match="Profile not found"):
shrink_profile("/nonexistent/path", ShrinkLevel.AGGRESSIVE)
def test_shrink_empty_profile(self, tmp_path):
empty_profile = tmp_path / "empty"
empty_profile.mkdir()
result = shrink_profile(str(empty_profile), ShrinkLevel.AGGRESSIVE)
assert result["removed"] == []
assert result["kept"] == []
assert result["errors"] == []
class TestBrowserProfilerShrink:
"""Test BrowserProfiler.shrink() method."""
@pytest.fixture
def profiler(self):
return BrowserProfiler()
@pytest.fixture
def mock_profile_in_profiles_dir(self, profiler, tmp_path):
"""Create a mock profile in the profiler's profiles directory."""
# Temporarily override profiles_dir
original_dir = profiler.profiles_dir
profiler.profiles_dir = str(tmp_path)
profile = tmp_path / "test_profile"
profile.mkdir()
(profile / "Network").mkdir()
(profile / "Network" / "Cookies").write_text("cookies")
(profile / "Cache").mkdir()
(profile / "Cache" / "data").write_bytes(b"x" * 1000)
(profile / "Preferences").write_text("{}")
yield "test_profile", str(profile)
# Cleanup
profiler.profiles_dir = original_dir
def test_shrink_by_name(self, profiler, mock_profile_in_profiles_dir):
name, path = mock_profile_in_profiles_dir
result = profiler.shrink(name, ShrinkLevel.AGGRESSIVE)
assert "Cache" in result["removed"]
assert "Network" in result["kept"]
assert "Preferences" in result["kept"]
def test_shrink_by_path(self, profiler, mock_profile_in_profiles_dir):
_, path = mock_profile_in_profiles_dir
result = profiler.shrink(path, ShrinkLevel.AGGRESSIVE)
assert "Cache" in result["removed"]
def test_shrink_nonexistent_raises(self, profiler):
with pytest.raises(ValueError, match="Profile not found"):
profiler.shrink("nonexistent_profile")
class TestKeepPatterns:
"""Test that KEEP_PATTERNS are correctly defined."""
def test_aggressive_keeps_auth_essentials(self):
keep = KEEP_PATTERNS[ShrinkLevel.AGGRESSIVE]
assert "Network" in keep # Cookies (Chrome 96+)
assert "Cookies" in keep # Cookies (older Chrome)
assert "Local Storage" in keep # JWT/tokens
assert "IndexedDB" in keep # Some sites use this
assert "Preferences" in keep # Profile identity
def test_minimal_is_subset_of_aggressive(self):
minimal = KEEP_PATTERNS[ShrinkLevel.MINIMAL]
aggressive = KEEP_PATTERNS[ShrinkLevel.AGGRESSIVE]
assert minimal.issubset(aggressive)
def test_aggressive_is_subset_of_medium(self):
aggressive = KEEP_PATTERNS[ShrinkLevel.AGGRESSIVE]
medium = KEEP_PATTERNS[ShrinkLevel.MEDIUM]
assert aggressive.issubset(medium)
def test_medium_is_subset_of_light(self):
medium = KEEP_PATTERNS[ShrinkLevel.MEDIUM]
light = KEEP_PATTERNS[ShrinkLevel.LIGHT]
assert medium.issubset(light)
class TestIntegrationWithPlaywright:
"""Integration tests using real Playwright browser.
These tests verify that auth data survives shrinking and the browser
can still launch successfully after shrinking.
"""
@staticmethod
async def _create_seeded_profile(profile_path: str) -> str:
"""Create a real profile with seeded auth data using Playwright."""
from playwright.async_api import async_playwright
async with async_playwright() as p:
browser = await p.chromium.launch_persistent_context(
profile_path,
headless=True,
)
page = await browser.new_page()
# Navigate to a real site to enable localStorage/cookies
try:
await page.goto("https://example.com", timeout=15000)
except Exception:
# Fallback to about:blank which still allows localStorage
await page.goto("about:blank")
# Seed test data (localStorage works on any origin)
await page.evaluate("""
() => {
localStorage.setItem('jwt', 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9');
localStorage.setItem('refresh', 'refresh_token_abc');
}
""")
await browser.close()
return profile_path
@pytest.mark.asyncio
async def test_browser_launches_after_aggressive_shrink(self, tmp_path):
"""Verify browser can launch after aggressive shrinking."""
pytest.importorskip("playwright")
from playwright.async_api import async_playwright
profile_path = str(tmp_path / "playwright_profile")
await self._create_seeded_profile(profile_path)
# Shrink the profile
result = shrink_profile(profile_path, ShrinkLevel.AGGRESSIVE)
assert result["bytes_freed"] >= 0
# Verify browser launches and localStorage survives
async with async_playwright() as p:
browser = await p.chromium.launch_persistent_context(
profile_path,
headless=True,
)
page = await browser.new_page()
# Navigate to same origin to access localStorage
try:
await page.goto("https://example.com", timeout=15000)
except Exception:
await page.goto("about:blank")
# Verify localStorage survived
jwt = await page.evaluate("localStorage.getItem('jwt')")
assert jwt == "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9"
refresh = await page.evaluate("localStorage.getItem('refresh')")
assert refresh == "refresh_token_abc"
await browser.close()
@pytest.mark.asyncio
async def test_browser_launches_after_minimal_shrink(self, tmp_path):
"""Verify browser launches after minimal shrinking (most aggressive)."""
pytest.importorskip("playwright")
from playwright.async_api import async_playwright
profile_path = str(tmp_path / "playwright_profile")
await self._create_seeded_profile(profile_path)
# Shrink to minimal
result = shrink_profile(profile_path, ShrinkLevel.MINIMAL)
assert result["bytes_freed"] >= 0
# Verify browser still launches
async with async_playwright() as p:
browser = await p.chromium.launch_persistent_context(
profile_path,
headless=True,
)
page = await browser.new_page()
# Navigate to same origin to access localStorage
try:
await page.goto("https://example.com", timeout=15000)
except Exception:
await page.goto("about:blank")
# localStorage should still work
jwt = await page.evaluate("localStorage.getItem('jwt')")
assert jwt == "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9"
await browser.close()
@pytest.mark.asyncio
async def test_shrink_actually_reduces_size(self, tmp_path):
"""Verify shrinking actually reduces profile size."""
pytest.importorskip("playwright")
profile_path = str(tmp_path / "playwright_profile")
await self._create_seeded_profile(profile_path)
size_before = _get_size(Path(profile_path))
result = shrink_profile(profile_path, ShrinkLevel.AGGRESSIVE)
size_after = _get_size(Path(profile_path))
# Profile should be smaller (or same if no cache was generated)
assert size_after <= size_before
assert result["size_before"] == size_before
assert result["size_after"] == size_after
class TestCLIIntegration:
"""Test CLI command integration."""
def test_cli_import(self):
"""Verify CLI imports work."""
from crawl4ai.cli import shrink_cmd
assert callable(shrink_cmd)
def test_shrink_level_import(self):
"""Verify ShrinkLevel can be imported from cli."""
from crawl4ai.browser_profiler import ShrinkLevel
assert ShrinkLevel.AGGRESSIVE.value == "aggressive"
class TestEdgeCases:
"""Edge case tests to ensure robustness."""
def test_shrink_profile_with_symlinks(self, tmp_path):
"""Test shrinking profile with symlinks doesn't follow them."""
profile = tmp_path / "profile"
profile.mkdir()
(profile / "Local Storage").mkdir()
(profile / "Cache").mkdir()
(profile / "Cache" / "data").write_bytes(b"x" * 1000)
# Create symlink pointing outside profile
external_dir = tmp_path / "external"
external_dir.mkdir()
important_file = external_dir / "important.txt"
important_file.write_text("DO NOT DELETE")
# Symlink inside Cache pointing to external
symlink = profile / "Cache" / "external_link"
symlink.symlink_to(external_dir)
result = shrink_profile(str(profile), ShrinkLevel.AGGRESSIVE)
# External file should NOT be deleted
assert important_file.exists(), "Symlink target was deleted!"
assert "Cache" in result["removed"]
def test_shrink_with_special_characters_in_names(self, tmp_path):
"""Test shrinking handles special chars in filenames."""
profile = tmp_path / "profile"
profile.mkdir()
# Create dirs/files with special characters
(profile / "Local Storage").mkdir()
(profile / "Cache (old)").mkdir()
(profile / "Cache (old)" / "data").write_bytes(b"x" * 100)
(profile / "Test[1]").mkdir()
(profile / "Test[1]" / "file").write_bytes(b"y" * 100)
(profile / "Spaced Name").mkdir()
(profile / "file with spaces.txt").write_bytes(b"z" * 50)
result = shrink_profile(str(profile), ShrinkLevel.AGGRESSIVE)
assert "Cache (old)" in result["removed"]
assert "Test[1]" in result["removed"]
assert "Spaced Name" in result["removed"]
assert "file with spaces.txt" in result["removed"]
assert "Local Storage" in result["kept"]
def test_shrink_with_unicode_filenames(self, tmp_path):
"""Test shrinking handles unicode filenames."""
profile = tmp_path / "profile"
profile.mkdir()
(profile / "Local Storage").mkdir()
(profile / "Кэш").mkdir() # Russian "Cache"
(profile / "Кэш" / "данные").write_bytes(b"x" * 100)
(profile / "缓存").mkdir() # Chinese "Cache"
(profile / "キャッシュ").mkdir() # Japanese "Cache"
(profile / "émojis_🎉").mkdir()
result = shrink_profile(str(profile), ShrinkLevel.AGGRESSIVE)
assert "Local Storage" in result["kept"]
assert len(result["removed"]) >= 4
def test_shrink_with_hidden_files(self, tmp_path):
"""Test shrinking handles hidden (dot) files."""
profile = tmp_path / "profile"
profile.mkdir()
(profile / "Local Storage").mkdir()
(profile / ".hidden_cache").mkdir()
(profile / ".hidden_cache" / "data").write_bytes(b"x" * 1000)
(profile / ".DS_Store").write_bytes(b"y" * 100)
(profile / ".git").mkdir()
result = shrink_profile(str(profile), ShrinkLevel.AGGRESSIVE)
# Hidden files should be removed (not in keep list)
assert ".hidden_cache" in result["removed"]
assert ".DS_Store" in result["removed"]
assert ".git" in result["removed"]
def test_shrink_with_empty_directories(self, tmp_path):
"""Test shrinking handles empty directories."""
profile = tmp_path / "profile"
profile.mkdir()
(profile / "Local Storage").mkdir()
(profile / "Empty Cache").mkdir()
(profile / "Another Empty").mkdir()
(profile / "Nested").mkdir()
(profile / "Nested" / "Also Empty").mkdir()
result = shrink_profile(str(profile), ShrinkLevel.AGGRESSIVE)
assert "Empty Cache" in result["removed"]
assert "Another Empty" in result["removed"]
assert "Nested" in result["removed"]
assert not (profile / "Empty Cache").exists()
def test_shrink_twice_same_profile(self, tmp_path):
"""Test shrinking same profile twice is idempotent."""
profile = tmp_path / "profile"
profile.mkdir()
(profile / "Local Storage").mkdir()
(profile / "Local Storage" / "data").write_bytes(b"x" * 100)
(profile / "Cache").mkdir()
(profile / "Cache" / "data").write_bytes(b"y" * 1000)
# First shrink
result1 = shrink_profile(str(profile), ShrinkLevel.AGGRESSIVE)
assert "Cache" in result1["removed"]
assert result1["bytes_freed"] > 0
# Second shrink - should be no-op
result2 = shrink_profile(str(profile), ShrinkLevel.AGGRESSIVE)
assert result2["removed"] == []
assert result2["bytes_freed"] == 0
assert "Local Storage" in result2["kept"]
def test_shrink_preserves_storage_state_json(self, tmp_path):
"""Test that storage_state.json is preserved."""
profile = tmp_path / "profile"
profile.mkdir()
# storage_state.json should be kept (starts with no pattern but is important)
(profile / "storage_state.json").write_text('{"cookies": []}')
(profile / "Local Storage").mkdir()
(profile / "Cache").mkdir()
result = shrink_profile(str(profile), ShrinkLevel.AGGRESSIVE)
# storage_state.json doesn't match keep patterns, so it gets removed
# This is expected - the shrink function preserves Chrome's auth files,
# not Crawl4AI's exported state file
# If we want to keep it, we need to add it to KEEP_PATTERNS
def test_shrink_with_very_deep_nesting(self, tmp_path):
"""Test shrinking deeply nested directories."""
profile = tmp_path / "profile"
profile.mkdir()
(profile / "Local Storage").mkdir()
# Create deeply nested cache
deep = profile / "Cache"
for i in range(20):
deep = deep / f"level_{i}"
deep.mkdir(parents=True)
(deep / "deep_file.txt").write_bytes(b"x" * 100)
result = shrink_profile(str(profile), ShrinkLevel.AGGRESSIVE)
assert "Cache" in result["removed"]
assert not (profile / "Cache").exists()
def test_shrink_with_large_files(self, tmp_path):
"""Test shrinking handles large files efficiently."""
profile = tmp_path / "profile"
profile.mkdir()
(profile / "Local Storage").mkdir()
(profile / "Cache").mkdir()
# Create a 10MB file
large_file = profile / "Cache" / "large_file.bin"
large_file.write_bytes(b"x" * (10 * 1024 * 1024))
size_before = _get_size(profile)
result = shrink_profile(str(profile), ShrinkLevel.AGGRESSIVE)
size_after = _get_size(profile)
assert result["bytes_freed"] >= 10 * 1024 * 1024
assert size_after < size_before
def test_shrink_with_read_only_files(self, tmp_path):
"""Test shrinking handles read-only files gracefully."""
import stat
profile = tmp_path / "profile"
profile.mkdir()
(profile / "Local Storage").mkdir()
cache = profile / "Cache"
cache.mkdir()
readonly_file = cache / "readonly.txt"
readonly_file.write_bytes(b"x" * 100)
# Make file read-only
readonly_file.chmod(stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
try:
result = shrink_profile(str(profile), ShrinkLevel.AGGRESSIVE)
# On some systems this will succeed, on others it will error
# Either way, it shouldn't crash
if result["errors"]:
assert "Cache" in str(result["errors"][0]) or len(result["errors"]) > 0
finally:
# Restore permissions for cleanup
try:
readonly_file.chmod(stat.S_IRWXU)
except:
pass
def test_shrink_with_many_small_files(self, tmp_path):
"""Test shrinking handles many small files efficiently."""
profile = tmp_path / "profile"
profile.mkdir()
(profile / "Local Storage").mkdir()
cache = profile / "Cache"
cache.mkdir()
# Create 1000 small files
for i in range(1000):
(cache / f"file_{i:04d}.txt").write_bytes(b"x" * 100)
result = shrink_profile(str(profile), ShrinkLevel.AGGRESSIVE)
assert "Cache" in result["removed"]
assert result["bytes_freed"] >= 100 * 1000
assert not cache.exists()
def test_shrink_default_subdirectory_structure(self, tmp_path):
"""Test shrinking when profile has Default/ subdirectory."""
profile = tmp_path / "profile"
profile.mkdir()
# Chrome-style structure with Default/
default = profile / "Default"
default.mkdir()
(default / "Local Storage").mkdir()
(default / "Local Storage" / "leveldb").mkdir()
(default / "Cookies").write_bytes(b"cookies" * 100)
(default / "Cache").mkdir()
(default / "Cache" / "data").write_bytes(b"x" * 10000)
(default / "GPUCache").mkdir()
(default / "GPUCache" / "data").write_bytes(b"y" * 5000)
result = shrink_profile(str(profile), ShrinkLevel.AGGRESSIVE)
# Should shrink inside Default/
assert "Cache" in result["removed"]
assert "GPUCache" in result["removed"]
assert "Local Storage" in result["kept"]
assert "Cookies" in result["kept"]
assert (default / "Local Storage").exists()
assert (default / "Cookies").exists()
assert not (default / "Cache").exists()
def test_shrink_mixed_files_and_directories(self, tmp_path):
"""Test shrinking mix of files and directories."""
profile = tmp_path / "profile"
profile.mkdir()
(profile / "Local Storage").mkdir()
(profile / "Preferences").write_text("{}")
(profile / "Cookies").write_bytes(b"x" * 500)
(profile / "Cookies-journal").write_bytes(b"y" * 100)
(profile / "History").write_bytes(b"z" * 1000)
(profile / "Cache").mkdir()
(profile / "random_file.txt").write_bytes(b"a" * 200)
result = shrink_profile(str(profile), ShrinkLevel.AGGRESSIVE)
# Files and dirs properly categorized
assert "Local Storage" in result["kept"]
assert "Preferences" in result["kept"]
assert "Cookies" in result["kept"]
assert "Cookies-journal" in result["kept"]
assert "History" in result["removed"]
assert "Cache" in result["removed"]
assert "random_file.txt" in result["removed"]
def test_shrink_level_none_is_noop(self, tmp_path):
"""Test ShrinkLevel.NONE does absolutely nothing."""
profile = tmp_path / "profile"
profile.mkdir()
(profile / "Cache").mkdir()
(profile / "Cache" / "data").write_bytes(b"x" * 1000)
size_before = _get_size(profile)
result = shrink_profile(str(profile), ShrinkLevel.NONE)
size_after = _get_size(profile)
assert result["removed"] == []
assert result["kept"] == []
assert result["bytes_freed"] == 0
assert size_before == size_after
def test_shrink_result_sizes_are_accurate(self, tmp_path):
"""Test that reported sizes match actual sizes."""
profile = tmp_path / "profile"
profile.mkdir()
(profile / "Local Storage").mkdir()
(profile / "Local Storage" / "data").write_bytes(b"k" * 500)
(profile / "Cache").mkdir()
(profile / "Cache" / "data").write_bytes(b"x" * 2000)
actual_before = _get_size(profile)
result = shrink_profile(str(profile), ShrinkLevel.AGGRESSIVE)
actual_after = _get_size(profile)
assert result["size_before"] == actual_before
assert result["size_after"] == actual_after
assert result["size_before"] - result["size_after"] == result["bytes_freed"]
def test_shrink_all_levels_progressively_smaller(self, tmp_path):
"""Test that stricter levels remove more data."""
def create_full_profile(path):
path.mkdir(exist_ok=True)
(path / "Network").mkdir(exist_ok=True)
(path / "Cookies").write_bytes(b"c" * 100)
(path / "Local Storage").mkdir(exist_ok=True)
(path / "IndexedDB").mkdir(exist_ok=True)
(path / "Preferences").write_text("{}")
(path / "History").write_bytes(b"h" * 500)
(path / "Bookmarks").write_text("[]")
(path / "Cache").mkdir(exist_ok=True)
(path / "Cache" / "data").write_bytes(b"x" * 2000)
results = {}
for level in [ShrinkLevel.LIGHT, ShrinkLevel.MEDIUM,
ShrinkLevel.AGGRESSIVE, ShrinkLevel.MINIMAL]:
profile = tmp_path / f"profile_{level.value}"
create_full_profile(profile)
results[level] = shrink_profile(str(profile), level)
# Stricter levels should remove more
assert len(results[ShrinkLevel.LIGHT]["kept"]) >= len(results[ShrinkLevel.MEDIUM]["kept"])
assert len(results[ShrinkLevel.MEDIUM]["kept"]) >= len(results[ShrinkLevel.AGGRESSIVE]["kept"])
assert len(results[ShrinkLevel.AGGRESSIVE]["kept"]) >= len(results[ShrinkLevel.MINIMAL]["kept"])
def test_shrink_with_broken_symlinks(self, tmp_path):
"""Test shrinking handles broken symlinks."""
profile = tmp_path / "profile"
profile.mkdir()
(profile / "Local Storage").mkdir()
(profile / "Cache").mkdir()
# Create broken symlink
broken_link = profile / "Cache" / "broken_link"
broken_link.symlink_to("/nonexistent/path/that/does/not/exist")
result = shrink_profile(str(profile), ShrinkLevel.AGGRESSIVE)
assert "Cache" in result["removed"]
assert not (profile / "Cache").exists()
def test_shrink_dry_run_reports_would_free(self, tmp_path):
"""Test dry run accurately reports what would be freed."""
profile = tmp_path / "profile"
profile.mkdir()
(profile / "Local Storage").mkdir()
(profile / "Cache").mkdir()
(profile / "Cache" / "data").write_bytes(b"x" * 5000)
dry_result = shrink_profile(str(profile), ShrinkLevel.AGGRESSIVE, dry_run=True)
# Nothing should be removed yet
assert (profile / "Cache").exists()
assert dry_result["size_after"] is None
# Actually shrink
real_result = shrink_profile(str(profile), ShrinkLevel.AGGRESSIVE)
# Dry run should have predicted the freed bytes
assert dry_result["bytes_freed"] == real_result["bytes_freed"]
assert dry_result["removed"] == real_result["removed"]
class TestBrowserProfilerEdgeCases:
"""Edge cases for BrowserProfiler.shrink() method."""
def test_profiler_shrink_relative_path(self, tmp_path):
"""Test profiler.shrink with profile name resolution."""
profiler = BrowserProfiler()
original_dir = profiler.profiles_dir
profiler.profiles_dir = str(tmp_path)
try:
profile = tmp_path / "test_profile"
profile.mkdir()
(profile / "Preferences").write_text("{}")
(profile / "Cache").mkdir()
(profile / "Cache" / "data").write_bytes(b"x" * 100)
result = profiler.shrink("test_profile", ShrinkLevel.AGGRESSIVE)
assert "Cache" in result["removed"]
finally:
profiler.profiles_dir = original_dir
def test_profiler_shrink_absolute_path(self, tmp_path):
"""Test profiler.shrink with absolute path."""
profiler = BrowserProfiler()
profile = tmp_path / "absolute_profile"
profile.mkdir()
(profile / "Preferences").write_text("{}")
(profile / "Cache").mkdir()
result = profiler.shrink(str(profile), ShrinkLevel.AGGRESSIVE)
assert "Cache" in result["removed"]
def test_profiler_shrink_invalid_name(self):
"""Test profiler.shrink with invalid profile name."""
profiler = BrowserProfiler()
with pytest.raises(ValueError, match="Profile not found"):
profiler.shrink("definitely_nonexistent_profile_12345")
class TestStressAndCornerCases:
"""Stress tests and extreme corner cases."""
def test_shrink_file_instead_of_directory(self, tmp_path):
"""Test shrinking a file (not directory) raises error."""
file_path = tmp_path / "not_a_profile.txt"
file_path.write_text("I am a file")
with pytest.raises(ValueError, match="Profile not found"):
shrink_profile(str(file_path), ShrinkLevel.AGGRESSIVE)
def test_shrink_with_circular_symlinks(self, tmp_path):
"""Test shrinking handles circular symlinks gracefully."""
profile = tmp_path / "profile"
profile.mkdir()
(profile / "Local Storage").mkdir()
cache = profile / "Cache"
cache.mkdir()
# Create circular symlink: Cache/link -> Cache
circular = cache / "circular"
circular.symlink_to(cache)
# Should not hang or crash
result = shrink_profile(str(profile), ShrinkLevel.AGGRESSIVE)
assert "Cache" in result["removed"]
def test_shrink_with_very_long_filenames(self, tmp_path):
"""Test shrinking handles very long filenames."""
profile = tmp_path / "profile"
profile.mkdir()
(profile / "Local Storage").mkdir()
# Create file with very long name (near filesystem limit)
long_name = "a" * 200 # Most filesystems support 255 chars
(profile / long_name).write_bytes(b"x" * 100)
result = shrink_profile(str(profile), ShrinkLevel.AGGRESSIVE)
assert long_name in result["removed"]
def test_shrink_profile_only_has_kept_items(self, tmp_path):
"""Test shrinking profile that only has items to keep."""
profile = tmp_path / "profile"
profile.mkdir()
(profile / "Local Storage").mkdir()
(profile / "Local Storage" / "leveldb").mkdir()
(profile / "Cookies").write_bytes(b"c" * 100)
(profile / "Preferences").write_text("{}")
(profile / "IndexedDB").mkdir()
result = shrink_profile(str(profile), ShrinkLevel.AGGRESSIVE)
assert result["removed"] == []
assert result["bytes_freed"] == 0
assert len(result["kept"]) == 4
def test_shrink_with_files_matching_keep_prefix(self, tmp_path):
"""Test that files starting with keep patterns are kept."""
profile = tmp_path / "profile"
profile.mkdir()
# These should be kept (match patterns)
(profile / "Local Storage").mkdir()
(profile / "Local Storage Extra").mkdir() # Starts with "Local Storage"
(profile / "Cookies").write_bytes(b"c" * 100)
(profile / "Cookies-journal").write_bytes(b"j" * 50)
(profile / "CookiesBackup").write_bytes(b"b" * 50) # Starts with "Cookies"
# This should be removed
(profile / "Cache").mkdir()
result = shrink_profile(str(profile), ShrinkLevel.AGGRESSIVE)
assert "Local Storage" in result["kept"]
assert "Local Storage Extra" in result["kept"]
assert "Cookies" in result["kept"]
assert "Cookies-journal" in result["kept"]
assert "CookiesBackup" in result["kept"]
assert "Cache" in result["removed"]
def test_shrink_calculates_size_correctly_with_nested_dirs(self, tmp_path):
"""Test size calculation is accurate for nested structures."""
profile = tmp_path / "profile"
profile.mkdir()
(profile / "Local Storage").mkdir()
# Create nested cache with known sizes
cache = profile / "Cache"
cache.mkdir()
(cache / "level1").mkdir()
(cache / "level1" / "level2").mkdir()
(cache / "level1" / "file1.bin").write_bytes(b"x" * 1000)
(cache / "level1" / "level2" / "file2.bin").write_bytes(b"y" * 2000)
(cache / "file0.bin").write_bytes(b"z" * 500)
expected_freed = 1000 + 2000 + 500 # Total bytes in Cache
result = shrink_profile(str(profile), ShrinkLevel.AGGRESSIVE)
assert result["bytes_freed"] == expected_freed
def test_shrink_empty_default_subdirectory(self, tmp_path):
"""Test shrinking when Default/ exists but is empty."""
profile = tmp_path / "profile"
profile.mkdir()
(profile / "Default").mkdir()
result = shrink_profile(str(profile), ShrinkLevel.AGGRESSIVE)
assert result["removed"] == []
assert result["kept"] == []
assert result["bytes_freed"] == 0
def test_shrink_with_both_root_and_default_structure(self, tmp_path):
"""Test when profile has items at root AND in Default/."""
profile = tmp_path / "profile"
profile.mkdir()
# Items at root level
(profile / "SomeRootFile.txt").write_bytes(b"r" * 100)
# Items in Default/
default = profile / "Default"
default.mkdir()
(default / "Local Storage").mkdir()
(default / "Cache").mkdir()
(default / "Cache" / "data").write_bytes(b"x" * 1000)
result = shrink_profile(str(profile), ShrinkLevel.AGGRESSIVE)
# Should shrink inside Default/, ignoring root level
assert "Cache" in result["removed"]
assert "Local Storage" in result["kept"]
# Root file should be untouched
assert (profile / "SomeRootFile.txt").exists()
def test_shrink_minimal_vs_aggressive_indexeddb(self, tmp_path):
"""Test that MINIMAL removes IndexedDB but AGGRESSIVE keeps it."""
def create_profile(path):
path.mkdir()
(path / "Local Storage").mkdir()
(path / "IndexedDB").mkdir()
(path / "IndexedDB" / "data").write_bytes(b"i" * 500)
# Test AGGRESSIVE
profile_agg = tmp_path / "aggressive"
create_profile(profile_agg)
result_agg = shrink_profile(str(profile_agg), ShrinkLevel.AGGRESSIVE)
assert "IndexedDB" in result_agg["kept"]
# Test MINIMAL
profile_min = tmp_path / "minimal"
create_profile(profile_min)
result_min = shrink_profile(str(profile_min), ShrinkLevel.MINIMAL)
assert "IndexedDB" in result_min["removed"]
def test_shrink_handles_oserror_gracefully(self, tmp_path):
"""Test that OSErrors during iteration don't crash the function."""
profile = tmp_path / "profile"
profile.mkdir()
(profile / "Local Storage").mkdir()
(profile / "Cache").mkdir()
(profile / "Cache" / "data").write_bytes(b"x" * 100)
# This should work without issues
result = shrink_profile(str(profile), ShrinkLevel.AGGRESSIVE)
assert result["errors"] == []
def test_format_size_edge_values(self):
"""Test _format_size with edge values."""
assert _format_size(0) == "0.0 B"
assert _format_size(1) == "1.0 B"
assert _format_size(1023) == "1023.0 B"
assert _format_size(1024) == "1.0 KB"
assert _format_size(1024 * 1024 - 1) == "1024.0 KB"
assert _format_size(1024 * 1024) == "1.0 MB"
assert _format_size(1024 * 1024 * 1024) == "1.0 GB"
assert _format_size(1024 * 1024 * 1024 * 1024) == "1.0 TB"
def test_get_size_with_permission_error(self, tmp_path):
"""Test _get_size handles permission errors gracefully."""
import stat
profile = tmp_path / "profile"
profile.mkdir()
restricted = profile / "restricted"
restricted.mkdir()
(restricted / "file.txt").write_bytes(b"x" * 100)
# Remove read permission on directory
restricted.chmod(stat.S_IWUSR)
try:
# Should not raise, should return partial size
size = _get_size(profile)
assert size >= 0
finally:
# Restore permissions
restricted.chmod(stat.S_IRWXU)
def test_shrink_with_cookies_in_network_subdirectory(self, tmp_path):
"""Test modern Chrome structure with Cookies in Network/."""
profile = tmp_path / "profile"
profile.mkdir()
# Chrome 96+ structure
network = profile / "Network"
network.mkdir()
(network / "Cookies").write_bytes(b"c" * 500)
(network / "TransportSecurity").write_bytes(b"t" * 100)
(profile / "Local Storage").mkdir()
(profile / "Cache").mkdir()
(profile / "Cache" / "data").write_bytes(b"x" * 1000)
result = shrink_profile(str(profile), ShrinkLevel.AGGRESSIVE)
assert "Network" in result["kept"]
assert "Local Storage" in result["kept"]
assert "Cache" in result["removed"]
assert (network / "Cookies").exists()