refactor(browser): improve browser strategy architecture and lifecycle management

Major refactoring of browser strategy implementations to improve code organization and reliability:
- Move CrawlResultContainer and RunManyReturn types from async_webcrawler to models.py
- Simplify browser lifecycle management in AsyncWebCrawler
- Standardize browser strategy interface with _generate_page method
- Improve headless mode handling and browser args construction
- Clean up Docker and Playwright strategy implementations
- Fix session management and context handling across strategies

BREAKING CHANGE: Browser strategy interface has changed with new _generate_page method requirement
This commit is contained in:
UncleCode
2025-03-30 20:58:39 +08:00
parent 3ff7eec8f3
commit bb02398086
11 changed files with 271 additions and 459 deletions

View File

@@ -530,7 +530,7 @@ async def test_docker_registry_reuse():
logger.info("First browser started successfully", tag="TEST")
# Get container ID from the strategy
docker_strategy1 = manager1._strategy
docker_strategy1 = manager1.strategy
container_id1 = docker_strategy1.container_id
logger.info(f"First browser container ID: {container_id1[:12]}", tag="TEST")
@@ -560,7 +560,7 @@ async def test_docker_registry_reuse():
logger.info("Second browser started successfully", tag="TEST")
# Get container ID from the second strategy
docker_strategy2 = manager2._strategy
docker_strategy2 = manager2.strategy
container_id2 = docker_strategy2.container_id
logger.info(f"Second browser container ID: {container_id2[:12]}", tag="TEST")

View File

@@ -56,13 +56,13 @@ async def test_builtin_browser_creation():
# Step 2: Check if we have a BuiltinBrowserStrategy
print(f"\n{INFO}2. Checking if we have a BuiltinBrowserStrategy{RESET}")
if isinstance(manager._strategy, BuiltinBrowserStrategy):
if isinstance(manager.strategy, BuiltinBrowserStrategy):
print(
f"{SUCCESS}Correct strategy type: {manager._strategy.__class__.__name__}{RESET}"
f"{SUCCESS}Correct strategy type: {manager.strategy.__class__.__name__}{RESET}"
)
else:
print(
f"{ERROR}Wrong strategy type: {manager._strategy.__class__.__name__}{RESET}"
f"{ERROR}Wrong strategy type: {manager.strategy.__class__.__name__}{RESET}"
)
return None
@@ -77,7 +77,7 @@ async def test_builtin_browser_creation():
# Step 4: Get browser info from the strategy
print(f"\n{INFO}4. Getting browser information{RESET}")
browser_info = manager._strategy.get_browser_info()
browser_info = manager.strategy.get_browser_info()
if browser_info:
print(f"{SUCCESS}Browser info retrieved:{RESET}")
for key, value in browser_info.items():
@@ -149,7 +149,7 @@ async def test_browser_status_management(manager: BrowserManager):
# Step 1: Get browser status
print(f"\n{INFO}1. Getting browser status{RESET}")
try:
status = await manager._strategy.get_builtin_browser_status()
status = await manager.strategy.get_builtin_browser_status()
print(f"{SUCCESS}Browser status:{RESET}")
print(f" Running: {status['running']}")
print(f" CDP URL: {status['cdp_url']}")
@@ -160,7 +160,7 @@ async def test_browser_status_management(manager: BrowserManager):
# Step 2: Test killing the browser
print(f"\n{INFO}2. Testing killing the browser{RESET}")
try:
result = await manager._strategy.kill_builtin_browser()
result = await manager.strategy.kill_builtin_browser()
if result:
print(f"{SUCCESS}Browser killed successfully{RESET}")
else:
@@ -172,7 +172,7 @@ async def test_browser_status_management(manager: BrowserManager):
# Step 3: Check status after kill
print(f"\n{INFO}3. Checking status after kill{RESET}")
try:
status = await manager._strategy.get_builtin_browser_status()
status = await manager.strategy.get_builtin_browser_status()
if not status["running"]:
print(f"{SUCCESS}Browser is correctly reported as not running{RESET}")
else:
@@ -184,7 +184,7 @@ async def test_browser_status_management(manager: BrowserManager):
# Step 4: Launch a new browser
print(f"\n{INFO}4. Launching a new browser{RESET}")
try:
cdp_url = await manager._strategy.launch_builtin_browser(
cdp_url = await manager.strategy.launch_builtin_browser(
browser_type="chromium", headless=True
)
if cdp_url:
@@ -223,8 +223,8 @@ async def test_multiple_managers():
print(f"{SUCCESS}Second manager started{RESET}")
# Check if they got the same CDP URL
cdp_url1 = manager1._strategy.config.cdp_url
cdp_url2 = manager2._strategy.config.cdp_url
cdp_url1 = manager1.strategy.config.cdp_url
cdp_url2 = manager2.strategy.config.cdp_url
if cdp_url1 == cdp_url2:
print(
@@ -316,7 +316,7 @@ async def test_edge_cases():
# Kill the browser directly
print(f"{INFO}Killing the browser...{RESET}")
await manager._strategy.kill_builtin_browser()
await manager.strategy.kill_builtin_browser()
print(f"{SUCCESS}Browser killed{RESET}")
# Try to get a page (should fail or launch a new browser)
@@ -350,7 +350,7 @@ async def cleanup_browsers():
try:
# No need to start, just access the strategy directly
strategy = manager._strategy
strategy = manager.strategy
if isinstance(strategy, BuiltinBrowserStrategy):
result = await strategy.kill_builtin_browser()
if result:
@@ -420,7 +420,7 @@ async def test_performance_scaling():
user_data_dir=os.path.join(temp_dir, f"browser_profile_{i}"),
)
manager = BrowserManager(browser_config=browser_config, logger=logger)
manager._strategy.shutting_down = True
manager.strategy.shutting_down = True
manager_configs.append((manager, i, port))
# Define async function to start a single manager
@@ -614,7 +614,7 @@ async def test_performance_scaling_lab( num_browsers: int = 10, pages_per_browse
user_data_dir=os.path.join(temp_dir, f"browser_profile_{i}"),
)
manager = BrowserManager(browser_config=browser_config, logger=logger)
manager._strategy.shutting_down = True
manager.strategy.shutting_down = True
manager_configs.append((manager, i, port))
# Define async function to start a single manager