From 64f20ab44a2062d85fbc7761ce4f8692cbbc4f7a Mon Sep 17 00:00:00 2001 From: UncleCode Date: Fri, 28 Mar 2025 15:59:02 +0800 Subject: [PATCH] refactor(docker): update Dockerfile and browser strategy to use Chromium --- crawl4ai/browser/docker/alpine/launch.Dockerfile | 6 +++++- crawl4ai/browser/docker_utils.py | 2 +- crawl4ai/browser/strategies/docker_strategy.py | 10 +++++----- tests/browser/docker/test_docker_browser.py | 4 ++-- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/crawl4ai/browser/docker/alpine/launch.Dockerfile b/crawl4ai/browser/docker/alpine/launch.Dockerfile index 60b20539..17e3c660 100644 --- a/crawl4ai/browser/docker/alpine/launch.Dockerfile +++ b/crawl4ai/browser/docker/alpine/launch.Dockerfile @@ -9,11 +9,15 @@ freetype \ harfbuzz \ ca-certificates \ - ttf-freefont && \ + ttf-freefont \ + socat \ + curl && \ addgroup -S chromium && adduser -S chromium -G chromium && \ mkdir -p /data && chown chromium:chromium /data && \ rm -rf /var/cache/apk/* + ENV PATH="/usr/bin:/bin:/usr/sbin:/sbin" + # Switch to a non-root user for security USER chromium WORKDIR /home/chromium diff --git a/crawl4ai/browser/docker_utils.py b/crawl4ai/browser/docker_utils.py index 7ba48534..f93a51b9 100644 --- a/crawl4ai/browser/docker_utils.py +++ b/crawl4ai/browser/docker_utils.py @@ -501,7 +501,7 @@ class DockerUtils: bool: True if Chrome started successfully, False otherwise """ # Build Chrome command - chrome_cmd = ["google-chrome"] + chrome_cmd = ["chromium"] chrome_cmd.extend(browser_args) returncode, _, stderr = await self.exec_in_container( diff --git a/crawl4ai/browser/strategies/docker_strategy.py b/crawl4ai/browser/strategies/docker_strategy.py index 33e581be..ca7e314a 100644 --- a/crawl4ai/browser/strategies/docker_strategy.py +++ b/crawl4ai/browser/strategies/docker_strategy.py @@ -14,13 +14,13 @@ from ...async_configs import BrowserConfig from ..models import DockerConfig from ..docker_registry import DockerRegistry from ..docker_utils import DockerUtils -from .builtin import BuiltinBrowserStrategy +from .builtin import CDPBrowserStrategy -class DockerBrowserStrategy(BuiltinBrowserStrategy): +class DockerBrowserStrategy(CDPBrowserStrategy): """Docker-based browser strategy. - Extends the BuiltinBrowserStrategy to run browsers in Docker containers. + Extends the CDPBrowserStrategy to run browsers in Docker containers. Supports two modes: 1. "connect" - Uses a Docker image with Chrome already running 2. "launch" - Starts Chrome within the container with custom settings @@ -342,7 +342,7 @@ class DockerBrowserStrategy(BuiltinBrowserStrategy): # Get PIDs for later cleanup self.chrome_process_id = await self.docker_utils.get_process_id_in_container( - container_id, "chrome" + container_id, "chromium" ) self.socat_process_id = await self.docker_utils.get_process_id_in_container( container_id, "socat" @@ -396,7 +396,7 @@ class DockerBrowserStrategy(BuiltinBrowserStrategy): if self.config.light_mode: # Import here to avoid circular import - from .utils import get_browser_disable_options + from ..utils import get_browser_disable_options args.extend(get_browser_disable_options()) if self.config.user_data_dir: diff --git a/tests/browser/docker/test_docker_browser.py b/tests/browser/docker/test_docker_browser.py index bd3c4348..610a230e 100644 --- a/tests/browser/docker/test_docker_browser.py +++ b/tests/browser/docker/test_docker_browser.py @@ -615,8 +615,8 @@ async def run_tests(): # Run browser tests # results.append(await test_docker_connect_mode()) - results.append(await test_docker_launch_mode()) - results.append(await test_docker_persistent_storage()) + # results.append(await test_docker_launch_mode()) + # results.append(await test_docker_persistent_storage()) results.append(await test_docker_parallel_pages()) results.append(await test_docker_registry_reuse())