138 lines
3.9 KiB
Docker
138 lines
3.9 KiB
Docker
# Base image: CPython 3.10 on Debian "slim". The Debian release is pinned
# explicitly (bookworm) because the floating `3.10-slim` tag follows whatever
# Debian release the image maintainers currently default to, and the apt
# package names installed by later RUN steps are release-specific.
FROM python:3.10-slim-bookworm

# Build arguments (override with `docker build --build-arg NAME=value`).
#   APP_HOME      - directory used as WORKDIR and as the COPY destination.
#   GITHUB_REPO   - repository cloned to /tmp/crawl4ai and installed with pip.
#   GITHUB_BRANCH - branch of GITHUB_REPO that is cloned.
#   USE_LOCAL     - declared, but not referenced by the visible instructions.
#   CONFIG_PATH   - optional path of a config file copied to config.yml.
ARG APP_HOME=/app
ARG GITHUB_REPO=https://github.com/unclecode/crawl4ai.git
ARG GITHUB_BRANCH=next
ARG USE_LOCAL=False
ARG CONFIG_PATH=""
|
|
|
|
# Environment baked into the image and visible to running containers:
# Python interpreter switches, pip defaults, and the Redis host/port
# (presumably read by the application; the consumer is not visible here).
ENV PYTHONFAULTHANDLER=1 \
    PYTHONHASHSEED=random \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_DEFAULT_TIMEOUT=100 \
    REDIS_HOST=localhost \
    REDIS_PORT=6379

# Build-time only: an ARG is exported to every following RUN step, so apt and
# debconf stay non-interactive during the build, but the setting is not
# persisted into the environment of containers started from the image.
ARG DEBIAN_FRONTEND=noninteractive
|
|
|
|
# Build-time switches read by the conditional RUN steps further down.
# TARGETARCH is declared without a default so that the value supplied by the
# builder becomes visible inside this stage.
ARG TARGETARCH
# "true" requests the CUDA toolkit; any other value skips it.
ARG ENABLE_GPU=false
# Selects which crawl4ai extras are installed: all | torch | transformer | default.
ARG INSTALL_TYPE=default
# Declared, but not referenced by the visible instructions.
ARG PYTHON_VERSION=3.10
|
|
|
|
# Image metadata, grouped into a single LABEL instruction.
LABEL maintainer="unclecode" \
      description="🔥🕷️ Crawl4AI: Open-source LLM Friendly Web Crawler & scraper" \
      version="1.0"
|
|
|
|
# System packages: compiler toolchain and headers, download/VCS utilities,
# plus redis-server and supervisor. The apt index is refreshed and removed
# within the same layer so no package lists are left behind in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        curl \
        git \
        gnupg \
        libjpeg-dev \
        pkg-config \
        python3-dev \
        redis-server \
        supervisor \
        wget \
    && rm -rf /var/lib/apt/lists/*
|
|
|
|
# Shared libraries (GLib, NSS/NSPR, ATK, CUPS, DRM/GBM, D-Bus, X11/XCB,
# Pango/Cairo, ALSA, AT-SPI).
# NOTE(review): presumably the runtime dependencies of the Chromium browser
# that Playwright installs later in this file - confirm.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        libasound2 \
        libatk-bridge2.0-0 \
        libatk1.0-0 \
        libatspi2.0-0 \
        libcairo2 \
        libcups2 \
        libdbus-1-3 \
        libdrm2 \
        libgbm1 \
        libglib2.0-0 \
        libnspr4 \
        libnss3 \
        libpango-1.0-0 \
        libx11-6 \
        libxcb1 \
        libxcomposite1 \
        libxdamage1 \
        libxext6 \
        libxfixes3 \
        libxkbcommon0 \
        libxrandr2 \
    && rm -rf /var/lib/apt/lists/*
|
|
|
|
# Optional CUDA toolkit. It is installed only when ENABLE_GPU is exactly
# "true" AND the target architecture is amd64; every other combination only
# prints a notice and leaves the image unchanged.
RUN if [ "$ENABLE_GPU" != "true" ] || [ "$TARGETARCH" != "amd64" ]; then \
        echo "Skipping NVIDIA CUDA Toolkit installation (unsupported platform or GPU disabled)"; \
    else \
        apt-get update \
        && apt-get install -y --no-install-recommends nvidia-cuda-toolkit \
        && rm -rf /var/lib/apt/lists/*; \
    fi
|
|
|
|
# Architecture-specific numeric libraries, selected by TARGETARCH:
#   arm64 -> libopenblas-dev
#   amd64 -> libomp-dev
#   other -> nothing is installed, a notice is printed instead.
RUN case "$TARGETARCH" in \
        arm64) \
            echo "🦾 Installing ARM-specific optimizations"; \
            apt-get update \
            && apt-get install -y --no-install-recommends libopenblas-dev \
            && rm -rf /var/lib/apt/lists/* ;; \
        amd64) \
            echo "🖥️ Installing AMD64-specific optimizations"; \
            apt-get update \
            && apt-get install -y --no-install-recommends libomp-dev \
            && rm -rf /var/lib/apt/lists/* ;; \
        *) \
            echo "Skipping platform-specific optimizations (unsupported platform)" ;; \
    esac
|
|
|
|
# All following relative paths (COPY destinations, supervisord.conf in CMD)
# resolve against APP_HOME.
WORKDIR ${APP_HOME}

# Fetch the crawl4ai sources that the pip install steps below read from
# /tmp/crawl4ai. The build arguments are quoted so a value containing spaces
# or glob characters is passed to git as a single word. The clone is shallow
# because only the branch tip is installed, which keeps git history out of
# the image layer.
# NOTE(review): assumes the package build does not derive its version from
# git history/tags - confirm before relying on --depth 1.
RUN git clone --depth 1 --branch "${GITHUB_BRANCH}" "${GITHUB_REPO}" /tmp/crawl4ai
|
|
|
|
# Bring in the supervisor configuration and the Python requirements list
# from the build context, then install the listed requirements.
COPY docker/supervisord.conf docker/requirements.txt ./

RUN pip install --no-cache-dir --requirement requirements.txt
|
|
|
|
# Install crawl4ai from the local clone; INSTALL_TYPE picks the extras:
#   all         -> [all] extras, then NLTK punkt/stopwords data and the
#                  crawl4ai.model_loader module are run
#   torch       -> [torch] extras only
#   transformer -> [transformer] extras, then crawl4ai.model_loader is run
#   any other   -> plain install without extras
RUN case "$INSTALL_TYPE" in \
        all) \
            pip install "/tmp/crawl4ai/[all]" \
            && python -m nltk.downloader punkt stopwords \
            && python -m crawl4ai.model_loader ;; \
        torch) \
            pip install "/tmp/crawl4ai/[torch]" ;; \
        transformer) \
            pip install "/tmp/crawl4ai/[transformer]" \
            && python -m crawl4ai.model_loader ;; \
        *) \
            pip install "/tmp/crawl4ai" ;; \
    esac
|
|
|
|
# Upgrade pip, then smoke-test the installation: the build fails here if
# either crawl4ai or Playwright's sync API cannot be imported.
RUN pip install --upgrade --no-cache-dir pip \
    && python -c 'import crawl4ai; print("✅ crawl4ai is ready to rock!")' \
    && python -c 'from playwright.sync_api import sync_playwright; print("✅ Playwright is feeling dramatic!")'

# Install the Chromium browser through the Playwright CLI, together with the
# operating-system dependencies it requests (--with-deps).
RUN playwright install --with-deps chromium
|
|
|
|
# Copy the files matched by docker/* from the build context into APP_HOME.
COPY docker/* ${APP_HOME}/

# Optionally install a custom configuration as ${APP_HOME}/config.yml.
# The -f test runs inside the build container, so CONFIG_PATH must name a
# file that already exists in the image filesystem (for example one placed
# there by the COPY above); when it is empty or missing this step is a no-op.
# The path is quoted so values containing spaces are handled, and the
# destination follows APP_HOME instead of a hard-coded /app so that
# overriding APP_HOME keeps working.
RUN if [ -n "$CONFIG_PATH" ] && [ -f "$CONFIG_PATH" ]; then \
        echo "Using custom config from $CONFIG_PATH" && \
        cp -- "$CONFIG_PATH" "${APP_HOME}/config.yml"; \
    fi
|
|
|
|
# Container health probe, run every 30s (10s timeout, 3 retries):
#   1. Read total memory in MiB from `free -m`; report unhealthy when it is
#      below 2048. If `free` is unavailable or prints nothing, MEM is empty
#      and the memory check is skipped cleanly (the -n guard and the quoting
#      avoid a `[: -lt: unary operator expected` error on every probe).
#   2. Redis must answer `redis-cli ping`.
#   3. The HTTP endpoint http://localhost:8000/health must return success.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD bash -c '\
        MEM=$(free -m 2>/dev/null | awk "/^Mem:/{print \$2}"); \
        if [ -n "$MEM" ] && [ "$MEM" -lt 2048 ]; then \
            echo "⚠️ Warning: Less than 2GB RAM available! Your container might need a memory boost! 🚀"; \
            exit 1; \
        fi && \
        redis-cli ping > /dev/null && \
        curl -f http://localhost:8000/health || exit 1'
|
|
|
|
# EXPOSE 6379
|
|
|
|
# Default process: supervisord in exec form. The configuration path is
# relative, so it is resolved against the image's working directory, where
# supervisord.conf was copied earlier in this file.
CMD ["supervisord", "-c", "supervisord.conf"]
|
|
|