Add JWT token-based authentication to Docker server and client. Refactor server architecture for better code organization and error handling. Move Dockerfile to root deploy directory and update configuration. Add comprehensive documentation and examples. BREAKING CHANGE: Docker server now requires authentication by default. Endpoints require JWT tokens when security.jwt_enabled is true in config.
138 lines
3.9 KiB
Docker
138 lines
3.9 KiB
Docker
# Python version of the base image. An ARG placed before FROM is only visible
# to FROM lines, so the default here keeps the image on python:3.10-slim unless
# --build-arg PYTHON_VERSION=... is passed.
ARG PYTHON_VERSION=3.10
FROM python:${PYTHON_VERSION}-slim

# Build arguments
# APP_HOME      - working directory of the application inside the image.
# GITHUB_REPO   - repository cloned into /tmp/crawl4ai during the build.
# GITHUB_BRANCH - branch (or tag) of GITHUB_REPO that is checked out.
# USE_LOCAL     - NOTE(review): not referenced in the visible part of this file.
# CONFIG_PATH   - optional path to a config file copied over config.yml.
ARG APP_HOME=/app
ARG GITHUB_REPO=https://github.com/unclecode/crawl4ai.git
ARG GITHUB_BRANCH=next
ARG USE_LOCAL=False
ARG CONFIG_PATH=""
# Runtime environment: Python interpreter flags, pip defaults, and the
# Redis host/port settings.
ENV PYTHONFAULTHANDLER=1 \
    PYTHONHASHSEED=random \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_DEFAULT_TIMEOUT=100 \
    REDIS_HOST=localhost \
    REDIS_PORT=6379

# Build-time only: keeps apt-get non-interactive in the RUN steps below without
# persisting the setting into the environment of running containers (an ARG is
# visible to later RUN instructions in this stage but is not stored as ENV).
ARG DEBIAN_FRONTEND=noninteractive
# Target CPU architecture; compared against "amd64" / "arm64" by later RUN steps.
ARG TARGETARCH

# Python version build argument.
ARG PYTHON_VERSION=3.10

# Feature switches:
#   INSTALL_TYPE - selects which crawl4ai extras are installed (default, all, torch, transformer).
#   ENABLE_GPU   - "true" enables the CUDA toolkit install on amd64.
ARG INSTALL_TYPE=default
ARG ENABLE_GPU=false
# Image metadata, declared in a single LABEL instruction.
LABEL maintainer="unclecode" \
      description="🔥🕷️ Crawl4AI: Open-source LLM Friendly Web Crawler & scraper" \
      version="1.0"
# Build toolchain, download utilities, and the redis-server / supervisor
# packages. Package list is kept alphabetical for easier diffs; the apt index
# is removed in the same layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        curl \
        git \
        gnupg \
        libjpeg-dev \
        pkg-config \
        python3-dev \
        redis-server \
        supervisor \
        wget \
    && rm -rf /var/lib/apt/lists/*
# Shared libraries (GLib, NSS, ATK, X11, audio, ...) - presumably the runtime
# dependencies of the Chromium browser installed later; confirm before pruning.
# Package list is kept alphabetical; the apt index is removed in the same layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        libasound2 \
        libatk-bridge2.0-0 \
        libatk1.0-0 \
        libatspi2.0-0 \
        libcairo2 \
        libcups2 \
        libdbus-1-3 \
        libdrm2 \
        libgbm1 \
        libglib2.0-0 \
        libnspr4 \
        libnss3 \
        libpango-1.0-0 \
        libx11-6 \
        libxcb1 \
        libxcomposite1 \
        libxdamage1 \
        libxext6 \
        libxfixes3 \
        libxkbcommon0 \
        libxrandr2 \
    && rm -rf /var/lib/apt/lists/*
# Optional CUDA toolkit: installed only when ENABLE_GPU is "true" AND the
# target architecture is amd64; every other combination just logs a skip.
RUN if [ "$ENABLE_GPU" != "true" ] || [ "$TARGETARCH" != "amd64" ]; then \
        echo "Skipping NVIDIA CUDA Toolkit installation (unsupported platform or GPU disabled)"; \
    else \
        apt-get update \
        && apt-get install -y --no-install-recommends nvidia-cuda-toolkit \
        && rm -rf /var/lib/apt/lists/*; \
    fi
# Architecture-specific packages, dispatched on TARGETARCH:
#   arm64 -> libopenblas-dev, amd64 -> libomp-dev, anything else -> skipped.
RUN case "$TARGETARCH" in \
        arm64) \
            echo "🦾 Installing ARM-specific optimizations"; \
            apt-get update \
            && apt-get install -y --no-install-recommends libopenblas-dev \
            && rm -rf /var/lib/apt/lists/* \
            ;; \
        amd64) \
            echo "🖥️ Installing AMD64-specific optimizations"; \
            apt-get update \
            && apt-get install -y --no-install-recommends libomp-dev \
            && rm -rf /var/lib/apt/lists/* \
            ;; \
        *) \
            echo "Skipping platform-specific optimizations (unsupported platform)" \
            ;; \
    esac
# All following relative paths (COPY destinations, supervisord.conf) resolve
# against the application home directory.
WORKDIR ${APP_HOME}

# Fetch the crawl4ai sources that the later pip install steps read from
# /tmp/crawl4ai. Only the tip of the requested branch is needed for that, so
# the clone is shallow (--depth 1 implies --single-branch) to keep the layer
# small. Arguments are quoted so unusual branch names or URLs cannot be
# word-split by the shell.
# NOTE(review): assumes the package build does not derive its version from git
# history - confirm before relying on the shallow clone.
RUN git clone --depth 1 --branch "${GITHUB_BRANCH}" "${GITHUB_REPO}" /tmp/crawl4ai
# Bring in the supervisor configuration and the requirements list, then
# install the listed Python dependencies into the image.
COPY docker/supervisord.conf docker/requirements.txt ./

RUN pip install --no-cache-dir --requirement requirements.txt
# Install crawl4ai from the cloned checkout, choosing extras by INSTALL_TYPE:
#   all         -> [all] extras, NLTK punkt/stopwords data, then the model loader
#   torch       -> [torch] extras
#   transformer -> [transformer] extras, then the model loader
#   (other)     -> plain install without extras
RUN case "$INSTALL_TYPE" in \
        all) \
            pip install "/tmp/crawl4ai/[all]" \
            && python -m nltk.downloader punkt stopwords \
            && python -m crawl4ai.model_loader \
            ;; \
        torch) \
            pip install "/tmp/crawl4ai/[torch]" \
            ;; \
        transformer) \
            pip install "/tmp/crawl4ai/[transformer]" \
            && python -m crawl4ai.model_loader \
            ;; \
        *) \
            pip install "/tmp/crawl4ai" \
            ;; \
    esac
# Upgrade pip, then fail the build early if either crawl4ai or Playwright's
# sync API cannot be imported. `set -e` aborts on the first failing command.
RUN set -e; \
    pip install --no-cache-dir --upgrade pip; \
    python -c "import crawl4ai; print('✅ crawl4ai is ready to rock!')"; \
    python -c "from playwright.sync_api import sync_playwright; print('✅ Playwright is feeling dramatic!')"

# Download the Chromium browser for Playwright together with its OS-level
# dependencies.
RUN playwright install --with-deps chromium
# Copy the files from the build context's docker/ directory into the
# application home.
COPY docker/* ${APP_HOME}/

# Optional config override. CONFIG_PATH is tested inside this RUN step, so it
# must name a file that already exists in the image filesystem at this point.
# The override is written next to the copied files under APP_HOME, paths are
# quoted so values containing spaces are handled, and a set-but-missing path is
# reported instead of being skipped silently (the build still succeeds).
RUN if [ -n "$CONFIG_PATH" ]; then \
        if [ -f "$CONFIG_PATH" ]; then \
            echo "Using custom config from $CONFIG_PATH" && \
            cp "$CONFIG_PATH" "${APP_HOME}/config.yml"; \
        else \
            echo "Warning: CONFIG_PATH=$CONFIG_PATH not found in image; no custom config applied"; \
        fi; \
    fi
# Container health probe, run through bash. It reports unhealthy when:
#   1. the second field of the `Mem:` row printed by `free -m` is below 2048, or
#   2. `redis-cli ping` fails, or
#   3. the HTTP endpoint http://localhost:8000/health does not answer successfully.
# MEM is quoted and given a default of 2048 so that an empty value (for example
# when `free` produces no output) skips the memory check cleanly instead of
# making `[` fail with a syntax error; the Redis and HTTP checks still run.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD bash -c '\
    MEM=$(free -m | awk "/^Mem:/{print \$2}"); \
    if [ "${MEM:-2048}" -lt 2048 ]; then \
        echo "⚠️ Warning: Less than 2GB RAM available! Your container might need a memory boost! 🚀"; \
        exit 1; \
    fi && \
    redis-cli ping > /dev/null && \
    curl -f http://localhost:8000/health || exit 1'

# Redis port is left unexposed.
# EXPOSE 6379

# Start supervisord with the supervisord.conf found in the working directory.
CMD ["supervisord", "-c", "supervisord.conf"]