refactor(docker): improve server architecture and configuration
Complete overhaul of Docker deployment setup with improved architecture: - Add Redis integration for task management - Implement rate limiting and security middleware - Add Prometheus metrics and health checks - Improve error handling and logging - Add support for streaming responses - Implement proper configuration management - Add platform-specific optimizations for ARM64/AMD64 BREAKING CHANGE: Docker deployment now requires Redis and new config.yml structure
This commit is contained in:
@@ -1,174 +0,0 @@
|
||||
FROM python:3.10-slim
|
||||
|
||||
# Set build arguments
|
||||
ARG APP_HOME=/app
|
||||
ARG GITHUB_REPO=https://github.com/yourusername/crawl4ai.git
|
||||
ARG GITHUB_BRANCH=main
|
||||
ARG USE_LOCAL=false
|
||||
|
||||
# 🤓 Environment variables - because who doesn't love a good ENV party?
|
||||
ENV PYTHONFAULTHANDLER=1 \
|
||||
PYTHONHASHSEED=random \
|
||||
PYTHONUNBUFFERED=1 \
|
||||
PIP_NO_CACHE_DIR=1 \
|
||||
PYTHONDONTWRITEBYTECODE=1 \
|
||||
PIP_DISABLE_PIP_VERSION_CHECK=1 \
|
||||
PIP_DEFAULT_TIMEOUT=100 \
|
||||
DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# Other build arguments
|
||||
ARG PYTHON_VERSION=3.10
|
||||
ARG INSTALL_TYPE=default
|
||||
ARG ENABLE_GPU=false
|
||||
ARG TARGETARCH
|
||||
|
||||
# 🎯 Platform-specific labels - because even containers need ID badges
|
||||
LABEL maintainer="unclecode"
|
||||
LABEL description="🔥🕷️ Crawl4AI: Open-source LLM Friendly Web Crawler & scraper"
|
||||
LABEL version="1.0"
|
||||
|
||||
# 📦 Installing system dependencies... please hold, your package is being delivered
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
build-essential \
|
||||
curl \
|
||||
wget \
|
||||
gnupg \
|
||||
git \
|
||||
cmake \
|
||||
pkg-config \
|
||||
python3-dev \
|
||||
libjpeg-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# 🎭 Playwright dependencies - because browsers need their vitamins too
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libglib2.0-0 \
|
||||
libnss3 \
|
||||
libnspr4 \
|
||||
libatk1.0-0 \
|
||||
libatk-bridge2.0-0 \
|
||||
libcups2 \
|
||||
libdrm2 \
|
||||
libdbus-1-3 \
|
||||
libxcb1 \
|
||||
libxkbcommon0 \
|
||||
libx11-6 \
|
||||
libxcomposite1 \
|
||||
libxdamage1 \
|
||||
libxext6 \
|
||||
libxfixes3 \
|
||||
libxrandr2 \
|
||||
libgbm1 \
|
||||
libpango-1.0-0 \
|
||||
libcairo2 \
|
||||
libasound2 \
|
||||
libatspi2.0-0 \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# 🎮 GPU support - because sometimes CPU just doesn't cut it
|
||||
RUN if [ "$ENABLE_GPU" = "true" ] && [ "$TARGETARCH" = "amd64" ] ; then \
|
||||
apt-get update && apt-get install -y --no-install-recommends \
|
||||
nvidia-cuda-toolkit \
|
||||
&& rm -rf /var/lib/apt/lists/* ; \
|
||||
else \
|
||||
echo "Skipping NVIDIA CUDA Toolkit installation (unsupported platform or GPU disabled)"; \
|
||||
fi
|
||||
|
||||
# 🏗️ Platform-specific optimizations - because one size doesn't fit all
|
||||
RUN if [ "$TARGETARCH" = "arm64" ]; then \
|
||||
echo "🦾 Installing ARM-specific optimizations"; \
|
||||
apt-get update && apt-get install -y --no-install-recommends \
|
||||
libopenblas-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*; \
|
||||
elif [ "$TARGETARCH" = "amd64" ]; then \
|
||||
echo "🖥️ Installing AMD64-specific optimizations"; \
|
||||
apt-get update && apt-get install -y --no-install-recommends \
|
||||
libomp-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*; \
|
||||
fi
|
||||
|
||||
WORKDIR ${APP_HOME}
|
||||
|
||||
# 🔄 Installation script - now with retry logic because sometimes Git needs a coffee break
|
||||
RUN echo '#!/bin/bash\n\
|
||||
if [ "$USE_LOCAL" = "true" ]; then\n\
|
||||
echo "📦 Installing from local source..."\n\
|
||||
pip install --no-cache-dir /tmp/project/\n\
|
||||
else\n\
|
||||
echo "🌐 Installing from GitHub..."\n\
|
||||
for i in {1..3}; do \n\
|
||||
git clone --branch ${GITHUB_BRANCH} ${GITHUB_REPO} /tmp/crawl4ai && break || \n\
|
||||
{ echo "Attempt $i/3 failed! Taking a short break... ☕"; sleep 5; }; \n\
|
||||
done\n\
|
||||
pip install --no-cache-dir /tmp/crawl4ai\n\
|
||||
fi' > /tmp/install.sh && chmod +x /tmp/install.sh
|
||||
|
||||
# Copy local project if USE_LOCAL is true
|
||||
COPY . /tmp/project/
|
||||
|
||||
# Copy and install other requirements
|
||||
COPY deploy/docker/requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Install ML dependencies first for better layer caching
|
||||
RUN if [ "$INSTALL_TYPE" = "all" ] ; then \
|
||||
pip install --no-cache-dir \
|
||||
torch \
|
||||
torchvision \
|
||||
torchaudio \
|
||||
scikit-learn \
|
||||
nltk \
|
||||
transformers \
|
||||
tokenizers && \
|
||||
python -m nltk.downloader punkt stopwords ; \
|
||||
fi
|
||||
|
||||
# Install the package
|
||||
RUN if [ "$INSTALL_TYPE" = "all" ] ; then \
|
||||
pip install "/tmp/project/[all]" && \
|
||||
python -m crawl4ai.model_loader ; \
|
||||
elif [ "$INSTALL_TYPE" = "torch" ] ; then \
|
||||
pip install "/tmp/project/[torch]" ; \
|
||||
elif [ "$INSTALL_TYPE" = "transformer" ] ; then \
|
||||
pip install "/tmp/project/[transformer]" && \
|
||||
python -m crawl4ai.model_loader ; \
|
||||
else \
|
||||
pip install "/tmp/project" ; \
|
||||
fi
|
||||
|
||||
# 🚀 Installation validation - trust but verify!
|
||||
RUN pip install --no-cache-dir --upgrade pip && \
|
||||
/tmp/install.sh && \
|
||||
python -c "import crawl4ai; print('✅ crawl4ai is ready to rock!')" && \
|
||||
python -c "from playwright.sync_api import sync_playwright; print('✅ Playwright is feeling dramatic!')"
|
||||
|
||||
RUN playwright install --with-deps chromium
|
||||
|
||||
# Copy application files
|
||||
COPY deploy/docker/* ${APP_HOME}/
|
||||
|
||||
# 🏥 Health check - now with memory validation!
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
||||
CMD bash -c '\
|
||||
MEM=$(free -m | awk "/^Mem:/{print \$2}"); \
|
||||
if [ $MEM -lt 2048 ]; then \
|
||||
echo "⚠️ Warning: Less than 2GB RAM available! Your container might need a memory boost! 🚀"; \
|
||||
exit 1; \
|
||||
fi && \
|
||||
curl -f http://localhost:8000/health || exit 1'
|
||||
|
||||
# Entrypoint script
|
||||
COPY deploy/docker/docker-entrypoint.sh /usr/local/bin/
|
||||
RUN chmod +x /usr/local/bin/docker-entrypoint.sh
|
||||
ENTRYPOINT ["docker-entrypoint.sh"]
|
||||
|
||||
# Default command - may the server be with you! 🚀
|
||||
CMD ["gunicorn", \
|
||||
"--bind", "0.0.0.0:8000", \
|
||||
"--workers", "4", \
|
||||
"--threads", "2", \
|
||||
"--timeout", "120", \
|
||||
"--graceful-timeout", "30", \
|
||||
"--log-level", "info", \
|
||||
"--worker-class", "uvicorn.workers.UvicornWorker", \
|
||||
"server:app"]
|
||||
Reference in New Issue
Block a user