Files
crawl4ai/DockerfileTest2

74 lines
2.5 KiB
Plaintext

# First stage: Build and install dependencies
FROM pytorch/pytorch:latest as builder
# Set the working directory in the container
WORKDIR /usr/src/app
# Install build dependencies
RUN apt-get update && \
apt-get install -y --no-install-recommends \
wget \
git \
curl \
unzip \
gnupg \
xvfb \
ca-certificates \
apt-transport-https \
software-properties-common && \
rm -rf /var/lib/apt/lists/*
# Install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt && \
pip install --no-cache-dir spacy onnxruntime && \
python -m spacy download en_core_web_sm
# Install Google Chrome and ChromeDriver
RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - && \
sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google-chrome.list' && \
apt-get update && \
apt-get install -y google-chrome-stable && \
wget -O /tmp/chromedriver.zip http://chromedriver.storage.googleapis.com/`curl -sS chromedriver.storage.googleapis.com/LATEST_RELEASE`/chromedriver_linux64.zip && \
unzip /tmp/chromedriver.zip chromedriver -d /usr/local/bin/
# Second stage: Create the final image
FROM pytorch/pytorch:latest
# Set the working directory in the container
WORKDIR /usr/src/app
# Copy Chromedriver and Chrome from the builder stage
COPY --from=builder /usr/local/bin/chromedriver /usr/local/bin/chromedriver
COPY --from=builder /usr/bin/google-chrome /usr/bin/google-chrome
# Copy installed Python packages from builder stage
COPY --from=builder /opt/conda/lib/python3.10/site-packages /opt/conda/lib/python3.10/site-packages
COPY --from=builder /opt/conda/bin /opt/conda/bin
# Copy the rest of the application code
COPY . .
# Set environment to use Chrome and ChromeDriver properly
ENV CHROME_BIN=/usr/bin/google-chrome \
CHROMEDRIVER=/usr/local/bin/chromedriver \
DISPLAY=:99 \
DBUS_SESSION_BUS_ADDRESS=/dev/null \
PYTHONUNBUFFERED=1
# pip install -e .[all]
RUN pip install --no-cache-dir -e .[all]
# Ensure the PATH environment variable includes the location of the installed packages
ENV PATH /opt/conda/bin:$PATH
# Make port 80 available to the world outside this container
EXPOSE 80
# Download models call cli "crawl4ai-download-models"
RUN crawl4ai-download-models
# RUN python crawl4ai/model_loader.py
# Run uvicorn
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80", "--workers", "4"]