refactor: Update Dockerfile to install Crawl4AI with specified options
This commit updates the Dockerfile to install Crawl4AI with the specified options. The `INSTALL_OPTION` build argument is used to determine which additional packages to install. If the option is set to "all", all models will be downloaded. If the option is set to "torch", only torch models will be downloaded. If the option is set to "transformer", only transformer models will be downloaded. If no option is specified, the default installation will be used. This change improves the flexibility and customization of the Crawl4AI installation process.
This commit is contained in:
27
Dockerfile
27
Dockerfile
@@ -4,6 +4,9 @@ FROM python:3.10-slim-bookworm
|
||||
# Set the working directory in the container
|
||||
WORKDIR /usr/src/app
|
||||
|
||||
# Define build arguments
|
||||
ARG INSTALL_OPTION=default
|
||||
|
||||
# Install build dependencies
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
@@ -21,8 +24,20 @@ RUN apt-get update && \
|
||||
# Copy the application code
|
||||
COPY . .
|
||||
|
||||
# Install Crawl4AI using the local setup.py (which will use the default installation)
|
||||
RUN pip install --no-cache-dir .
|
||||
# Install Crawl4AI from the local setup.py using the INSTALL_OPTION build arg,
# and download models only for the torch, transformer, or all options.
# The extras spec is quoted: an unquoted `.[all]` is a POSIX-sh glob pattern
# and could expand against a matching dotfile in the build context.
# Any other INSTALL_OPTION value (including the "default" default) performs
# the plain base install with no model download — same as the original chain.
RUN case "$INSTALL_OPTION" in \
        all|torch|transformer) \
            pip install --no-cache-dir ".[$INSTALL_OPTION]" && \
            crawl4ai-download-models ;; \
        *) \
            pip install --no-cache-dir . ;; \
    esac
|
||||
|
||||
# Install Google Chrome
|
||||
RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - && \
|
||||
@@ -30,9 +45,6 @@ RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key
|
||||
apt-get update && \
|
||||
apt-get install -y google-chrome-stable
|
||||
|
||||
# Update webdriver_manager to version 4.0.2
|
||||
RUN pip install --no-cache-dir webdriver_manager==4.0.2
|
||||
|
||||
# Set environment to use Chrome properly
|
||||
ENV CHROME_BIN=/usr/bin/google-chrome \
|
||||
DISPLAY=:99 \
|
||||
@@ -40,14 +52,11 @@ ENV CHROME_BIN=/usr/bin/google-chrome \
|
||||
PYTHONUNBUFFERED=1
|
||||
|
||||
# Ensure the PATH environment variable includes the location of the installed packages
|
||||
ENV PATH /opt/conda/bin:$PATH
|
||||
ENV PATH=/opt/conda/bin:$PATH
|
||||
|
||||
# Make port 80 available to the world outside this container
|
||||
EXPOSE 80
|
||||
|
||||
# Download models call cli "crawl4ai-download-models"
|
||||
# RUN crawl4ai-download-models
|
||||
|
||||
# Install mkdocs (plus the terminal theme) for building the documentation site.
# --no-cache-dir keeps the pip wheel cache out of the image layer, consistent
# with every other pip invocation in this Dockerfile (hadolint DL3042).
# TODO(review): pin versions (e.g. mkdocs==x.y.z) for reproducible builds.
RUN pip install --no-cache-dir mkdocs mkdocs-terminal
|
||||
|
||||
|
||||
Reference in New Issue
Block a user