Files
crawl4ai/deploy/lambda/Dockerfile
2025-03-10 18:57:14 +08:00

104 lines
2.5 KiB
Docker

FROM python:3.12-bookworm AS python-builder
RUN pip install poetry
ENV POETRY_NO_INTERACTION=1 \
POETRY_CACHE_DIR=/tmp/poetry_cache
WORKDIR /app
COPY pyproject.toml poetry.lock ./
RUN --mount=type=cache,target=$POETRY_CACHE_DIR poetry export -f requirements.txt -o requirements.txt
# Install build dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
python3-dev \
python3-setuptools \
python3-wheel \
python3-pip \
gcc \
g++ \
&& rm -rf /var/lib/apt/lists/*
# Install specific dependencies that have build issues
RUN pip install --no-cache-dir cchardet
FROM python:3.12-bookworm
# Install AWS Lambda Runtime Interface Client
RUN python3 -m pip install --no-cache-dir awslambdaric
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
curl \
wget \
gnupg \
git \
cmake \
pkg-config \
python3-dev \
libjpeg-dev \
redis-server \
supervisor \
&& rm -rf /var/lib/apt/lists/*
RUN apt-get update && apt-get install -y --no-install-recommends \
libglib2.0-0 \
libnss3 \
libnspr4 \
libatk1.0-0 \
libatk-bridge2.0-0 \
libcups2 \
libdrm2 \
libdbus-1-3 \
libxcb1 \
libxkbcommon0 \
libx11-6 \
libxcomposite1 \
libxdamage1 \
libxext6 \
libxfixes3 \
libxrandr2 \
libgbm1 \
libpango-1.0-0 \
libcairo2 \
libasound2 \
libatspi2.0-0 \
&& rm -rf /var/lib/apt/lists/*
# Install build essentials for any compilations needed
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
python3-dev \
&& rm -rf /var/lib/apt/lists/*
# Set up function directory and browser path
ARG FUNCTION_DIR="/function"
RUN mkdir -p "${FUNCTION_DIR}/pw-browsers"
RUN mkdir -p "/tmp/.crawl4ai"
# Set critical environment variables
ENV PLAYWRIGHT_BROWSERS_PATH="${FUNCTION_DIR}/pw-browsers" \
HOME="/tmp" \
CRAWL4_AI_BASE_DIRECTORY="/tmp/.crawl4ai"
# Create Craw4ai base directory
RUN mkdir -p ${CRAWL4_AI_BASE_DIRECTORY}
RUN pip install --no-cache-dir faust-cchardet
# Install Crawl4ai and dependencies
RUN pip install --no-cache-dir git+https://github.com/unclecode/crawl4ai.git@next
# Install Chromium only (no deps flag)
RUN playwright install chromium
# Copy function code
COPY lambda_function.py ${FUNCTION_DIR}/
# Set working directory
WORKDIR ${FUNCTION_DIR}
ENTRYPOINT [ "/usr/local/bin/python", "-m", "awslambdaric" ]
CMD [ "lambda_function.handler" ]