104 lines
2.5 KiB
Docker
104 lines
2.5 KiB
Docker
# Stage 1: resolve the locked dependency set with Poetry.
# NOTE(review): no `COPY --from=python-builder` is visible in this file, so the
# outputs of this stage appear to be unused by the final image - confirm.
FROM python:3.12-bookworm AS python-builder

# `poetry export` is provided by poetry-plugin-export, which Poetry 2.x no
# longer bundles. Install it explicitly so the export step in this stage works
# with whichever Poetry release pip resolves.
RUN pip install --no-cache-dir poetry poetry-plugin-export
|
|
|
|
# Poetry settings for this stage:
#   POETRY_CACHE_DIR      - cache location, reused as a cache-mount target
#   POETRY_NO_INTERACTION - never prompt during the build
ENV POETRY_CACHE_DIR=/tmp/poetry_cache \
    POETRY_NO_INTERACTION=1
|
|
|
|
# Work from /app and bring in only the dependency manifests.
WORKDIR /app
COPY pyproject.toml poetry.lock ./

# Write the locked dependencies to requirements.txt, with Poetry's cache
# directory backed by a BuildKit cache mount.
RUN --mount=type=cache,target=$POETRY_CACHE_DIR \
    poetry export --format requirements.txt --output requirements.txt
|
|
|
|
# Compiler toolchain and Debian Python build helpers for the builder stage.
# The apt package lists are removed in the same layer that downloads them.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        g++ \
        gcc \
        python3-dev \
        python3-pip \
        python3-setuptools \
        python3-wheel \
    && rm -rf /var/lib/apt/lists/*
|
|
|
|
# Install the character-detection dependency that has build issues.
# The original cchardet package is replaced here by faust-cchardet, the fork
# that the final stage of this file already installs, so both stages agree on
# a single distribution.
RUN pip install --no-cache-dir faust-cchardet
|
|
|
|
# Final stage: the image that is actually run.
FROM python:3.12-bookworm

# awslambdaric is the AWS Lambda Runtime Interface Client; the image
# ENTRYPOINT launches it as a module.
RUN python3 -m pip install \
        --no-cache-dir \
        awslambdaric
|
|
|
|
# System tools, compiler toolchain and services installed from Debian.
# Packages are listed alphabetically; the apt lists are removed in the same
# layer that downloads them.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        curl \
        git \
        gnupg \
        libjpeg-dev \
        pkg-config \
        python3-dev \
        redis-server \
        supervisor \
        wget \
    && rm -rf /var/lib/apt/lists/*
|
|
|
|
# Shared libraries installed from Debian, listed alphabetically.
# NOTE(review): presumably the runtime dependencies of the Chromium build that
# Playwright installs later in this stage - confirm before trimming the list.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        libasound2 \
        libatk-bridge2.0-0 \
        libatk1.0-0 \
        libatspi2.0-0 \
        libcairo2 \
        libcups2 \
        libdbus-1-3 \
        libdrm2 \
        libgbm1 \
        libglib2.0-0 \
        libnspr4 \
        libnss3 \
        libpango-1.0-0 \
        libx11-6 \
        libxcb1 \
        libxcomposite1 \
        libxdamage1 \
        libxext6 \
        libxfixes3 \
        libxkbcommon0 \
        libxrandr2 \
    && rm -rf /var/lib/apt/lists/*
|
|
|
|
# build-essential and python3-dev are already installed by the first apt-get
# step of this stage, so no separate compiler-toolchain layer is needed here
# (a second `apt-get update && apt-get install` of the same packages would only
# re-download the package lists and install nothing).
|
|
|
|
# Function directory (build-time argument) that holds the handler code and the
# Playwright browser binaries.
ARG FUNCTION_DIR="/function"

# Critical runtime environment:
#   PLAYWRIGHT_BROWSERS_PATH  - browser install location under the function dir
#   HOME                      - pointed at /tmp
#   CRAWL4_AI_BASE_DIRECTORY  - Crawl4ai base directory
ENV PLAYWRIGHT_BROWSERS_PATH="${FUNCTION_DIR}/pw-browsers" \
    HOME="/tmp" \
    CRAWL4_AI_BASE_DIRECTORY="/tmp/.crawl4ai"

# Create the browser directory and the Crawl4ai base directory in one layer,
# with the expansions quoted so the paths are never word-split.
# NOTE(review): on AWS Lambda /tmp is presumably replaced by ephemeral storage
# at runtime, so the build-time /tmp/.crawl4ai may not exist in the running
# function - confirm the application creates it on demand.
RUN mkdir -p "${PLAYWRIGHT_BROWSERS_PATH}" "${CRAWL4_AI_BASE_DIRECTORY}"
|
|
|
|
# Character-detection package, installed on its own before Crawl4ai.
RUN pip install \
        --no-cache-dir \
        faust-cchardet

# Crawl4ai and its dependencies, taken from the `next` branch of the upstream
# repository.
# NOTE(review): a branch reference is not a fixed revision, so rebuilds may
# pick up different code - consider pinning a tag or commit.
RUN pip install \
        --no-cache-dir \
        git+https://github.com/unclecode/crawl4ai.git@next
|
|
|
|
# Download only the Chromium browser. The --with-deps flag is deliberately not
# passed, so Playwright does not install any system packages itself.
RUN playwright install chromium
|
|
|
|
# Make the function directory the working directory, then place the handler
# module inside it.
WORKDIR ${FUNCTION_DIR}
COPY lambda_function.py ./

# Start the Lambda Runtime Interface Client; CMD supplies the handler as
# "<module>.<function>" and can be overridden when the container is run.
ENTRYPOINT ["/usr/local/bin/python", "-m", "awslambdaric"]
CMD ["lambda_function.handler"]