Compare commits
1 commit: release/v0...docker-tes

| Author | SHA1 | Date |
|---|---|---|
|  | 4d43880cde |  |
DockerfileTest (new file, 37 lines)
@@ -0,0 +1,37 @@
# First stage: Build and install dependencies
FROM python:3.10-slim-bookworm as builder

# Set the working directory in the container
WORKDIR /usr/src/app

# Install build dependencies
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    wget \
    curl \
    unzip

# Install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt && \
    pip install --no-cache-dir spacy

# Copy the rest of the application code
COPY . .

# Set environment to use Chrome and ChromeDriver properly
ENV CHROME_BIN=/usr/bin/google-chrome \
    CHROMEDRIVER=/usr/local/bin/chromedriver \
    DISPLAY=:99 \
    DBUS_SESSION_BUS_ADDRESS=/dev/null \
    PYTHONUNBUFFERED=1

# Ensure the PATH environment variable includes the location of the installed packages
ENV PATH /usr/local/bin:$PATH

# Make port 80 available to the world outside this container
EXPOSE 80

# Print helloworld when the container launches
CMD ["echo", "Hello, World!"]
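As a sanity check, this image can be built and run directly; a minimal sketch, assuming the build runs from the repository root (the `crawl4ai:dockerfile-test` tag is illustrative, not part of the commit):

```bash
# Build using this specific Dockerfile; the tag name is illustrative
docker build -f DockerfileTest -t crawl4ai:dockerfile-test .

# The CMD only echoes a greeting, so a successful run prints it and exits
docker run --rm crawl4ai:dockerfile-test
```

Note that although this file declares an `as builder` stage, no second stage follows, so the build tools stay in the final image.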
DockerfileTest2 (new file, 73 lines)
@@ -0,0 +1,73 @@
# First stage: Build and install dependencies
FROM pytorch/pytorch:latest as builder

# Set the working directory in the container
WORKDIR /usr/src/app

# Install build dependencies
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    wget \
    git \
    curl \
    unzip \
    gnupg \
    xvfb \
    ca-certificates \
    apt-transport-https \
    software-properties-common && \
    rm -rf /var/lib/apt/lists/*

# Install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt && \
    pip install --no-cache-dir spacy onnxruntime && \
    python -m spacy download en_core_web_sm

# Install Google Chrome and ChromeDriver
RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - && \
    sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google-chrome.list' && \
    apt-get update && \
    apt-get install -y google-chrome-stable && \
    wget -O /tmp/chromedriver.zip http://chromedriver.storage.googleapis.com/`curl -sS chromedriver.storage.googleapis.com/LATEST_RELEASE`/chromedriver_linux64.zip && \
    unzip /tmp/chromedriver.zip chromedriver -d /usr/local/bin/

# Second stage: Create the final image
FROM pytorch/pytorch:latest

# Set the working directory in the container
WORKDIR /usr/src/app

# Copy Chromedriver and Chrome from the builder stage
COPY --from=builder /usr/local/bin/chromedriver /usr/local/bin/chromedriver
COPY --from=builder /usr/bin/google-chrome /usr/bin/google-chrome

# Copy installed Python packages from builder stage
COPY --from=builder /opt/conda/lib/python3.10/site-packages /opt/conda/lib/python3.10/site-packages
COPY --from=builder /opt/conda/bin /opt/conda/bin

# Copy the rest of the application code
COPY . .

# Set environment to use Chrome and ChromeDriver properly
ENV CHROME_BIN=/usr/bin/google-chrome \
    CHROMEDRIVER=/usr/local/bin/chromedriver \
    DISPLAY=:99 \
    DBUS_SESSION_BUS_ADDRESS=/dev/null \
    PYTHONUNBUFFERED=1

# pip install -e .[all]
RUN pip install --no-cache-dir -e .[all]

# Ensure the PATH environment variable includes the location of the installed packages
ENV PATH /opt/conda/bin:$PATH

# Make port 80 available to the world outside this container
EXPOSE 80

# Download models call cli "crawl4ai-download-models"
RUN crawl4ai-download-models
# RUN python crawl4ai/model_loader.py

# Run uvicorn
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80", "--workers", "4"]
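Since the second stage copies individual artifacts out of the builder, a quick way to confirm they survived the stage transition is to run their version commands inside the final image; a sketch, with an illustrative tag:

```bash
# Build the two-stage image and probe the copied binaries
docker build -f DockerfileTest2 -t crawl4ai:multistage .
docker run --rm crawl4ai:multistage chromedriver --version
docker run --rm crawl4ai:multistage google-chrome --version
```

If the `google-chrome` check fails, a likely cause is that `/usr/bin/google-chrome` is only an entry point, and its supporting files under `/opt/google/chrome` plus apt-installed shared libraries were not copied from the builder.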
DockerfileTest3 (new file, 61 lines)
@@ -0,0 +1,61 @@
# First stage: Build and install dependencies
FROM pytorch/pytorch:latest

# Set the working directory in the container
WORKDIR /usr/src/app

# Install build dependencies
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    wget \
    git \
    curl \
    unzip \
    gnupg \
    xvfb \
    ca-certificates \
    apt-transport-https \
    software-properties-common && \
    rm -rf /var/lib/apt/lists/*

# Install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt && \
    pip install --no-cache-dir spacy onnxruntime && \
    python -m spacy download en_core_web_sm

# Install Google Chrome and ChromeDriver
RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - && \
    sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google-chrome.list' && \
    apt-get update && \
    apt-get install -y google-chrome-stable && \
    wget -O /tmp/chromedriver.zip http://chromedriver.storage.googleapis.com/`curl -sS chromedriver.storage.googleapis.com/LATEST_RELEASE`/chromedriver_linux64.zip && \
    unzip /tmp/chromedriver.zip chromedriver -d /usr/local/bin/

# Copy the rest of the application code
COPY . .

# Set environment to use Chrome and ChromeDriver properly
ENV CHROME_BIN=/usr/bin/google-chrome \
    CHROMEDRIVER=/usr/local/bin/chromedriver \
    DISPLAY=:99 \
    DBUS_SESSION_BUS_ADDRESS=/dev/null \
    PYTHONUNBUFFERED=1

# pip install -e .[all]
RUN pip install --no-cache-dir -e .[all]

# Ensure the PATH environment variable includes the location of the installed packages
ENV PATH /opt/conda/bin:$PATH

# Make port 80 available to the world outside this container
EXPOSE 80

# Download models call cli "crawl4ai-download-models"
RUN crawl4ai-download-models
# RUN python crawl4ai/model_loader.py

# Run uvicorn
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80", "--workers", "4"]
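DockerfileTest3 is the single-stage variant of DockerfileTest2: the same packages and commands, but without the builder/final split, so wget, git, unzip, and the other build dependencies remain in the final layers. A sketch for comparing the two footprints (tags illustrative):

```bash
# Build both variants and compare image sizes; the single-stage image
# is expected to be larger because build tools stay in its final layers
docker build -f DockerfileTest2 -t crawl4ai:multistage .
docker build -f DockerfileTest3 -t crawl4ai:singlestage .
docker images | grep crawl4ai
```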
@@ -188,7 +188,7 @@ pip install -e .[all]
 # docker build --platform linux/amd64 -t crawl4ai .
 # For other users
 # docker build -t crawl4ai .
-docker run -d -p 8000:80 crawl4ai
+docker run -d -p 8000:80 --name crawl4ai_container_1 crawl4ai
 ```
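This README change pins the container name, so follow-up commands can address the container without looking up its generated ID; a sketch of the resulting workflow:

```bash
docker run -d -p 8000:80 --name crawl4ai_container_1 crawl4ai
docker logs -f crawl4ai_container_1        # tail the uvicorn output
docker stop crawl4ai_container_1 && docker rm crawl4ai_container_1
```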
crawl4ai/model_loader.py
@@ -53,7 +53,6 @@ def set_model_device(model):
     model.to(device)
     return model, device
 
-@lru_cache()
 def get_home_folder():
     home_folder = os.path.join(Path.home(), ".crawl4ai")
     os.makedirs(home_folder, exist_ok=True)
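Dropping `@lru_cache()` here should be behavior-neutral: the function's only side effect is `os.makedirs(..., exist_ok=True)`, which is idempotent, so repeated uncached calls are harmless. A quick demonstration:

```bash
# Repeated calls now re-run instead of being cached, but are no-ops
# after the first because exist_ok=True tolerates existing directories
python - <<'PY'
import os
from pathlib import Path

for _ in range(3):
    home_folder = os.path.join(Path.home(), ".crawl4ai")
    os.makedirs(home_folder, exist_ok=True)
print(home_folder)
PY
```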
@@ -202,7 +201,7 @@ def load_spacy_model():
     repo_folder = os.path.join(home_folder, "crawl4ai")
     model_folder = os.path.join(home_folder, name)
 
-    # print("[LOG] ⏬ Downloading Spacy model for the first time...")
+    print("[LOG] ⏬ Downloading Spacy model for the first time...")
 
     # Remove existing repo folder if it exists
     if Path(repo_folder).exists():
@@ -230,7 +229,7 @@ def load_spacy_model():
             shutil.rmtree(repo_folder)
 
         # Print completion message
-        # print("[LOG] ✅ Spacy Model downloaded successfully")
+        print("[LOG] ✅ Spacy Model downloaded successfully")
     except subprocess.CalledProcessError as e:
         print(f"An error occurred while cloning the repository: {e}")
     except Exception as e:
@@ -255,8 +254,8 @@ def download_all_models(remove_existing=False):
     # Load each model to trigger download
     # print("[LOG] Downloading BERT Base Uncased...")
     # load_bert_base_uncased()
-    # print("[LOG] Downloading BGE Small EN v1.5...")
-    # load_bge_small_en_v1_5()
+    print("[LOG] Downloading BGE Small EN v1.5...")
+    load_bge_small_en_v1_5()
     # print("[LOG] Downloading ONNX model...")
     # load_onnx_all_MiniLM_l6_v2()
     print("[LOG] Downloading text classifier...")
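The net effect of this hunk is that BGE Small EN v1.5 is now actually downloaded when models are bootstrapped. Assuming the `crawl4ai-download-models` CLI used in the Dockerfiles above routes to `download_all_models()` (which the Dockerfile comment suggests), the image build now fetches that model too:

```bash
# Model bootstrap, as invoked by RUN in the Dockerfiles; the on-disk
# layout under ~/.crawl4ai is an assumption based on get_home_folder()
crawl4ai-download-models
ls ~/.crawl4ai
```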
setup.py (12 changed lines)
@@ -1,8 +1,18 @@
 from setuptools import setup, find_packages
-import os
+import os, sys
+from pathlib import Path
 import subprocess
 from setuptools.command.install import install
 
+def get_home_folder():
+    home_folder = os.path.join(Path.home(), ".crawl4ai")
+    os.makedirs(home_folder, exist_ok=True)
+    os.makedirs(f"{home_folder}/cache", exist_ok=True)
+    os.makedirs(f"{home_folder}/models", exist_ok=True)
+    return home_folder
+
+home_folder = get_home_folder()
+
 # Read the requirements from requirements.txt
 with open("requirements.txt") as f:
     requirements = f.read().splitlines()
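Because `get_home_folder()` is called at module level, the directory layout is created as a side effect of merely evaluating setup.py, e.g. during an editable install; a sketch:

```bash
# Installing the package now pre-creates the cache layout in $HOME
pip install -e .[all]
ls ~/.crawl4ai
# expected: cache  models
```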