Update README and Dockerfile

This commit is contained in:
unclecode
2024-06-30 00:15:43 +08:00
parent 61ae2de841
commit 7b0979e134
2 changed files with 15 additions and 17 deletions

View File

@@ -18,12 +18,11 @@ RUN apt-get update && \
software-properties-common && \ software-properties-common && \
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*
# Install Python dependencies # Copy the application code
COPY requirements.txt . COPY . .
RUN pip install --no-cache-dir -r requirements.txt && \
pip install --no-cache-dir spacy torch onnxruntime uvicorn && \ # Install Crawl4AI using the local setup.py (which will use the default installation)
python -m spacy download en_core_web_sm RUN pip install --no-cache-dir .
# pip install --no-cache-dir spacy torch torchvision torchaudio onnxruntime uvicorn && \
# Install Google Chrome and ChromeDriver # Install Google Chrome and ChromeDriver
RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - && \ RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - && \
@@ -33,9 +32,6 @@ RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key
wget -O /tmp/chromedriver.zip http://chromedriver.storage.googleapis.com/`curl -sS chromedriver.storage.googleapis.com/LATEST_RELEASE`/chromedriver_linux64.zip && \ wget -O /tmp/chromedriver.zip http://chromedriver.storage.googleapis.com/`curl -sS chromedriver.storage.googleapis.com/LATEST_RELEASE`/chromedriver_linux64.zip && \
unzip /tmp/chromedriver.zip chromedriver -d /usr/local/bin/ unzip /tmp/chromedriver.zip chromedriver -d /usr/local/bin/
# Copy the rest of the application code
COPY . .
# Set environment to use Chrome and ChromeDriver properly # Set environment to use Chrome and ChromeDriver properly
ENV CHROME_BIN=/usr/bin/google-chrome \ ENV CHROME_BIN=/usr/bin/google-chrome \
CHROMEDRIVER=/usr/local/bin/chromedriver \ CHROMEDRIVER=/usr/local/bin/chromedriver \
@@ -43,9 +39,6 @@ ENV CHROME_BIN=/usr/bin/google-chrome \
DBUS_SESSION_BUS_ADDRESS=/dev/null \ DBUS_SESSION_BUS_ADDRESS=/dev/null \
PYTHONUNBUFFERED=1 PYTHONUNBUFFERED=1
# pip install -e .[all]
RUN pip install --no-cache-dir -e .[all]
# Ensure the PATH environment variable includes the location of the installed packages # Ensure the PATH environment variable includes the location of the installed packages
ENV PATH /opt/conda/bin:$PATH ENV PATH /opt/conda/bin:$PATH
@@ -53,15 +46,13 @@ ENV PATH /opt/conda/bin:$PATH
EXPOSE 80 EXPOSE 80
# Download models call cli "crawl4ai-download-models" # Download models call cli "crawl4ai-download-models"
RUN crawl4ai-download-models # RUN crawl4ai-download-models
# Instakk mkdocs # Install mkdocs
RUN pip install mkdocs mkdocs-terminal RUN pip install mkdocs mkdocs-terminal
# Call mkdocs to build the documentation # Call mkdocs to build the documentation
RUN mkdocs build RUN mkdocs build
# Run uvicorn # Run uvicorn
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80", "--workers", "4"] CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80", "--workers", "4"]

View File

@@ -52,6 +52,13 @@ result = crawler.run(url="https://www.nbcnews.com/business")
print(result.markdown) print(result.markdown)
``` ```
## How to install 🛠
```bash
virtualenv venv
source venv/bin/activate
pip install "crawl4ai @ git+https://github.com/unclecode/crawl4ai.git"
```
### Speed-First Design 🚀 ### Speed-First Design 🚀
Perhaps the most important design principle for this library is speed. We need to ensure it can handle many links and resources in parallel as quickly as possible. By combining this speed with fast LLMs like Groq, the results will be truly amazing. Perhaps the most important design principle for this library is speed. We need to ensure it can handle many links and resources in parallel as quickly as possible. By combining this speed with fast LLMs like Groq, the results will be truly amazing.