From 7b0979e1348ec768ffe1f4efffbabc62e2780684 Mon Sep 17 00:00:00 2001 From: unclecode Date: Sun, 30 Jun 2024 00:15:43 +0800 Subject: [PATCH] Update Readme and Docker file --- Dockerfile | 25 ++++++++----------------- README.md | 7 +++++++ 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/Dockerfile b/Dockerfile index 2131ccf7..07c41ad7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,12 +18,11 @@ RUN apt-get update && \ software-properties-common && \ rm -rf /var/lib/apt/lists/* -# Install Python dependencies -COPY requirements.txt . -RUN pip install --no-cache-dir -r requirements.txt && \ - pip install --no-cache-dir spacy torch onnxruntime uvicorn && \ - python -m spacy download en_core_web_sm - # pip install --no-cache-dir spacy torch torchvision torchaudio onnxruntime uvicorn && \ +# Copy the application code +COPY . . + +# Install Crawl4AI using the local setup.py (which will use the default installation) +RUN pip install --no-cache-dir . # Install Google Chrome and ChromeDriver RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - && \ @@ -33,9 +32,6 @@ RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key wget -O /tmp/chromedriver.zip http://chromedriver.storage.googleapis.com/`curl -sS chromedriver.storage.googleapis.com/LATEST_RELEASE`/chromedriver_linux64.zip && \ unzip /tmp/chromedriver.zip chromedriver -d /usr/local/bin/ -# Copy the rest of the application code -COPY . . 
- # Set environment to use Chrome and ChromeDriver properly ENV CHROME_BIN=/usr/bin/google-chrome \ CHROMEDRIVER=/usr/local/bin/chromedriver \ @@ -43,9 +39,6 @@ ENV CHROME_BIN=/usr/bin/google-chrome \ DBUS_SESSION_BUS_ADDRESS=/dev/null \ PYTHONUNBUFFERED=1 -# pip install -e .[all] -RUN pip install --no-cache-dir -e .[all] - # Ensure the PATH environment variable includes the location of the installed packages ENV PATH /opt/conda/bin:$PATH @@ -53,15 +46,13 @@ ENV PATH /opt/conda/bin:$PATH EXPOSE 80 # Download models call cli "crawl4ai-download-models" -RUN crawl4ai-download-models +# RUN crawl4ai-download-models -# Instakk mkdocs +# Install mkdocs RUN pip install mkdocs mkdocs-terminal # Call mkdocs to build the documentation RUN mkdocs build # Run uvicorn -CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80", "--workers", "4"] - - +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80", "--workers", "4"] \ No newline at end of file diff --git a/README.md b/README.md index f910c829..b185aef4 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,13 @@ result = crawler.run(url="https://www.nbcnews.com/business") print(result.markdown) ``` +## How to install 🛠 +```bash +virtualenv venv +source venv/bin/activate +pip install "crawl4ai @ git+https://github.com/unclecode/crawl4ai.git" +``` + ### Speed-First Design 🚀 Perhaps the most important design principle for this library is speed. We need to ensure it can handle many links and resources in parallel as quickly as possible. By combining this speed with fast LLMs like Groq, the results will be truly amazing.