refactor: Temporarily disable fetching image file size in get_content_of_website_optimized

Set the `image_size` variable to 0 in the `get_content_of_website_optimized` function to temporarily disable fetching the image file size. This change addresses performance issues and will be improved in a future update. Also update the Dockerfile for Linux users.
2024-07-31 13:29:23 +08:00
parent aa9412e1b4
commit 9e43f7beda
3 changed files with 19 additions and 19 deletions
--- a/12
+++ b/12
@@ -24,17 +24,17 @@ COPY . .
 # Install Crawl4AI using the local setup.py (which will use the default installation)
 RUN pip install --no-cache-dir .

-# Install Google Chrome and ChromeDriver
+# Install Google Chrome
 RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - && \
    sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google-chrome.list' && \
    apt-get update && \
-    apt-get install -y google-chrome-stable && \
-    wget -O /tmp/chromedriver.zip http://chromedriver.storage.googleapis.com/`curl -sS chromedriver.storage.googleapis.com/LATEST_RELEASE`/chromedriver_linux64.zip && \
-    unzip /tmp/chromedriver.zip chromedriver -d /usr/local/bin/
+    apt-get install -y google-chrome-stable

-# Set environment to use Chrome and ChromeDriver properly
+# Update webdriver_manager to version 4.0.2
+RUN pip install --no-cache-dir webdriver_manager==4.0.2
+
+# Set environment to use Chrome properly
 ENV CHROME_BIN=/usr/bin/google-chrome \
-    CHROMEDRIVER=/usr/local/bin/chromedriver \
    DISPLAY=:99 \
    DBUS_SESSION_BUS_ADDRESS=/dev/null \
    PYTHONUNBUFFERED=1
--- a/crawl4ai/crawler_strategy.py
+++ b/crawl4ai/crawler_strategy.py
@@ -137,10 +137,15 @@ class LocalSeleniumCrawlerStrategy(CrawlerStrategy):
        # self.service = Service(chromedriver_autoinstaller.install())
        
        
-        chromedriver_path = ChromeDriverManager().install()
-        self.service = Service(chromedriver_path)
-        self.service.log_path = "NUL"
-        self.driver = webdriver.Chrome(service=self.service, options=self.options)
+        # chromedriver_path = ChromeDriverManager().install()
+        # self.service = Service(chromedriver_path)
+        # self.service.log_path = "NUL"
+        # self.driver = webdriver.Chrome(service=self.service, options=self.options)
+        
+        # Use selenium-manager (built into Selenium 4.10.0+)
+        self.service = Service()
+        self.driver = webdriver.Chrome(options=self.options)
+        
        self.driver = self.execute_hook('on_driver_created', self.driver)
        
        if kwargs.get("cookies"):
@@ -292,7 +297,7 @@ class LocalSeleniumCrawlerStrategy(CrawlerStrategy):
            # Open the screenshot with PIL
            image = Image.open(BytesIO(screenshot))

-            # Convert image to RGB mode
+            # Convert image to RGB mode (this will handle both RGB and RGBA images)
            rgb_image = image.convert('RGB')

            # Convert to JPEG and compress
@@ -304,11 +309,6 @@ class LocalSeleniumCrawlerStrategy(CrawlerStrategy):
                print(f"[LOG] 📸 Screenshot taken and converted to base64")

            return img_base64
-        except Exception as e:
-            if self.verbose:
-                print(f"[ERROR] Failed to take screenshot: {str(e)}")
-            return ""
-
        except Exception as e:
            error_message = sanitize_input_encode(f"Failed to take screenshot: {str(e)}")
            print(error_message)
@@ -321,7 +321,7 @@ class LocalSeleniumCrawlerStrategy(CrawlerStrategy):
            try:
                font = ImageFont.truetype("arial.ttf", 40)
            except IOError:
-                font = ImageFont.load_default(size=40)
+                font = ImageFont.load_default()

            # Define text color and wrap the text
            text_color = (255, 255, 255)
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,7 +12,7 @@ python-dotenv==1.0.1
 requests==2.32.3
 rich==13.7.1
 scikit-learn==1.5.0
-selenium==4.21.0
+selenium==4.23.1
 uvicorn==0.30.1
 transformers==4.41.2
 chromedriver-autoinstaller==0.6.4