refactor: Temporarily disable fetching image file size in get_content_of_website_optimized
Set the `image_size` variable to 0 in the `get_content_of_website_optimized` function to temporarily disable fetching the image file size. This change addresses performance issues and will be improved in a future update. Update Dockerfile for linuz users
This commit is contained in:
12
Dockerfile
12
Dockerfile
@@ -24,17 +24,17 @@ COPY . .
|
||||
# Install Crawl4AI using the local setup.py (which will use the default installation)
|
||||
RUN pip install --no-cache-dir .
|
||||
|
||||
# Install Google Chrome and ChromeDriver
|
||||
# Install Google Chrome
|
||||
RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - && \
|
||||
sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google-chrome.list' && \
|
||||
apt-get update && \
|
||||
apt-get install -y google-chrome-stable && \
|
||||
wget -O /tmp/chromedriver.zip http://chromedriver.storage.googleapis.com/`curl -sS chromedriver.storage.googleapis.com/LATEST_RELEASE`/chromedriver_linux64.zip && \
|
||||
unzip /tmp/chromedriver.zip chromedriver -d /usr/local/bin/
|
||||
apt-get install -y google-chrome-stable
|
||||
|
||||
# Set environment to use Chrome and ChromeDriver properly
|
||||
# Update webdriver_manager to version 4.0.2
|
||||
RUN pip install --no-cache-dir webdriver_manager==4.0.2
|
||||
|
||||
# Set environment to use Chrome properly
|
||||
ENV CHROME_BIN=/usr/bin/google-chrome \
|
||||
CHROMEDRIVER=/usr/local/bin/chromedriver \
|
||||
DISPLAY=:99 \
|
||||
DBUS_SESSION_BUS_ADDRESS=/dev/null \
|
||||
PYTHONUNBUFFERED=1
|
||||
|
||||
@@ -137,10 +137,15 @@ class LocalSeleniumCrawlerStrategy(CrawlerStrategy):
|
||||
# self.service = Service(chromedriver_autoinstaller.install())
|
||||
|
||||
|
||||
chromedriver_path = ChromeDriverManager().install()
|
||||
self.service = Service(chromedriver_path)
|
||||
self.service.log_path = "NUL"
|
||||
self.driver = webdriver.Chrome(service=self.service, options=self.options)
|
||||
# chromedriver_path = ChromeDriverManager().install()
|
||||
# self.service = Service(chromedriver_path)
|
||||
# self.service.log_path = "NUL"
|
||||
# self.driver = webdriver.Chrome(service=self.service, options=self.options)
|
||||
|
||||
# Use selenium-manager (built into Selenium 4.10.0+)
|
||||
self.service = Service()
|
||||
self.driver = webdriver.Chrome(options=self.options)
|
||||
|
||||
self.driver = self.execute_hook('on_driver_created', self.driver)
|
||||
|
||||
if kwargs.get("cookies"):
|
||||
@@ -292,7 +297,7 @@ class LocalSeleniumCrawlerStrategy(CrawlerStrategy):
|
||||
# Open the screenshot with PIL
|
||||
image = Image.open(BytesIO(screenshot))
|
||||
|
||||
# Convert image to RGB mode
|
||||
# Convert image to RGB mode (this will handle both RGB and RGBA images)
|
||||
rgb_image = image.convert('RGB')
|
||||
|
||||
# Convert to JPEG and compress
|
||||
@@ -304,11 +309,6 @@ class LocalSeleniumCrawlerStrategy(CrawlerStrategy):
|
||||
print(f"[LOG] 📸 Screenshot taken and converted to base64")
|
||||
|
||||
return img_base64
|
||||
except Exception as e:
|
||||
if self.verbose:
|
||||
print(f"[ERROR] Failed to take screenshot: {str(e)}")
|
||||
return ""
|
||||
|
||||
except Exception as e:
|
||||
error_message = sanitize_input_encode(f"Failed to take screenshot: {str(e)}")
|
||||
print(error_message)
|
||||
@@ -321,7 +321,7 @@ class LocalSeleniumCrawlerStrategy(CrawlerStrategy):
|
||||
try:
|
||||
font = ImageFont.truetype("arial.ttf", 40)
|
||||
except IOError:
|
||||
font = ImageFont.load_default(size=40)
|
||||
font = ImageFont.load_default()
|
||||
|
||||
# Define text color and wrap the text
|
||||
text_color = (255, 255, 255)
|
||||
|
||||
@@ -12,7 +12,7 @@ python-dotenv==1.0.1
|
||||
requests==2.32.3
|
||||
rich==13.7.1
|
||||
scikit-learn==1.5.0
|
||||
selenium==4.21.0
|
||||
selenium==4.23.1
|
||||
uvicorn==0.30.1
|
||||
transformers==4.41.2
|
||||
chromedriver-autoinstaller==0.6.4
|
||||
|
||||
Reference in New Issue
Block a user