diff --git a/crawl4ai/crawler_strategy.py b/crawl4ai/crawler_strategy.py index 9e85d60d..06e386c3 100644 --- a/crawl4ai/crawler_strategy.py +++ b/crawl4ai/crawler_strategy.py @@ -6,6 +6,9 @@ from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.chrome.options import Options from selenium.common.exceptions import InvalidArgumentException +from selenium.webdriver.chrome.service import Service as ChromeService +from webdriver_manager.chrome import ChromeDriverManager + import logging import base64 from PIL import Image, ImageDraw, ImageFont @@ -118,10 +121,15 @@ class LocalSeleniumCrawlerStrategy(CrawlerStrategy): } # chromedriver_autoinstaller.install() - import chromedriver_autoinstaller - crawl4ai_folder = os.path.join(Path.home(), ".crawl4ai") - chromedriver_path = chromedriver_autoinstaller.utils.download_chromedriver(crawl4ai_folder, False) + # import chromedriver_autoinstaller + # crawl4ai_folder = os.path.join(Path.home(), ".crawl4ai") + # driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=self.options) + # chromedriver_path = chromedriver_autoinstaller.install() + # chromedriver_path = chromedriver_autoinstaller.utils.download_chromedriver() # self.service = Service(chromedriver_autoinstaller.install()) + + + chromedriver_path = ChromeDriverManager().install() self.service = Service(chromedriver_path) self.service.log_path = "NUL" self.driver = webdriver.Chrome(service=self.service, options=self.options) diff --git a/main.py b/main.py index 45947c5a..a20c13ad 100644 --- a/main.py +++ b/main.py @@ -49,7 +49,9 @@ templates = Jinja2Templates(directory=__location__ + "/pages") @lru_cache() def get_crawler(): # Initialize and return a WebCrawler instance - return WebCrawler(verbose = True) + crawler = WebCrawler(verbose = True) + crawler.warmup() + return crawler class CrawlRequest(BaseModel): urls: List[str] diff --git a/requirements.txt b/requirements.txt index ee5be60a..ced41173 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,3 +20,4 @@ torch==2.3.1 onnxruntime==1.18.0 tokenizers==0.19.1 pillow==10.3.0 +webdriver-manager==4.0.1 \ No newline at end of file