From 3f8576f8700b00c2b0f206fffdc587455be2ed63 Mon Sep 17 00:00:00 2001 From: unclecode Date: Fri, 17 May 2024 15:26:15 +0800 Subject: [PATCH] chore: Drop resume_download from from_pretrained calls in model_loader.py; remove stray blank lines in web_crawler.py warmup --- crawl4ai/model_loader.py | 8 ++++---- crawl4ai/web_crawler.py | 3 --- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/crawl4ai/model_loader.py b/crawl4ai/model_loader.py index 3a2b8695..7ed56919 100644 --- a/crawl4ai/model_loader.py +++ b/crawl4ai/model_loader.py @@ -15,15 +15,15 @@ def get_home_folder(): @lru_cache() def load_bert_base_uncased(): from transformers import BertTokenizer, BertModel, AutoTokenizer, AutoModel - tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', resume_download=None) - model = BertModel.from_pretrained('bert-base-uncased', resume_download=None) + tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') #, resume_download=None) + model = BertModel.from_pretrained('bert-base-uncased') #, resume_download=None) return tokenizer, model @lru_cache() def load_bge_small_en_v1_5(): from transformers import BertTokenizer, BertModel, AutoTokenizer, AutoModel - tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-small-en-v1.5', resume_download=None) - model = AutoModel.from_pretrained('BAAI/bge-small-en-v1.5', resume_download=None) + tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-small-en-v1.5') #, resume_download=None) + model = AutoModel.from_pretrained('BAAI/bge-small-en-v1.5') #, resume_download=None) model.eval() return tokenizer, model diff --git a/crawl4ai/web_crawler.py b/crawl4ai/web_crawler.py index 0dc6e16c..e8437d17 100644 --- a/crawl4ai/web_crawler.py +++ b/crawl4ai/web_crawler.py @@ -39,9 +39,6 @@ class WebCrawler: self.ready = False def warmup(self): - - - print("[LOG] 🌤️ Warming up the WebCrawler") result = self.run( url='https://crawl4ai.uccode.io/',