chore: Update model_loader.py to call from_pretrained without the resume_download argument

This commit is contained in:
unclecode
2024-05-17 15:26:15 +08:00
parent bf3b040f10
commit 3f8576f870
2 changed files with 4 additions and 7 deletions

View File

@@ -15,15 +15,15 @@ def get_home_folder():
@lru_cache() @lru_cache()
def load_bert_base_uncased(): def load_bert_base_uncased():
from transformers import BertTokenizer, BertModel, AutoTokenizer, AutoModel from transformers import BertTokenizer, BertModel, AutoTokenizer, AutoModel
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', resume_download=None) tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') #, resume_download=None)
model = BertModel.from_pretrained('bert-base-uncased', resume_download=None) model = BertModel.from_pretrained('bert-base-uncased') #, resume_download=None)
return tokenizer, model return tokenizer, model
@lru_cache() @lru_cache()
def load_bge_small_en_v1_5(): def load_bge_small_en_v1_5():
from transformers import BertTokenizer, BertModel, AutoTokenizer, AutoModel from transformers import BertTokenizer, BertModel, AutoTokenizer, AutoModel
tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-small-en-v1.5', resume_download=None) tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-small-en-v1.5') #, resume_download=None)
model = AutoModel.from_pretrained('BAAI/bge-small-en-v1.5', resume_download=None) model = AutoModel.from_pretrained('BAAI/bge-small-en-v1.5') #, resume_download=None)
model.eval() model.eval()
return tokenizer, model return tokenizer, model

View File

@@ -39,9 +39,6 @@ class WebCrawler:
self.ready = False self.ready = False
def warmup(self): def warmup(self):
print("[LOG] 🌤️ Warming up the WebCrawler") print("[LOG] 🌤️ Warming up the WebCrawler")
result = self.run( result = self.run(
url='https://crawl4ai.uccode.io/', url='https://crawl4ai.uccode.io/',