chore: Update model_loader.py to use pretrained models without resume_download

This commit is contained in:
unclecode
2024-05-17 15:26:15 +08:00
parent bf3b040f10
commit 3f8576f870
2 changed files with 4 additions and 7 deletions

View File

@@ -15,15 +15,15 @@ def get_home_folder():
@lru_cache()
def load_bert_base_uncased():
    """Load the ``bert-base-uncased`` tokenizer and model.

    The result is memoized with ``lru_cache``; because the function takes no
    arguments, every call after the first returns the same
    ``(tokenizer, model)`` tuple instead of loading the weights again.

    Returns:
        tuple: ``(tokenizer, model)`` as returned by
        ``BertTokenizer.from_pretrained`` and ``BertModel.from_pretrained``.
    """
    # Imported at call time so that `transformers` is only required when this
    # loader is actually used.
    from transformers import BertModel, BertTokenizer

    # Each artifact is loaded exactly once. `resume_download` is intentionally
    # not passed; the library default is used.
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = BertModel.from_pretrained('bert-base-uncased')
    return tokenizer, model
@lru_cache()
def load_bge_small_en_v1_5():
    """Load the ``BAAI/bge-small-en-v1.5`` tokenizer and model.

    The result is memoized with ``lru_cache``; because the function takes no
    arguments, every call after the first returns the same
    ``(tokenizer, model)`` tuple instead of loading the weights again.

    Returns:
        tuple: ``(tokenizer, model)`` as returned by
        ``AutoTokenizer.from_pretrained`` and ``AutoModel.from_pretrained``,
        with ``model.eval()`` already applied.
    """
    # Imported at call time so that `transformers` is only required when this
    # loader is actually used.
    from transformers import AutoModel, AutoTokenizer

    # Each artifact is loaded exactly once. `resume_download` is intentionally
    # not passed; the library default is used.
    tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-small-en-v1.5')
    model = AutoModel.from_pretrained('BAAI/bge-small-en-v1.5')
    # Switch the model to evaluation (inference) mode before handing it out.
    model.eval()
    return tokenizer, model

View File

@@ -39,9 +39,6 @@ class WebCrawler:
self.ready = False
def warmup(self):
print("[LOG] 🌤️ Warming up the WebCrawler")
result = self.run(
url='https://crawl4ai.uccode.io/',