- Test all methods

- Update index.html
- Update Readme
- Resolve some bugs
This commit is contained in:
unclecode
2024-05-14 21:27:41 +08:00
parent 5fea6c064b
commit f6e59157bf
17 changed files with 1004 additions and 402 deletions

20
crawl4ai/model_loader.py Normal file
View File

@@ -0,0 +1,20 @@
from functools import lru_cache
from transformers import BertTokenizer, BertModel, AutoTokenizer, AutoModel
import spacy
@lru_cache()
def load_bert_base_uncased():
    """Load the ``bert-base-uncased`` tokenizer and model, memoizing the result.

    The function takes no arguments, so ``lru_cache`` stores a single entry:
    the first call performs the load and every later call returns the same
    ``(tokenizer, model)`` tuple.

    Returns:
        A 2-tuple ``(tokenizer, model)`` built with
        ``BertTokenizer.from_pretrained`` and ``BertModel.from_pretrained``.
    """
    checkpoint = 'bert-base-uncased'
    bert_tokenizer = BertTokenizer.from_pretrained(checkpoint, resume_download=None)
    bert_model = BertModel.from_pretrained(checkpoint, resume_download=None)
    return bert_tokenizer, bert_model
@lru_cache()
def load_bge_small_en_v1_5():
    """Load the ``BAAI/bge-small-en-v1.5`` tokenizer and model, memoizing the result.

    The function takes no arguments, so ``lru_cache`` stores a single entry:
    the first call performs the load and every later call returns the same
    ``(tokenizer, model)`` tuple.

    Returns:
        A 2-tuple ``(tokenizer, model)`` built with
        ``AutoTokenizer.from_pretrained`` and ``AutoModel.from_pretrained``;
        ``eval()`` has been called on the model before it is returned.
    """
    checkpoint = 'BAAI/bge-small-en-v1.5'
    bge_tokenizer = AutoTokenizer.from_pretrained(checkpoint, resume_download=None)
    bge_model = AutoModel.from_pretrained(checkpoint, resume_download=None)
    # Call eval() once here so every consumer of the cached model gets it
    # in the same state.
    bge_model.eval()
    return bge_tokenizer, bge_model
@lru_cache()
def load_spacy_model():
    """Load the spaCy pipeline named ``"models/reuters"``, memoizing the result.

    The function takes no arguments, so ``lru_cache`` stores a single entry
    and repeated calls return the same loaded object.

    NOTE(review): ``"models/reuters"`` looks like a filesystem path that is
    resolved relative to the current working directory -- confirm against
    how the application is launched.

    Returns:
        The object returned by ``spacy.load("models/reuters")``.
    """
    nlp = spacy.load("models/reuters")
    return nlp