chore: Add verbose option to ExtractionStrategy classes

This commit adds a new `verbose` option to the `ExtractionStrategy` classes. The `verbose` option allows for logging of extraction details, such as the number of extracted blocks and the URL being processed. This improves the debugging and monitoring capabilities of the code.
This commit is contained in:
unclecode
2024-05-17 18:06:10 +08:00
parent 32c87f0388
commit 36e46be23d
3 changed files with 14 additions and 7 deletions

View File

@@ -15,15 +15,15 @@ def get_home_folder():
@lru_cache()
def load_bert_base_uncased():
    """Load the pretrained ``bert-base-uncased`` tokenizer and model.

    The function takes no arguments and is wrapped in ``lru_cache``, so the
    checkpoint is loaded on the first call and the same objects are returned
    on later calls.

    Returns:
        A ``(tokenizer, model)`` tuple: a ``BertTokenizer`` and a ``BertModel``
        created with ``from_pretrained('bert-base-uncased')``.
    """
    # Imported inside the function so ``transformers`` is only imported when
    # the model is actually requested.
    from transformers import BertModel, BertTokenizer

    # Each object is loaded exactly once, with ``resume_download=None`` passed
    # explicitly.
    # NOTE(review): presumably this silences the ``resume_download``
    # deprecation warning from the Hugging Face download layer -- confirm.
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', resume_download=None)
    model = BertModel.from_pretrained('bert-base-uncased', resume_download=None)
    return tokenizer, model
@lru_cache()
def load_bge_small_en_v1_5():
    """Load the pretrained ``BAAI/bge-small-en-v1.5`` tokenizer and model.

    The function takes no arguments and is wrapped in ``lru_cache``, so the
    checkpoint is loaded on the first call and the same objects are returned
    on later calls.

    Returns:
        A ``(tokenizer, model)`` tuple created with
        ``AutoTokenizer.from_pretrained`` and ``AutoModel.from_pretrained``;
        ``model.eval()`` has been called on the model before it is returned.
    """
    # Imported inside the function so ``transformers`` is only imported when
    # the model is actually requested.
    from transformers import AutoModel, AutoTokenizer

    # Each object is loaded exactly once, with ``resume_download=None`` passed
    # explicitly.
    # NOTE(review): presumably this silences the ``resume_download``
    # deprecation warning from the Hugging Face download layer -- confirm.
    tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-small-en-v1.5', resume_download=None)
    model = AutoModel.from_pretrained('BAAI/bge-small-en-v1.5', resume_download=None)
    model.eval()
    return tokenizer, model