chore: Update README, generate new notebook for quickstart
This commit is contained in:
@@ -80,47 +80,6 @@ def load_bge_small_en_v1_5():
|
||||
model, device = set_model_device(model)
|
||||
return tokenizer, model
|
||||
|
||||
@lru_cache()
def load_onnx_all_MiniLM_l6_v2():
    """Return a ``DefaultEmbeddingModel``, fetching the ONNX files if needed.

    The model files are expected in ``models/onnx`` next to this module. When
    that directory is missing, ``onnx.tar.gz`` is downloaded (unless an archive
    is already present), unpacked into ``models/`` and then deleted. When the
    directory already exists nothing is downloaded.

    The result is memoised by ``lru_cache``, so the model is built once per
    process.

    Returns:
        DefaultEmbeddingModel: the ONNX-backed embedding model.

    Raises:
        ValueError: if the archive contains a member that would be written
            outside the ``models`` directory.
    """
    from crawl4ai.onnx_embedding import DefaultEmbeddingModel

    model_path = "models/onnx.tar.gz"
    model_url = "https://unclecode-files.s3.us-west-2.amazonaws.com/onnx.tar.gz"
    __location__ = os.path.realpath(
        os.path.join(os.getcwd(), os.path.dirname(__file__)))
    download_path = os.path.join(__location__, model_path)
    models_dir = os.path.join(__location__, "models")
    onnx_dir = os.path.join(__location__, "models/onnx")

    def reporthook(block_num, block_size, total_size):
        """Print download progress; tolerate an unknown total size."""
        downloaded = block_num * block_size
        if total_size <= 0:
            # urlretrieve reports a non-positive size when the server sends no
            # Content-Length; a percentage cannot be computed in that case.
            print(f"\rDownloading: {downloaded / (1024 * 1024):.2f} MB", end='')
        elif downloaded < total_size:
            percentage = 100 * downloaded / total_size
            print(f"\rDownloading: {percentage:.2f}% ({downloaded / (1024 * 1024):.2f} MB of {total_size / (1024 * 1024):.2f} MB)", end='')
        else:
            print("\rDownload complete!")

    def download_with_progress(url, filename):
        """Download ``url`` to ``filename`` via a temporary ``.part`` file."""
        partial = filename + ".part"
        try:
            urllib.request.urlretrieve(url, partial, reporthook)
            # Only a fully downloaded file ever gets the final name, so an
            # interrupted download is not mistaken for a complete archive.
            os.replace(partial, filename)
        finally:
            if os.path.exists(partial):
                os.remove(partial)

    def safe_members(tar, destination):
        """Return the archive members, rejecting paths that escape ``destination``."""
        root = os.path.realpath(destination)
        members = tar.getmembers()
        for member in members:
            target = os.path.realpath(os.path.join(root, member.name))
            if target != root and not target.startswith(root + os.sep):
                raise ValueError(
                    f"Refusing to extract {member.name!r} outside {root!r}")
        return members

    # Create the models directory if it does not exist
    os.makedirs(models_dir, exist_ok=True)

    # The archive is deleted after extraction, so the extracted directory (not
    # the archive) is what tells us whether any work is left to do.
    if not os.path.exists(onnx_dir):
        # Download the tar.gz file if it does not exist
        if not os.path.exists(download_path):
            download_with_progress(model_url, download_path)

        with tarfile.open(download_path, "r:gz") as tar:
            tar.extractall(path=models_dir,
                           members=safe_members(tar, models_dir))

        # remove the tar.gz file
        os.remove(download_path)

    model = DefaultEmbeddingModel()
    return model
|
||||
|
||||
@lru_cache()
|
||||
def load_text_classifier():
|
||||
|
||||
@@ -1,50 +0,0 @@
|
||||
# A dependency-light way to run the onnx model
|
||||
|
||||
|
||||
import numpy as np
|
||||
from typing import List
|
||||
import os
|
||||
|
||||
# Absolute, symlink-resolved directory of this module; the tokenizer and model
# files are looked up relative to it.
__location__ = os.path.realpath(
    os.path.join(os.getcwd(), os.path.dirname(__file__))
)

# Hugging Face identifier of the sentence-transformers model
# (not referenced by the code visible here).
MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"
|
||||
|
||||
def normalize(v):
    """Scale every row of ``v`` to unit Euclidean length.

    Norms are taken along axis 1. A row whose norm is exactly zero is divided
    by ``1e-12`` instead, which avoids a division by zero and leaves the row
    all zeros.

    Args:
        v: array-like with at least two dimensions.

    Returns:
        Array of the same shape as ``v`` with each row divided by its norm.
    """
    lengths = np.linalg.norm(v, axis=1, keepdims=True)
    # In-place substitution keeps the dtype that ``norm`` produced.
    np.putmask(lengths, lengths == 0, 1e-12)
    return v / lengths
|
||||
|
||||
# Sample implementation of the default sentence-transformers model using ONNX
|
||||
class DefaultEmbeddingModel():
    """Sentence embedder that runs an ONNX model through onnxruntime.

    The tokenizer and model are loaded from ``models/onnx`` under this
    module's directory. Calling an instance with a list of strings returns
    one L2-normalised, mean-pooled embedding per string.
    """

    def __init__(self):
        """Load ``tokenizer.json`` and ``model.onnx`` from ``models/onnx``."""
        from tokenizers import Tokenizer
        import onnxruntime as ort
        # max_seq_length = 256, for some reason sentence-transformers uses 256 even though the HF config has a max length of 128
        # https://github.com/UKPLab/sentence-transformers/blob/3e1929fddef16df94f8bc6e3b10598a98f46e62d/docs/_static/html/models_en_sentence_embeddings.html#LL480
        self.tokenizer = Tokenizer.from_file(os.path.join(__location__, "models/onnx/tokenizer.json"))
        self.tokenizer.enable_truncation(max_length=256)
        # Every sequence is padded to exactly 256 tokens, so the per-batch
        # id/mask lists always form rectangular arrays.
        self.tokenizer.enable_padding(pad_id=0, pad_token="[PAD]", length=256)
        self.model = ort.InferenceSession(os.path.join(__location__,"models/onnx/model.onnx"))

    def _embedding_dim(self) -> int:
        """Return the last dimension of the model's first output, or 0 if it is not a fixed int."""
        shape = self.model.get_outputs()[0].shape
        last = shape[-1] if shape else None
        return last if isinstance(last, int) else 0

    def __call__(self, documents: List[str], batch_size: int = 32):
        """Embed ``documents`` in batches.

        Args:
            documents: the texts to embed.
            batch_size: number of texts passed to the model per run; must be
                at least 1.

        Returns:
            ``float32`` array with one row per document. For an empty
            ``documents`` the result has zero rows.

        Raises:
            ValueError: if ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")
        if len(documents) == 0:
            # np.concatenate refuses an empty list, so answer directly.
            return np.empty((0, self._embedding_dim()), dtype=np.float32)

        all_embeddings = []
        for start in range(0, len(documents), batch_size):
            batch = documents[start:start + batch_size]
            encoded = [self.tokenizer.encode(d) for d in batch]
            input_ids = np.array([e.ids for e in encoded], dtype=np.int64)
            attention_mask = np.array([e.attention_mask for e in encoded], dtype=np.int64)
            onnx_input = {
                "input_ids": input_ids,
                "attention_mask": attention_mask,
                "token_type_ids": np.zeros_like(input_ids),
            }
            model_output = self.model.run(None, onnx_input)
            last_hidden_state = model_output[0]
            # Perform mean pooling with attention weighting
            input_mask_expanded = np.broadcast_to(np.expand_dims(attention_mask, -1), last_hidden_state.shape)
            # The clip guards against dividing by zero when a row's mask is all zeros.
            embeddings = np.sum(last_hidden_state * input_mask_expanded, 1) / np.clip(input_mask_expanded.sum(1), a_min=1e-9, a_max=None)
            embeddings = normalize(embeddings).astype(np.float32)
            all_embeddings.append(embeddings)
        return np.concatenate(all_embeddings)
|
||||
|
||||
Reference in New Issue
Block a user