Add model loader, update requirements.txt

This commit is contained in:
unclecode
2024-05-16 20:08:21 +08:00
parent c8589f8da3
commit 8e28eb9efb
3 changed files with 60 additions and 19 deletions

View File

@@ -4,6 +4,7 @@ from pathlib import Path
import subprocess, os
import shutil
from .config import MODEL_REPO_BRANCH
import argparse
@lru_cache()
def load_bert_base_uncased():
@@ -83,4 +84,40 @@ def load_spacy_model():
except Exception as e:
print(f"An error occurred: {e}")
return spacy.load(model_folder)
return spacy.load(model_folder)
def download_all_models(remove_existing=False):
    """Run every Crawl4AI model loader once so its model gets fetched.

    Args:
        remove_existing: When true, the ``models/reuters`` and ``models``
            directories under the folder returned by ``get_home_folder()``
            are deleted (if present) before any loader is invoked.

    Progress is reported on stdout via ``[LOG]`` lines; nothing is returned.
    """
    if remove_existing:
        print("[LOG] Removing existing models...")
        base_dir = get_home_folder()
        # Order is kept as-is: the nested reuters folder first, then its parent.
        for relative in ("models/reuters", "models"):
            target = os.path.join(base_dir, relative)
            if Path(target).exists():
                shutil.rmtree(target)
        print("[LOG] Existing models removed.")

    # Each loader is called purely for its side effect; return values are
    # discarded. Presumably the loaders download on first use -- confirm
    # against their definitions.
    steps = (
        ("[LOG] Downloading BERT Base Uncased...", load_bert_base_uncased),
        ("[LOG] Downloading BGE Small EN v1.5...", load_bge_small_en_v1_5),
        ("[LOG] Downloading spaCy EN Core Web SM...", load_spacy_en_core_web_sm),
        ("[LOG] Downloading custom spaCy model...", load_spacy_model),
    )
    for announcement, loader in steps:
        print(announcement)
        loader()

    print("[LOG] ✅ All models downloaded successfully.")
def main():
    """Command-line entry point for the model downloader.

    Parses the ``--remove-existing`` flag and forwards it to
    ``download_all_models``.
    """
    cli = argparse.ArgumentParser(description="Crawl4AI Model Downloader")
    cli.add_argument(
        '--remove-existing',
        action='store_true',
        help="Remove existing models before downloading",
    )
    options = cli.parse_args()
    download_all_models(remove_existing=options.remove_existing)


# Only run the downloader when this file is executed as a script.
if __name__ == "__main__":
    main()