- Fix spaCy model loading issue
- Update README and requirements.txt
This commit is contained in:
unclecode
2024-05-16 19:50:20 +08:00
parent 6a6365ae0a
commit c8589f8da3
8 changed files with 137 additions and 70 deletions

View File

@@ -6,6 +6,7 @@ import re
# from nltk.tokenize import word_tokenize, TextTilingTokenizer
from collections import Counter
import string
from .model_loader import load_spacy_en_core_web_sm
# Define the abstract base class for chunking strategies
class ChunkingStrategy(ABC):
@@ -37,13 +38,7 @@ class RegexChunking(ChunkingStrategy):
class NlpSentenceChunking(ChunkingStrategy):
def __init__(self, model='en_core_web_sm'):
import spacy
try:
self.nlp = spacy.load(model)
except IOError:
spacy.cli.download("en_core_web_sm")
self.nlp = spacy.load(model)
# raise ImportError(f"Spacy model '{model}' not found. Please download the model using 'python -m spacy download {model}'")
self.nlp = load_spacy_en_core_web_sm()
def chunk(self, text: str) -> list:
doc = self.nlp(text)