- Debug
- Refactor code for new version
This commit is contained in:
unclecode
2024-05-16 17:31:44 +08:00
parent f6e59157bf
commit 5b80be956d
23 changed files with 3116 additions and 1019 deletions

View File

@@ -38,7 +38,12 @@ class RegexChunking(ChunkingStrategy):
class NlpSentenceChunking(ChunkingStrategy):
def __init__(self, model='en_core_web_sm'):
    """Load the spaCy pipeline used for sentence chunking.

    Args:
        model: Name of the spaCy model package to load. If it cannot be
            loaded, one download of that same model is attempted and
            the load is retried.

    Raises:
        OSError: If the model still cannot be loaded after the download
            attempt.
    """
    # Imported lazily so spaCy is only needed when this strategy is used.
    import spacy

    try:
        self.nlp = spacy.load(model)
    except OSError:
        # Download the model that was actually requested (not a fixed
        # default), otherwise the retry below fails for any other name.
        from spacy.cli import download

        download(model)
        self.nlp = spacy.load(model)
def chunk(self, text: str) -> list:
doc = self.nlp(text)