fix: Move sentence-transformers to optional dependencies
- Moved sentence-transformers from core to optional dependencies in pyproject.toml.
- Removed sentence-transformers from requirements.txt.
- Added proper ImportError handling with a helpful installation message.
- This prevents ~2.5GB of NVIDIA CUDA libraries from being installed by default.
- Users who need embedding features can install them with: pip install 'crawl4ai[transformer]'
This commit is contained in:
@@ -3342,7 +3342,13 @@ async def get_text_embeddings(
|
|||||||
# Default: use sentence-transformers
|
# Default: use sentence-transformers
|
||||||
else:
|
else:
|
||||||
# Lazy load to avoid importing heavy libraries unless needed
|
# Lazy load to avoid importing heavy libraries unless needed
|
||||||
from sentence_transformers import SentenceTransformer
|
try:
|
||||||
|
from sentence_transformers import SentenceTransformer
|
||||||
|
except ImportError:
|
||||||
|
raise ImportError(
|
||||||
|
"sentence-transformers is required for local embeddings. "
|
||||||
|
"Install it with: pip install 'crawl4ai[transformer]' or pip install sentence-transformers"
|
||||||
|
)
|
||||||
|
|
||||||
# Cache the model in function attribute to avoid reloading
|
# Cache the model in function attribute to avoid reloading
|
||||||
if not hasattr(get_text_embeddings, '_models'):
|
if not hasattr(get_text_embeddings, '_models'):
|
||||||
|
|||||||
@@ -44,7 +44,6 @@ dependencies = [
|
|||||||
"brotli>=1.1.0",
|
"brotli>=1.1.0",
|
||||||
"humanize>=4.10.0",
|
"humanize>=4.10.0",
|
||||||
"lark>=1.2.2",
|
"lark>=1.2.2",
|
||||||
"sentence-transformers>=2.2.0",
|
|
||||||
"alphashape>=1.3.1",
|
"alphashape>=1.3.1",
|
||||||
"shapely>=2.0.0"
|
"shapely>=2.0.0"
|
||||||
]
|
]
|
||||||
@@ -62,8 +61,8 @@ classifiers = [
|
|||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
pdf = ["PyPDF2"]
|
pdf = ["PyPDF2"]
|
||||||
torch = ["torch", "nltk", "scikit-learn"]
|
torch = ["torch", "nltk", "scikit-learn"]
|
||||||
transformer = ["transformers", "tokenizers"]
|
transformer = ["transformers", "tokenizers", "sentence-transformers"]
|
||||||
cosine = ["torch", "transformers", "nltk"]
|
cosine = ["torch", "transformers", "nltk", "sentence-transformers"]
|
||||||
sync = ["selenium"]
|
sync = ["selenium"]
|
||||||
all = [
|
all = [
|
||||||
"PyPDF2",
|
"PyPDF2",
|
||||||
@@ -72,8 +71,8 @@ all = [
|
|||||||
"scikit-learn",
|
"scikit-learn",
|
||||||
"transformers",
|
"transformers",
|
||||||
"tokenizers",
|
"tokenizers",
|
||||||
"selenium",
|
"sentence-transformers",
|
||||||
"PyPDF2"
|
"selenium"
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.scripts]
|
[project.scripts]
|
||||||
|
|||||||
@@ -24,7 +24,6 @@ cssselect>=1.2.0
|
|||||||
chardet>=5.2.0
|
chardet>=5.2.0
|
||||||
brotli>=1.1.0
|
brotli>=1.1.0
|
||||||
httpx[http2]>=0.27.2
|
httpx[http2]>=0.27.2
|
||||||
sentence-transformers>=2.2.0
|
|
||||||
alphashape>=1.3.1
|
alphashape>=1.3.1
|
||||||
shapely>=2.0.0
|
shapely>=2.0.0
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user