fix: Allow local embeddings by removing OpenAI fallback in EmbeddingStrategy
Fixes #1658. The `_get_embedding_llm_config_dict` method was returning a fallback OpenAI config when `embedding_llm_config` was None, which prevented the use of local sentence-transformers embeddings. The method was changed to: (1) return None when no embedding config is provided; (2) update the return type from `Dict` to `Optional[Dict]`; and (3) update the docstring to clarify the behavior. This allows the `get_text_embeddings` utility to correctly switch to the local sentence-transformers implementation when no LLM config is provided. Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -630,18 +630,15 @@ class EmbeddingStrategy(CrawlStrategy):
|
||||
self._validation_embeddings_cache = None # Cache validation query embeddings
|
||||
self._kb_similarity_threshold = 0.95 # Threshold for deduplication
|
||||
|
||||
def _get_embedding_llm_config_dict(self) -> Dict:
|
||||
"""Get embedding LLM config as dict with fallback to default."""
|
||||
def _get_embedding_llm_config_dict(self) -> Optional[Dict]:
|
||||
"""Get embedding LLM config as dict, or None for local embeddings."""
|
||||
if hasattr(self, 'config') and self.config:
|
||||
config_dict = self.config._embedding_llm_config_dict
|
||||
if config_dict:
|
||||
return config_dict
|
||||
|
||||
# Fallback to default if no config provided
|
||||
return {
|
||||
'provider': 'openai/text-embedding-3-small',
|
||||
'api_token': os.getenv('OPENAI_API_KEY')
|
||||
}
|
||||
|
||||
# Return None to use local sentence-transformers embeddings
|
||||
return None
|
||||
|
||||
async def _get_embeddings(self, texts: List[str]) -> Any:
|
||||
"""Get embeddings using configured method"""
|
||||
|
||||
Reference in New Issue
Block a user