feat: add voice-ai-engine-development skill for building real-time conversational AI
This commit is contained in:
@@ -0,0 +1,193 @@
|
||||
"""
|
||||
Template: Base Worker Implementation
|
||||
|
||||
Use this template as a starting point for creating new workers
|
||||
in your voice AI pipeline.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from typing import Any
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BaseWorker:
    """
    Base class for all workers in the voice AI pipeline

    Workers follow the producer-consumer pattern:
    - Consume items from input_queue
    - Process items
    - Produce results to output_queue

    All workers run concurrently via asyncio.

    Lifecycle: ``start()`` schedules the processing loop as an asyncio task,
    ``terminate()`` cancels that task, and ``wait_for_completion()`` awaits
    its exit.
    """

    def __init__(self, input_queue: asyncio.Queue, output_queue: asyncio.Queue):
        """
        Initialize the worker

        Args:
            input_queue: Queue to consume items from
            output_queue: Queue to produce results to
        """
        self.input_queue = input_queue
        self.output_queue = output_queue
        # Loop-continuation flag checked at the top of every iteration.
        self.active = False
        # Handle to the task running _run_loop(); None until start() is called.
        self._task = None

    def start(self):
        """
        Start the worker's processing loop

        Must be called while an event loop is running. If the previous loop
        task has not finished yet, the call is ignored so that a second
        consumer is never spawned on the same queue.

        Raises:
            RuntimeError: If there is no running event loop
        """
        if self._task is not None and not self._task.done():
            logger.warning(
                f"[{self.__class__.__name__}] start() ignored: already running"
            )
            return

        # Resolve the loop first so a missing loop raises before any state
        # is changed (and before the coroutine object is created).
        loop = asyncio.get_running_loop()
        self.active = True
        self._task = loop.create_task(self._run_loop())
        logger.info(f"✅ [{self.__class__.__name__}] Started")

    async def _run_loop(self):
        """
        Main processing loop - runs until terminated

        This loop:
        1. Waits for items from input_queue
        2. Processes each item
        3. Handles errors gracefully (logs them and moves on)
        """
        while self.active:
            try:
                # Block until item arrives
                item = await self.input_queue.get()

                # Process the item
                await self.process(item)

            except asyncio.CancelledError:
                # Task was cancelled (normal during shutdown)
                logger.info(f"🛑 [{self.__class__.__name__}] Task cancelled")
                break

            except Exception as e:
                # Log error with traceback but don't crash the worker;
                # the loop continues with the next item.
                logger.exception(
                    f"❌ [{self.__class__.__name__}] Error processing item: {e}"
                )

    async def process(self, item: Any):
        """
        Process a single item

        Override this method in your worker implementation.

        Args:
            item: The item to process

        Raises:
            NotImplementedError: Always, unless overridden by a subclass
        """
        raise NotImplementedError(
            f"{self.__class__.__name__} must implement process()"
        )

    def terminate(self):
        """
        Stop the worker gracefully

        This sets active=False and cancels the processing task if it is
        still running. Safe to call when the worker was never started.
        """
        self.active = False

        if self._task and not self._task.done():
            self._task.cancel()

        logger.info(f"🛑 [{self.__class__.__name__}] Terminated")

    async def wait_for_completion(self):
        """
        Wait for the worker task to complete

        Returns immediately when the worker was never started. A
        CancelledError coming from the worker task itself is suppressed;
        a cancellation aimed at the caller is re-raised.
        """
        if self._task is None:
            return

        try:
            await self._task
        except asyncio.CancelledError:
            # Only swallow the error when it is the worker task that was
            # cancelled; otherwise the caller is being cancelled and must
            # see it.
            if not self._task.cancelled():
                raise
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Example: Custom Worker Implementation
|
||||
# ============================================================================
|
||||
|
||||
class ExampleWorker(BaseWorker):
    """
    Example worker that demonstrates how to extend BaseWorker

    This worker receives strings, converts them to uppercase,
    and sends them to the output queue.
    """

    def __init__(self, input_queue: asyncio.Queue, output_queue: asyncio.Queue):
        """
        Initialize the example worker

        Args:
            input_queue: Queue to consume strings from
            output_queue: Queue to produce uppercased strings to
        """
        super().__init__(input_queue, output_queue)
        # Add any custom initialization here
        self.processed_count = 0  # number of items successfully forwarded

    async def process(self, item: str):
        """
        Process a single item

        Args:
            item: String to convert to uppercase
        """
        # Simulate some processing time
        await asyncio.sleep(0.1)

        # Process the item
        result = item.upper()

        # Send to output queue. Awaiting put() (rather than put_nowait())
        # waits for free space on a bounded queue instead of raising
        # asyncio.QueueFull and losing the result.
        await self.output_queue.put(result)

        # Update counter
        self.processed_count += 1

        logger.info(
            f"✅ [{self.__class__.__name__}] "
            f"Processed '{item}' -> '{result}' "
            f"(total: {self.processed_count})"
        )
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Example Usage
|
||||
# ============================================================================
|
||||
|
||||
async def example_usage():
    """
    Example of how to use the worker

    Starts an ExampleWorker, feeds it a few strings, collects exactly one
    result per input, prints them, and always shuts the worker down.

    Raises:
        asyncio.TimeoutError: If a result does not arrive within the
            per-item timeout (for example because processing failed)
    """
    # Create queues
    input_queue = asyncio.Queue()
    output_queue = asyncio.Queue()

    # Create worker
    worker = ExampleWorker(input_queue, output_queue)

    # Start worker
    worker.start()

    try:
        # Send items to process
        items = ["hello", "world", "voice", "ai"]
        for item in items:
            input_queue.put_nowait(item)

        # Collect one result per submitted item. Waiting on the queue is
        # deterministic, unlike sleeping for a fixed interval and hoping
        # the worker has finished; the timeout keeps the demo from hanging.
        results = []
        for _ in items:
            results.append(await asyncio.wait_for(output_queue.get(), timeout=5.0))

        print(f"\n✅ Results: {results}")
    finally:
        # Terminate worker even when collection fails
        worker.terminate()
        await worker.wait_for_completion()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: show INFO-level worker logs, then run the demo
    # coroutine to completion on a fresh event loop.
    logging.basicConfig(level=logging.INFO)
    demo = example_usage()
    asyncio.run(demo)
|
||||
@@ -0,0 +1,289 @@
|
||||
"""
|
||||
Template: Multi-Provider Factory
|
||||
|
||||
Use this template to create a factory that supports multiple providers
|
||||
for transcription, LLM, and TTS services.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any
|
||||
from abc import ABC, abstractmethod
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Provider Interfaces
|
||||
# ============================================================================
|
||||
|
||||
class TranscriberProvider(ABC):
    """Interface that every transcriber provider must implement."""

    @abstractmethod
    async def transcribe_stream(self, audio_stream):
        """
        Transcribe streaming audio

        Concrete providers must override this coroutine.

        Args:
            audio_stream: The audio input to transcribe
        """
        ...
|
||||
|
||||
|
||||
class LLMProvider(ABC):
    """Interface that every LLM provider must implement."""

    @abstractmethod
    async def generate_response(self, messages, stream=True):
        """
        Generate response from messages

        Concrete providers must override this coroutine.

        Args:
            messages: The messages to generate a response for
            stream: Streaming flag, True by default
        """
        ...
|
||||
|
||||
|
||||
class TTSProvider(ABC):
    """Interface that every TTS provider must implement."""

    @abstractmethod
    async def synthesize_speech(self, text):
        """
        Synthesize speech from text

        Concrete providers must override this coroutine.

        Args:
            text: The text to turn into speech
        """
        ...
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Multi-Provider Factory
|
||||
# ============================================================================
|
||||
|
||||
class VoiceComponentFactory:
    """
    Factory for creating voice AI components with multiple provider support

    Supports:
    - Multiple transcription providers (Deepgram, AssemblyAI, Azure, Google)
    - Multiple LLM providers (OpenAI, Gemini, Claude)
    - Multiple TTS providers (ElevenLabs, Azure, Google, Polly, Play.ht)

    Each ``create_*`` method reads one provider key from the configuration
    dict, normalizes it, and dispatches to the matching ``_create_*``
    builder. The builders in this template are stubs that raise
    NotImplementedError until they are filled in.
    """

    def __init__(self):
        """Register the builder callable for every supported provider name."""
        self.transcriber_providers = {
            "deepgram": self._create_deepgram_transcriber,
            "assemblyai": self._create_assemblyai_transcriber,
            "azure": self._create_azure_transcriber,
            "google": self._create_google_transcriber,
        }

        self.llm_providers = {
            "openai": self._create_openai_agent,
            "gemini": self._create_gemini_agent,
            "claude": self._create_claude_agent,
        }

        self.tts_providers = {
            "elevenlabs": self._create_elevenlabs_synthesizer,
            "azure": self._create_azure_synthesizer,
            "google": self._create_google_synthesizer,
            "polly": self._create_polly_synthesizer,
            "playht": self._create_playht_synthesizer,
        }

    @staticmethod
    def _resolve_provider(
        config: Dict[str, Any],
        key: str,
        default: str,
        registry: Dict[str, Any],
        label: str,
    ) -> str:
        """
        Read, normalize and validate a provider name from the configuration

        Args:
            config: Configuration dict
            key: Config key holding the provider name
            default: Provider used when the key is missing or set to None
            registry: Mapping of supported provider names to builders
            label: Human-readable component kind used in error messages

        Returns:
            The lowercased, whitespace-stripped provider name

        Raises:
            ValueError: If the value is not a string or is not in registry
        """
        raw = config.get(key)
        if raw is None:
            # A missing key and an explicit None both mean "use the default".
            raw = default

        if not isinstance(raw, str):
            # Report bad config as ValueError rather than letting .lower()
            # fail with an AttributeError.
            raise ValueError(
                f"Invalid {label} provider: expected a string, "
                f"got {type(raw).__name__}"
            )

        provider = raw.strip().lower()
        if provider not in registry:
            raise ValueError(
                f"Unknown {label} provider: {provider}. "
                f"Supported: {list(registry.keys())}"
            )
        return provider

    def create_transcriber(self, config: Dict[str, Any]):
        """
        Create transcriber based on configuration

        Args:
            config: Configuration dict with 'transcriberProvider' key
                (defaults to "deepgram")

        Returns:
            Transcriber instance

        Raises:
            ValueError: If provider is not supported
        """
        provider = self._resolve_provider(
            config,
            "transcriberProvider",
            "deepgram",
            self.transcriber_providers,
            "transcriber",
        )
        logger.info(f"🎤 Creating transcriber: {provider}")
        return self.transcriber_providers[provider](config)

    def create_agent(self, config: Dict[str, Any]):
        """
        Create LLM agent based on configuration

        Args:
            config: Configuration dict with 'llmProvider' key
                (defaults to "openai")

        Returns:
            Agent instance

        Raises:
            ValueError: If provider is not supported
        """
        provider = self._resolve_provider(
            config, "llmProvider", "openai", self.llm_providers, "LLM"
        )
        logger.info(f"🤖 Creating agent: {provider}")
        return self.llm_providers[provider](config)

    def create_synthesizer(self, config: Dict[str, Any]):
        """
        Create TTS synthesizer based on configuration

        Args:
            config: Configuration dict with 'voiceProvider' key
                (defaults to "elevenlabs")

        Returns:
            Synthesizer instance

        Raises:
            ValueError: If provider is not supported
        """
        provider = self._resolve_provider(
            config, "voiceProvider", "elevenlabs", self.tts_providers, "voice"
        )
        logger.info(f"🔊 Creating synthesizer: {provider}")
        return self.tts_providers[provider](config)

    # ========================================================================
    # Transcriber Implementations
    # ========================================================================

    def _create_deepgram_transcriber(self, config: Dict[str, Any]):
        """Create Deepgram transcriber (template stub)"""
        # TODO: Implement Deepgram transcriber
        # from .transcribers.deepgram import DeepgramTranscriber
        # return DeepgramTranscriber(
        #     api_key=config.get("deepgramApiKey"),
        #     model=config.get("deepgramModel", "nova-2"),
        #     language=config.get("language", "en-US")
        # )
        raise NotImplementedError("Deepgram transcriber not implemented")

    def _create_assemblyai_transcriber(self, config: Dict[str, Any]):
        """Create AssemblyAI transcriber (template stub)"""
        # TODO: Implement AssemblyAI transcriber
        raise NotImplementedError("AssemblyAI transcriber not implemented")

    def _create_azure_transcriber(self, config: Dict[str, Any]):
        """Create Azure Speech transcriber (template stub)"""
        # TODO: Implement Azure transcriber
        raise NotImplementedError("Azure transcriber not implemented")

    def _create_google_transcriber(self, config: Dict[str, Any]):
        """Create Google Cloud Speech transcriber (template stub)"""
        # TODO: Implement Google transcriber
        raise NotImplementedError("Google transcriber not implemented")

    # ========================================================================
    # LLM Agent Implementations
    # ========================================================================

    def _create_openai_agent(self, config: Dict[str, Any]):
        """Create OpenAI agent (template stub)"""
        # TODO: Implement OpenAI agent
        # from .agents.openai import OpenAIAgent
        # return OpenAIAgent(
        #     api_key=config.get("openaiApiKey"),
        #     model=config.get("openaiModel", "gpt-4"),
        #     system_prompt=config.get("prompt", "You are a helpful assistant.")
        # )
        raise NotImplementedError("OpenAI agent not implemented")

    def _create_gemini_agent(self, config: Dict[str, Any]):
        """Create Google Gemini agent (template stub)"""
        # TODO: Implement Gemini agent
        # from .agents.gemini import GeminiAgent
        # return GeminiAgent(
        #     api_key=config.get("geminiApiKey"),
        #     model=config.get("geminiModel", "gemini-pro"),
        #     system_prompt=config.get("prompt", "You are a helpful assistant.")
        # )
        raise NotImplementedError("Gemini agent not implemented")

    def _create_claude_agent(self, config: Dict[str, Any]):
        """Create Anthropic Claude agent (template stub)"""
        # TODO: Implement Claude agent
        raise NotImplementedError("Claude agent not implemented")

    # ========================================================================
    # TTS Synthesizer Implementations
    # ========================================================================

    def _create_elevenlabs_synthesizer(self, config: Dict[str, Any]):
        """Create ElevenLabs synthesizer (template stub)"""
        # TODO: Implement ElevenLabs synthesizer
        # from .synthesizers.elevenlabs import ElevenLabsSynthesizer
        # return ElevenLabsSynthesizer(
        #     api_key=config.get("elevenlabsApiKey"),
        #     voice_id=config.get("elevenlabsVoiceId"),
        #     model_id=config.get("elevenlabsModel", "eleven_monolingual_v1")
        # )
        raise NotImplementedError("ElevenLabs synthesizer not implemented")

    def _create_azure_synthesizer(self, config: Dict[str, Any]):
        """Create Azure TTS synthesizer (template stub)"""
        # TODO: Implement Azure synthesizer
        raise NotImplementedError("Azure synthesizer not implemented")

    def _create_google_synthesizer(self, config: Dict[str, Any]):
        """Create Google Cloud TTS synthesizer (template stub)"""
        # TODO: Implement Google synthesizer
        raise NotImplementedError("Google synthesizer not implemented")

    def _create_polly_synthesizer(self, config: Dict[str, Any]):
        """Create Amazon Polly synthesizer (template stub)"""
        # TODO: Implement Polly synthesizer
        raise NotImplementedError("Polly synthesizer not implemented")

    def _create_playht_synthesizer(self, config: Dict[str, Any]):
        """Create Play.ht synthesizer (template stub)"""
        # TODO: Implement Play.ht synthesizer
        raise NotImplementedError("Play.ht synthesizer not implemented")
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Example Usage
|
||||
# ============================================================================
|
||||
|
||||
def example_usage():
    """
    Example of how to use the factory

    Builds a sample configuration, asks the factory for a transcriber, an
    agent and a synthesizer, and prints the outcome instead of raising.
    """
    # Sample settings: one provider choice per component plus its credentials
    settings = {
        "transcriberProvider": "deepgram",
        "deepgramApiKey": "your-api-key",
        "llmProvider": "gemini",
        "geminiApiKey": "your-api-key",
        "voiceProvider": "elevenlabs",
        "elevenlabsApiKey": "your-api-key",
        "elevenlabsVoiceId": "your-voice-id",
        "prompt": "You are a helpful AI assistant.",
    }

    component_factory = VoiceComponentFactory()

    try:
        # Build the three pipeline stages in order; the first failure
        # skips the rest.
        component_factory.create_transcriber(settings)
        component_factory.create_agent(settings)
        component_factory.create_synthesizer(settings)
    except ValueError as config_error:
        print(f"❌ Configuration error: {config_error}")
    except NotImplementedError as missing:
        print(f"⚠️ Not implemented: {missing}")
    else:
        print("✅ All components created successfully!")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: show INFO-level factory logs, then run the demo.
    logging.basicConfig(level=logging.INFO)
    example_usage()
|
||||
Reference in New Issue
Block a user