feat: add voice-ai-engine-development skill for building real-time conversational AI

This commit is contained in:
taksrules
2026-01-27 07:24:06 +02:00
parent e9783892c1
commit d972c4fa3a
9 changed files with 3360 additions and 0 deletions

View File

@@ -0,0 +1,193 @@
"""
Template: Base Worker Implementation
Use this template as a starting point for creating new workers
in your voice AI pipeline.
"""
import asyncio
from typing import Any
import logging
logger = logging.getLogger(__name__)
class BaseWorker:
    """
    Base class for all workers in the voice AI pipeline.

    Workers follow the producer-consumer pattern:
    - Consume items from input_queue
    - Process items
    - Produce results to output_queue

    All workers run concurrently via asyncio. Subclasses override
    process(); the base class owns the loop, error handling and shutdown.
    """

    def __init__(self, input_queue: asyncio.Queue, output_queue: asyncio.Queue):
        """
        Initialize the worker.

        Args:
            input_queue: Queue to consume items from
            output_queue: Queue to produce results to
        """
        self.input_queue = input_queue
        self.output_queue = output_queue
        # True between start() and terminate(); also the loop condition.
        self.active = False
        # Task running _run_loop(); None until start() is called.
        self._task = None

    def start(self) -> None:
        """
        Start the worker's processing loop.

        Must be called while an event loop is running, because the loop is
        scheduled with asyncio.create_task().

        Calling start() on a worker that is already running is a no-op.
        Without this guard a second call would replace the task handle,
        leaving the first loop running with nothing able to cancel it and
        two loops competing for the same input queue.
        """
        already_running = (
            self.active and self._task is not None and not self._task.done()
        )
        if already_running:
            logger.warning(
                f"⚠️ [{self.__class__.__name__}] start() ignored: already running"
            )
            return

        self.active = True
        self._task = asyncio.create_task(self._run_loop())
        logger.info(f"✅ [{self.__class__.__name__}] Started")

    async def _run_loop(self) -> None:
        """
        Main processing loop - runs until the worker is terminated.

        This loop:
        1. Waits for items from input_queue
        2. Processes each item
        3. Logs processing errors and keeps going
        """
        while self.active:
            try:
                # Block until item arrives
                item = await self.input_queue.get()
                # Process the item
                await self.process(item)
            except asyncio.CancelledError:
                # Task was cancelled (normal during shutdown)
                logger.info(f"🛑 [{self.__class__.__name__}] Task cancelled")
                break
            except Exception as e:
                # Log error but don't crash the worker; the loop then
                # continues with the next item.
                logger.error(
                    f"❌ [{self.__class__.__name__}] Error processing item: {e}",
                    exc_info=True
                )

    async def process(self, item: Any):
        """
        Process a single item.

        Override this method in your worker implementation.

        Args:
            item: The item to process

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError(
            f"{self.__class__.__name__} must implement process()"
        )

    def terminate(self) -> None:
        """
        Stop the worker.

        Sets active=False and cancels the processing task if it is still
        running. Use wait_for_completion() to wait until the task has
        actually finished.
        """
        self.active = False
        if self._task and not self._task.done():
            self._task.cancel()
        logger.info(f"🛑 [{self.__class__.__name__}] Terminated")

    async def wait_for_completion(self) -> None:
        """Wait for the worker task to complete (no-op if never started)."""
        if self._task:
            try:
                await self._task
            except asyncio.CancelledError:
                # Cancellation is the expected outcome after terminate().
                pass
# ============================================================================
# Example: Custom Worker Implementation
# ============================================================================
class ExampleWorker(BaseWorker):
    """
    Example worker that demonstrates how to extend BaseWorker.

    This worker receives strings, converts them to uppercase,
    and sends them to the output queue.
    """

    def __init__(self, input_queue: asyncio.Queue, output_queue: asyncio.Queue):
        """
        Initialize the example worker.

        Args:
            input_queue: Queue of strings to process
            output_queue: Queue that receives the uppercased strings
        """
        super().__init__(input_queue, output_queue)
        # Add any custom initialization here
        self.processed_count = 0  # number of items successfully forwarded

    async def process(self, item: str):
        """
        Uppercase one string and forward it to the output queue.

        Args:
            item: String to convert to uppercase
        """
        # Simulate some processing time
        await asyncio.sleep(0.1)

        # Process the item
        result = item.upper()

        # Await put() rather than put_nowait(): on a bounded output queue
        # this waits for free space instead of raising QueueFull and losing
        # the item. On an unbounded queue the two behave the same.
        await self.output_queue.put(result)

        # Update counter only after the result was handed over
        self.processed_count += 1
        logger.info(
            f"✅ [{self.__class__.__name__}] "
            f"Processed '{item}' -> '{result}' "
            f"(total: {self.processed_count})"
        )
# ============================================================================
# Example Usage
# ============================================================================
async def example_usage():
    """
    Example of how to use the worker.

    Starts an ExampleWorker, feeds it a few strings, collects one result
    per submitted string, prints them, and shuts the worker down.
    """
    # Upper bound on how long to wait for any single result.
    per_item_timeout = 1.0

    # Create queues
    input_queue = asyncio.Queue()
    output_queue = asyncio.Queue()

    # Create and start worker
    worker = ExampleWorker(input_queue, output_queue)
    worker.start()

    try:
        # Send items to process
        items = ["hello", "world", "voice", "ai"]
        for item in items:
            input_queue.put_nowait(item)

        # Collect exactly one result per item instead of sleeping for a
        # fixed time and hoping processing has finished by then.
        results = []
        for _ in items:
            try:
                results.append(
                    await asyncio.wait_for(
                        output_queue.get(), timeout=per_item_timeout
                    )
                )
            except asyncio.TimeoutError:
                # An item failed or is too slow; report what arrived.
                break

        print(f"\n✅ Results: {results}")
    finally:
        # Always stop the background task, even if the demo itself failed.
        worker.terminate()
        await worker.wait_for_completion()
# Script entry point: only runs the demo when executed directly,
# never when this template is imported as a module.
if __name__ == "__main__":
    # Make the workers' INFO-level log lines visible on the console.
    logging.basicConfig(level=logging.INFO)

    # Drive the async demo to completion on a fresh event loop.
    asyncio.run(example_usage())

View File

@@ -0,0 +1,289 @@
"""
Template: Multi-Provider Factory
Use this template to create a factory that supports multiple providers
for transcription, LLM, and TTS services.
"""
from typing import Dict, Any
from abc import ABC, abstractmethod
import logging
logger = logging.getLogger(__name__)
# ============================================================================
# Provider Interfaces
# ============================================================================
class TranscriberProvider(ABC):
    """Interface that every transcriber provider must implement."""

    @abstractmethod
    async def transcribe_stream(self, audio_stream):
        """
        Transcribe streaming audio.

        Concrete providers must override this coroutine; the base
        implementation does nothing.

        Args:
            audio_stream: The audio to transcribe.
        """
class LLMProvider(ABC):
    """Interface that every LLM provider must implement."""

    @abstractmethod
    async def generate_response(self, messages, stream=True):
        """
        Generate a response from messages.

        Concrete providers must override this coroutine; the base
        implementation does nothing.

        Args:
            messages: The messages to respond to.
            stream: Streaming flag, True by default.
        """
class TTSProvider(ABC):
    """Interface that every TTS provider must implement."""

    @abstractmethod
    async def synthesize_speech(self, text):
        """
        Synthesize speech from text.

        Concrete providers must override this coroutine; the base
        implementation does nothing.

        Args:
            text: The text to turn into speech.
        """
# ============================================================================
# Multi-Provider Factory
# ============================================================================
class VoiceComponentFactory:
    """
    Factory for creating voice AI components with multiple provider support.

    Supports:
    - Multiple transcription providers (Deepgram, AssemblyAI, Azure, Google)
    - Multiple LLM providers (OpenAI, Gemini, Claude)
    - Multiple TTS providers (ElevenLabs, Azure, Google, Polly, Play.ht)

    Each registry maps a lowercase provider name to a builder method that
    takes the configuration dict. In this template every builder is a stub
    that raises NotImplementedError until it is filled in.
    """

    def __init__(self):
        """Register the builder method for every supported provider."""
        self.transcriber_providers = {
            "deepgram": self._create_deepgram_transcriber,
            "assemblyai": self._create_assemblyai_transcriber,
            "azure": self._create_azure_transcriber,
            "google": self._create_google_transcriber,
        }
        self.llm_providers = {
            "openai": self._create_openai_agent,
            "gemini": self._create_gemini_agent,
            "claude": self._create_claude_agent,
        }
        self.tts_providers = {
            "elevenlabs": self._create_elevenlabs_synthesizer,
            "azure": self._create_azure_synthesizer,
            "google": self._create_google_synthesizer,
            "polly": self._create_polly_synthesizer,
            "playht": self._create_playht_synthesizer,
        }

    def _create_component(
        self, config, config_key, default, registry, error_label, log_label
    ):
        """
        Resolve a provider name from config and invoke its builder.

        Shared by create_transcriber(), create_agent() and
        create_synthesizer() so the lookup rules stay identical.

        Args:
            config: Configuration dict passed through to the builder
            config_key: Key in config that holds the provider name
            default: Provider used when the key is missing or None
            registry: Mapping of provider name to builder method
            error_label: Component wording used in the error message
            log_label: Prefix of the info log line

        Returns:
            Whatever the selected builder returns

        Raises:
            ValueError: If the provider name is not in registry
        """
        requested = config.get(config_key)
        if requested is None:
            # Treat an explicit null the same as a missing key.
            requested = default

        # str() keeps non-string values from failing with AttributeError;
        # they fall through to the ValueError below instead.
        provider = str(requested).strip().lower()

        if provider not in registry:
            raise ValueError(
                f"Unknown {error_label} provider: {provider}. "
                f"Supported: {list(registry.keys())}"
            )

        logger.info(f"{log_label}: {provider}")
        return registry[provider](config)

    def create_transcriber(self, config: Dict[str, Any]):
        """
        Create transcriber based on configuration.

        Args:
            config: Configuration dict with 'transcriberProvider' key
                (defaults to "deepgram")

        Returns:
            Transcriber instance

        Raises:
            ValueError: If provider is not supported
            NotImplementedError: If the provider's builder is still a stub
        """
        return self._create_component(
            config,
            "transcriberProvider",
            "deepgram",
            self.transcriber_providers,
            "transcriber",
            "🎤 Creating transcriber",
        )

    def create_agent(self, config: Dict[str, Any]):
        """
        Create LLM agent based on configuration.

        Args:
            config: Configuration dict with 'llmProvider' key
                (defaults to "openai")

        Returns:
            Agent instance

        Raises:
            ValueError: If provider is not supported
            NotImplementedError: If the provider's builder is still a stub
        """
        return self._create_component(
            config,
            "llmProvider",
            "openai",
            self.llm_providers,
            "LLM",
            "🤖 Creating agent",
        )

    def create_synthesizer(self, config: Dict[str, Any]):
        """
        Create TTS synthesizer based on configuration.

        Args:
            config: Configuration dict with 'voiceProvider' key
                (defaults to "elevenlabs")

        Returns:
            Synthesizer instance

        Raises:
            ValueError: If provider is not supported
            NotImplementedError: If the provider's builder is still a stub
        """
        return self._create_component(
            config,
            "voiceProvider",
            "elevenlabs",
            self.tts_providers,
            "voice",
            "🔊 Creating synthesizer",
        )

    # ========================================================================
    # Transcriber Implementations
    # ========================================================================

    def _create_deepgram_transcriber(self, config: Dict[str, Any]):
        """Create Deepgram transcriber"""
        # TODO: Implement Deepgram transcriber
        # from .transcribers.deepgram import DeepgramTranscriber
        # return DeepgramTranscriber(
        #     api_key=config.get("deepgramApiKey"),
        #     model=config.get("deepgramModel", "nova-2"),
        #     language=config.get("language", "en-US")
        # )
        raise NotImplementedError("Deepgram transcriber not implemented")

    def _create_assemblyai_transcriber(self, config: Dict[str, Any]):
        """Create AssemblyAI transcriber"""
        # TODO: Implement AssemblyAI transcriber
        raise NotImplementedError("AssemblyAI transcriber not implemented")

    def _create_azure_transcriber(self, config: Dict[str, Any]):
        """Create Azure Speech transcriber"""
        # TODO: Implement Azure transcriber
        raise NotImplementedError("Azure transcriber not implemented")

    def _create_google_transcriber(self, config: Dict[str, Any]):
        """Create Google Cloud Speech transcriber"""
        # TODO: Implement Google transcriber
        raise NotImplementedError("Google transcriber not implemented")

    # ========================================================================
    # LLM Agent Implementations
    # ========================================================================

    def _create_openai_agent(self, config: Dict[str, Any]):
        """Create OpenAI agent"""
        # TODO: Implement OpenAI agent
        # from .agents.openai import OpenAIAgent
        # return OpenAIAgent(
        #     api_key=config.get("openaiApiKey"),
        #     model=config.get("openaiModel", "gpt-4"),
        #     system_prompt=config.get("prompt", "You are a helpful assistant.")
        # )
        raise NotImplementedError("OpenAI agent not implemented")

    def _create_gemini_agent(self, config: Dict[str, Any]):
        """Create Google Gemini agent"""
        # TODO: Implement Gemini agent
        # from .agents.gemini import GeminiAgent
        # return GeminiAgent(
        #     api_key=config.get("geminiApiKey"),
        #     model=config.get("geminiModel", "gemini-pro"),
        #     system_prompt=config.get("prompt", "You are a helpful assistant.")
        # )
        raise NotImplementedError("Gemini agent not implemented")

    def _create_claude_agent(self, config: Dict[str, Any]):
        """Create Anthropic Claude agent"""
        # TODO: Implement Claude agent
        raise NotImplementedError("Claude agent not implemented")

    # ========================================================================
    # TTS Synthesizer Implementations
    # ========================================================================

    def _create_elevenlabs_synthesizer(self, config: Dict[str, Any]):
        """Create ElevenLabs synthesizer"""
        # TODO: Implement ElevenLabs synthesizer
        # from .synthesizers.elevenlabs import ElevenLabsSynthesizer
        # return ElevenLabsSynthesizer(
        #     api_key=config.get("elevenlabsApiKey"),
        #     voice_id=config.get("elevenlabsVoiceId"),
        #     model_id=config.get("elevenlabsModel", "eleven_monolingual_v1")
        # )
        raise NotImplementedError("ElevenLabs synthesizer not implemented")

    def _create_azure_synthesizer(self, config: Dict[str, Any]):
        """Create Azure TTS synthesizer"""
        # TODO: Implement Azure synthesizer
        raise NotImplementedError("Azure synthesizer not implemented")

    def _create_google_synthesizer(self, config: Dict[str, Any]):
        """Create Google Cloud TTS synthesizer"""
        # TODO: Implement Google synthesizer
        raise NotImplementedError("Google synthesizer not implemented")

    def _create_polly_synthesizer(self, config: Dict[str, Any]):
        """Create Amazon Polly synthesizer"""
        # TODO: Implement Polly synthesizer
        raise NotImplementedError("Polly synthesizer not implemented")

    def _create_playht_synthesizer(self, config: Dict[str, Any]):
        """Create Play.ht synthesizer"""
        # TODO: Implement Play.ht synthesizer
        raise NotImplementedError("Play.ht synthesizer not implemented")
# ============================================================================
# Example Usage
# ============================================================================
def example_usage():
    """Demonstrate building all three components from one config dict."""
    # Provider selection plus placeholder credentials for each service
    settings = dict(
        transcriberProvider="deepgram",
        deepgramApiKey="your-api-key",
        llmProvider="gemini",
        geminiApiKey="your-api-key",
        voiceProvider="elevenlabs",
        elevenlabsApiKey="your-api-key",
        elevenlabsVoiceId="your-voice-id",
        prompt="You are a helpful AI assistant.",
    )

    component_factory = VoiceComponentFactory()

    # Built in pipeline order: transcriber, then agent, then synthesizer
    build_steps = (
        component_factory.create_transcriber,
        component_factory.create_agent,
        component_factory.create_synthesizer,
    )

    try:
        for build in build_steps:
            build(settings)
        print("✅ All components created successfully!")
    except ValueError as config_error:
        # Raised by the factory for an unsupported provider name
        print(f"❌ Configuration error: {config_error}")
    except NotImplementedError as missing:
        # Raised by builder stubs that have not been filled in yet
        print(f"⚠️ Not implemented: {missing}")
# Script entry point: only runs the demo when executed directly,
# never when this template is imported as a module.
if __name__ == "__main__":
    # Make the factory's INFO-level log lines visible on the console.
    logging.basicConfig(level=logging.INFO)

    example_usage()