From 70a7e6863bd8cd8735b0a59a946956a8d0b1b43d Mon Sep 17 00:00:00 2001 From: rootflo-hardik Date: Fri, 9 Jan 2026 11:34:01 +0530 Subject: [PATCH 1/8] voice_agents_module - inbound support, enhancement - added alembic migration for removing phone_numbers from telephony_config, having inbound outbound numbers, supported default languages in voice_agents - respective changes in service, controller files - added phone and language validation --- ...49da528_add_inbound_voice_agent_support.py | 91 +++++ .../db_repo_module/models/telephony_config.py | 5 +- .../db_repo_module/models/voice_agent.py | 46 ++- .../telephony_config_controller.py | 3 - .../controllers/voice_agent_controller.py | 142 ++++++-- .../models/telephony_schemas.py | 9 +- .../models/voice_agent_schemas.py | 26 +- .../services/telephony_config_service.py | 5 +- .../services/voice_agent_service.py | 300 +++++++++++++++- .../utils/cache_invalidation.py | 10 +- .../voice_agents_module/utils/cache_utils.py | 5 + .../utils/language_validation.py | 337 ++++++++++++++++++ .../utils/phone_validation.py | 67 ++++ 13 files changed, 974 insertions(+), 72 deletions(-) create mode 100644 wavefront/server/modules/db_repo_module/db_repo_module/alembic/versions/2026_01_08_1547-6010e49da528_add_inbound_voice_agent_support.py create mode 100644 wavefront/server/modules/voice_agents_module/voice_agents_module/utils/language_validation.py create mode 100644 wavefront/server/modules/voice_agents_module/voice_agents_module/utils/phone_validation.py diff --git a/wavefront/server/modules/db_repo_module/db_repo_module/alembic/versions/2026_01_08_1547-6010e49da528_add_inbound_voice_agent_support.py b/wavefront/server/modules/db_repo_module/db_repo_module/alembic/versions/2026_01_08_1547-6010e49da528_add_inbound_voice_agent_support.py new file mode 100644 index 00000000..20a189ec --- /dev/null +++ 
b/wavefront/server/modules/db_repo_module/db_repo_module/alembic/versions/2026_01_08_1547-6010e49da528_add_inbound_voice_agent_support.py @@ -0,0 +1,91 @@ +"""add_inbound_voice_agent_support + +Revision ID: 6010e49da528 +Revises: f7572bcd9510 +Create Date: 2026-01-08 15:47:54.502531 + +""" + +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + + +# revision identifiers, used by Alembic. +revision: str = '6010e49da528' +down_revision: Union[str, None] = 'f7572bcd9510' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # Add new columns to voice_agents table (initially nullable) + op.add_column( + 'voice_agents', + sa.Column( + 'inbound_numbers', postgresql.JSONB(astext_type=sa.Text()), nullable=True + ), + ) + op.add_column( + 'voice_agents', + sa.Column( + 'outbound_numbers', postgresql.JSONB(astext_type=sa.Text()), nullable=True + ), + ) + op.add_column( + 'voice_agents', + sa.Column( + 'supported_languages', + postgresql.JSONB(astext_type=sa.Text()), + nullable=True, + ), + ) + op.add_column( + 'voice_agents', + sa.Column('default_language', sa.String(length=10), nullable=True), + ) + + # Set defaults for existing agents + op.execute(""" + UPDATE voice_agents + SET + inbound_numbers = '[]'::jsonb, + outbound_numbers = '[]'::jsonb, + supported_languages = '["en"]'::jsonb, + default_language = 'en' + WHERE inbound_numbers IS NULL + """) + + # Make columns non-nullable after setting defaults + op.alter_column('voice_agents', 'inbound_numbers', nullable=False) + op.alter_column('voice_agents', 'outbound_numbers', nullable=False) + op.alter_column('voice_agents', 'supported_languages', nullable=False) + op.alter_column('voice_agents', 'default_language', nullable=False) + + # Create GIN index for fast inbound number lookups (JSONB containment queries) + op.execute(""" + CREATE INDEX 
idx_voice_agents_inbound_numbers_gin + ON voice_agents USING gin (inbound_numbers jsonb_path_ops) + """) + + # Remove phone_numbers column from telephony_configs table + # Phone numbers are now managed at the voice_agent level + op.drop_column('telephony_configs', 'phone_numbers') + + +def downgrade() -> None: + # Restore phone_numbers column to telephony_configs table + op.add_column( + 'telephony_configs', sa.Column('phone_numbers', sa.Text(), nullable=True) + ) + + # Drop GIN index + op.drop_index('idx_voice_agents_inbound_numbers_gin', table_name='voice_agents') + + # Drop columns from voice_agents + op.drop_column('voice_agents', 'default_language') + op.drop_column('voice_agents', 'supported_languages') + op.drop_column('voice_agents', 'outbound_numbers') + op.drop_column('voice_agents', 'inbound_numbers') diff --git a/wavefront/server/modules/db_repo_module/db_repo_module/models/telephony_config.py b/wavefront/server/modules/db_repo_module/db_repo_module/models/telephony_config.py index ace8edf2..44f76d85 100644 --- a/wavefront/server/modules/db_repo_module/db_repo_module/models/telephony_config.py +++ b/wavefront/server/modules/db_repo_module/db_repo_module/models/telephony_config.py @@ -22,7 +22,6 @@ class TelephonyConfig(Base): provider: Mapped[str] = mapped_column(String(length=64), nullable=False) connection_type: Mapped[str] = mapped_column(String(length=64), nullable=False) credentials: Mapped[str] = mapped_column(Text, nullable=False) - phone_numbers: Mapped[str] = mapped_column(Text, nullable=False) webhook_config: Mapped[Optional[str]] = mapped_column(Text, nullable=True) sip_config: Mapped[Optional[str]] = mapped_column(Text, nullable=True) is_deleted: Mapped[bool] = mapped_column(default=False, nullable=False) @@ -48,9 +47,7 @@ def to_dict(self, exclude_credentials: bool = True): elif isinstance(value, datetime): result[column.name] = value.isoformat() elif ( - column.name - in ['credentials', 'phone_numbers', 'webhook_config', 'sip_config'] - 
and value + column.name in ['credentials', 'webhook_config', 'sip_config'] and value ): # Parse JSON fields try: diff --git a/wavefront/server/modules/db_repo_module/db_repo_module/models/voice_agent.py b/wavefront/server/modules/db_repo_module/db_repo_module/models/voice_agent.py index ebfda80b..495acaef 100644 --- a/wavefront/server/modules/db_repo_module/db_repo_module/models/voice_agent.py +++ b/wavefront/server/modules/db_repo_module/db_repo_module/models/voice_agent.py @@ -4,6 +4,7 @@ from typing import Optional from sqlalchemy import ForeignKey, String, Text, func +from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.orm import Mapped, mapped_column from ..database.base import Base @@ -33,6 +34,17 @@ class VoiceAgent(Base): conversation_config: Mapped[Optional[str]] = mapped_column(Text, nullable=True) welcome_message: Mapped[str] = mapped_column(Text, nullable=False) status: Mapped[str] = mapped_column(String(length=64), nullable=False) + + # Multi-language and phone number support + inbound_numbers: Mapped[list] = mapped_column(JSONB, nullable=False, default=list) + outbound_numbers: Mapped[list] = mapped_column(JSONB, nullable=False, default=list) + supported_languages: Mapped[list] = mapped_column( + JSONB, nullable=False, default=lambda: ['en'] + ) + default_language: Mapped[str] = mapped_column( + String(length=10), nullable=False, default='en' + ) + is_deleted: Mapped[bool] = mapped_column(default=False, nullable=False) created_at: Mapped[datetime] = mapped_column(default=func.now()) updated_at: Mapped[datetime] = mapped_column( @@ -51,12 +63,34 @@ def to_dict(self): result[column.name] = str(value) elif isinstance(value, datetime): result[column.name] = value.isoformat() - elif column.name == 'conversation_config' and value: - # Parse JSON field - try: - result[column.name] = json.loads(value) - except (json.JSONDecodeError, TypeError): - result[column.name] = value + elif column.name in [ + 'conversation_config', + 'inbound_numbers', + 
'outbound_numbers', + 'supported_languages', + ]: + # Parse JSON/JSONB fields + if value: + try: + # JSONB fields are already deserialized by SQLAlchemy + if isinstance(value, str): + result[column.name] = json.loads(value) + else: + result[column.name] = value + except (json.JSONDecodeError, TypeError): + result[column.name] = value + else: + # Return empty list for JSONB array fields, None for others + result[column.name] = ( + [] + if column.name + in [ + 'inbound_numbers', + 'outbound_numbers', + 'supported_languages', + ] + else None + ) else: result[column.name] = value return result diff --git a/wavefront/server/modules/voice_agents_module/voice_agents_module/controllers/telephony_config_controller.py b/wavefront/server/modules/voice_agents_module/voice_agents_module/controllers/telephony_config_controller.py index 3d41a2ab..862e6654 100644 --- a/wavefront/server/modules/voice_agents_module/voice_agents_module/controllers/telephony_config_controller.py +++ b/wavefront/server/modules/voice_agents_module/voice_agents_module/controllers/telephony_config_controller.py @@ -51,7 +51,6 @@ async def create_telephony_config( provider=payload.provider.value, connection_type=payload.connection_type.value, credentials=payload.credentials, - phone_numbers=payload.phone_numbers, webhook_config=payload.webhook_config, sip_config=payload.sip_config, ) @@ -198,8 +197,6 @@ async def update_telephony_config( ) if payload.credentials is not UNSET: update_data['credentials'] = json.dumps(payload.credentials) - if payload.phone_numbers is not UNSET: - update_data['phone_numbers'] = json.dumps(payload.phone_numbers) if payload.webhook_config is not UNSET: update_data['webhook_config'] = ( json.dumps(payload.webhook_config.model_dump()) diff --git a/wavefront/server/modules/voice_agents_module/voice_agents_module/controllers/voice_agent_controller.py b/wavefront/server/modules/voice_agents_module/voice_agents_module/controllers/voice_agent_controller.py index 6d0bdf15..98305652 
100644 --- a/wavefront/server/modules/voice_agents_module/voice_agents_module/controllers/voice_agent_controller.py +++ b/wavefront/server/modules/voice_agents_module/voice_agents_module/controllers/voice_agent_controller.py @@ -57,6 +57,10 @@ async def create_voice_agent( welcome_message=payload.welcome_message, conversation_config=payload.conversation_config, status=payload.status.value, + inbound_numbers=payload.inbound_numbers, + outbound_numbers=payload.outbound_numbers, + supported_languages=payload.supported_languages, + default_language=payload.default_language, ) return JSONResponse( @@ -198,6 +202,14 @@ async def update_voice_agent( f'Invalid status value. Must be one of: {valid_values}' ), ) + if payload.inbound_numbers is not UNSET: + update_data['inbound_numbers'] = payload.inbound_numbers + if payload.outbound_numbers is not UNSET: + update_data['outbound_numbers'] = payload.outbound_numbers + if payload.supported_languages is not UNSET: + update_data['supported_languages'] = payload.supported_languages + if payload.default_language is not UNSET: + update_data['default_language'] = payload.default_language if not update_data: return JSONResponse( @@ -317,48 +329,48 @@ async def initiate_call( ), ) - # Fetch telephony config - telephony_config_id = agent_dict.get('telephony_config_id') - telephony_config = await voice_agent_service.telephony_config_service.get_config( - telephony_config_id - ) - - if not telephony_config: - return JSONResponse( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - content=response_formatter.buildErrorResponse( - f'Telephony config not found with id: {telephony_config_id}' - ), - ) - - # Parse phone_numbers from telephony config - phone_numbers = telephony_config.get('phone_numbers') + # Get outbound numbers from agent + outbound_numbers = agent_dict.get('outbound_numbers', []) if ( - not phone_numbers - or not isinstance(phone_numbers, list) - or len(phone_numbers) == 0 + not outbound_numbers + or not 
isinstance(outbound_numbers, list) + or len(outbound_numbers) == 0 ): return JSONResponse( status_code=status.HTTP_400_BAD_REQUEST, content=response_formatter.buildErrorResponse( - 'No phone numbers configured in telephony config' + 'No outbound phone numbers configured for this agent' ), ) # Select from_number from_number = payload.from_number if from_number: - # Validate that provided from_number is in the configured numbers - if from_number not in phone_numbers: + # Validate that provided from_number is in the agent's outbound numbers + if from_number not in outbound_numbers: return JSONResponse( status_code=status.HTTP_400_BAD_REQUEST, content=response_formatter.buildErrorResponse( - f'from_number {from_number} is not in the configured phone numbers: {phone_numbers}' + f"from_number {from_number} is not in the agent's outbound numbers: {outbound_numbers}" ), ) else: - # Default to first configured number - from_number = phone_numbers[0] + # Default to first outbound number + from_number = outbound_numbers[0] + + # Fetch telephony config for Twilio credentials + telephony_config_id = agent_dict.get('telephony_config_id') + telephony_config = await voice_agent_service.telephony_config_service.get_config( + telephony_config_id + ) + + if not telephony_config: + return JSONResponse( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + content=response_formatter.buildErrorResponse( + f'Telephony config not found with id: {telephony_config_id}' + ), + ) # Extract Twilio credentials from telephony config credentials = telephony_config.get('credentials', {}) @@ -413,3 +425,83 @@ async def initiate_call( } ), ) + + +@voice_agent_router.get('/v1/voice-agents/by-inbound-number/{phone_number}') +@inject +async def get_voice_agent_by_inbound_number( + phone_number: str = Path(..., description='Inbound phone number (E.164 format)'), + response_formatter: ResponseFormatter = Depends( + Provide[CommonContainer.response_formatter] + ), + voice_agent_service: VoiceAgentService 
= Depends( + Provide[VoiceAgentsContainer.voice_agent_service] + ), +): + """ + Get voice agent by inbound phone number. + + This endpoint is used by call_processing to lookup which agent handles + a specific inbound phone number. + + Args: + phone_number: Inbound phone number in E.164 format + + Returns: + JSONResponse: Voice agent details or 404 if not found + """ + agent = await voice_agent_service.get_agent_by_inbound_number(phone_number) + + if not agent: + return JSONResponse( + status_code=status.HTTP_404_NOT_FOUND, + content=response_formatter.buildErrorResponse( + f'No voice agent found for inbound number: {phone_number}' + ), + ) + + return JSONResponse( + status_code=status.HTTP_200_OK, + content=response_formatter.buildSuccessResponse(agent), + ) + + +@voice_agent_router.get('/v1/voice-agents/{agent_id}/welcome-audio-url') +@inject +async def get_welcome_audio_url( + agent_id: UUID = Path(..., description='The ID of the voice agent'), + response_formatter: ResponseFormatter = Depends( + Provide[CommonContainer.response_formatter] + ), + voice_agent_service: VoiceAgentService = Depends( + Provide[VoiceAgentsContainer.voice_agent_service] + ), +): + """ + Get welcome message audio presigned URL for a voice agent. + + Returns a presigned URL (2-hour expiration) for accessing the agent's + welcome message audio file from cloud storage. 
+ + Args: + agent_id: UUID of the voice agent + + Returns: + JSONResponse: Object with 'url' field containing presigned URL + """ + try: + url = await voice_agent_service.get_welcome_message_audio_url(agent_id) + + return JSONResponse( + status_code=status.HTTP_200_OK, + content=response_formatter.buildSuccessResponse({'url': url}), + ) + + except Exception as e: + logger.error(f'Failed to get welcome audio URL for agent {agent_id}: {str(e)}') + return JSONResponse( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + content=response_formatter.buildErrorResponse( + f'Failed to generate welcome message URL: {str(e)}' + ), + ) diff --git a/wavefront/server/modules/voice_agents_module/voice_agents_module/models/telephony_schemas.py b/wavefront/server/modules/voice_agents_module/voice_agents_module/models/telephony_schemas.py index 46daf998..e0f5220d 100644 --- a/wavefront/server/modules/voice_agents_module/voice_agents_module/models/telephony_schemas.py +++ b/wavefront/server/modules/voice_agents_module/voice_agents_module/models/telephony_schemas.py @@ -1,5 +1,5 @@ from pydantic import BaseModel, Field -from typing import Optional, Union, Any, Dict, List, Literal +from typing import Optional, Union, Any, Dict, Literal from enum import Enum from datetime import datetime import uuid @@ -78,11 +78,6 @@ class CreateTelephonyConfigPayload(BaseModel): ..., description='Provider credentials as JSON object (e.g., {account_sid, auth_token})', ) - phone_numbers: List[str] = Field( - ..., - description='List of phone numbers available for outbound calls', - example=['+1234567890', '+0987654321'], - ) webhook_config: Optional[WebhookConfig] = Field( None, description='Webhook configuration for status callbacks (optional for both connection types)', @@ -103,7 +98,6 @@ class UpdateTelephonyConfigPayload(BaseModel): provider: Union[TelephonyProvider, Any] = Field(default=UNSET) connection_type: Union[ConnectionType, Any] = Field(default=UNSET) credentials: Union[Dict[str, Any], 
Any] = Field(default=UNSET) - phone_numbers: Union[List[str], Any] = Field(default=UNSET) webhook_config: Union[WebhookConfig, None, Any] = Field(default=UNSET) sip_config: Union[SipConfig, None, Any] = Field(default=UNSET) @@ -114,7 +108,6 @@ class TelephonyConfigResponse(BaseModel): description: Optional[str] provider: str connection_type: str - phone_numbers: List[str] webhook_config: Optional[WebhookConfig] sip_config: Optional[SipConfig] is_deleted: bool diff --git a/wavefront/server/modules/voice_agents_module/voice_agents_module/models/voice_agent_schemas.py b/wavefront/server/modules/voice_agents_module/voice_agents_module/models/voice_agent_schemas.py index 8499b08b..27ae09d6 100644 --- a/wavefront/server/modules/voice_agents_module/voice_agents_module/models/voice_agent_schemas.py +++ b/wavefront/server/modules/voice_agents_module/voice_agents_module/models/voice_agent_schemas.py @@ -1,5 +1,5 @@ from pydantic import BaseModel, Field -from typing import Optional, Union, Any, Dict +from typing import Optional, Union, Any, Dict, List from enum import Enum from datetime import datetime import uuid @@ -34,6 +34,22 @@ class CreateVoiceAgentPayload(BaseModel): default=VoiceAgentStatus.INACTIVE, description='Agent status (active or inactive)', ) + inbound_numbers: Optional[List[str]] = Field( + None, + description='Phone numbers for receiving inbound calls (E.164 format, globally unique)', + ) + outbound_numbers: Optional[List[str]] = Field( + None, + description='Phone numbers for making outbound calls (E.164 format)', + ) + supported_languages: Optional[List[str]] = Field( + None, + description='List of supported language codes (ISO 639-1, e.g., ["en", "hi", "te"])', + ) + default_language: str = Field( + 'en', + description='Default language if detection fails (must be in supported_languages)', + ) class UpdateVoiceAgentPayload(BaseModel): @@ -47,6 +63,10 @@ class UpdateVoiceAgentPayload(BaseModel): conversation_config: Union[Dict[str, Any], None, Any] = 
Field(default=UNSET) welcome_message: Union[str, Any] = Field(default=UNSET) status: Union[VoiceAgentStatus, Any] = Field(default=UNSET) + inbound_numbers: Union[List[str], Any] = Field(default=UNSET) + outbound_numbers: Union[List[str], Any] = Field(default=UNSET) + supported_languages: Union[List[str], Any] = Field(default=UNSET) + default_language: Union[str, Any] = Field(default=UNSET) class VoiceAgentResponse(BaseModel): @@ -61,6 +81,10 @@ class VoiceAgentResponse(BaseModel): conversation_config: Optional[Dict[str, Any]] welcome_message: str status: str + inbound_numbers: List[str] + outbound_numbers: List[str] + supported_languages: List[str] + default_language: str is_deleted: bool created_at: datetime updated_at: datetime diff --git a/wavefront/server/modules/voice_agents_module/voice_agents_module/services/telephony_config_service.py b/wavefront/server/modules/voice_agents_module/voice_agents_module/services/telephony_config_service.py index 7d133fac..24f02840 100644 --- a/wavefront/server/modules/voice_agents_module/voice_agents_module/services/telephony_config_service.py +++ b/wavefront/server/modules/voice_agents_module/voice_agents_module/services/telephony_config_service.py @@ -42,7 +42,6 @@ async def create_config( provider: str = None, connection_type: str = None, credentials: dict = None, - phone_numbers: list = None, webhook_config: Optional[WebhookConfig] = None, sip_config: Optional[SipConfig] = None, ) -> dict: @@ -54,8 +53,7 @@ async def create_config( description: Optional description provider: Telephony provider connection_type: Connection type (websocket/sip) - credentials: Provider credentials - phone_numbers: List of phone numbers available for outbound calls + credentials: Provider credentials (e.g., Twilio account_sid, auth_token) webhook_config: Webhook configuration Pydantic model (optional) sip_config: SIP configuration Pydantic model (optional) @@ -72,7 +70,6 @@ async def create_config( provider=provider, 
connection_type=connection_type, credentials=json.dumps(credentials), - phone_numbers=json.dumps(phone_numbers), webhook_config=( json.dumps(webhook_config.model_dump()) if webhook_config else None ), diff --git a/wavefront/server/modules/voice_agents_module/voice_agents_module/services/voice_agent_service.py b/wavefront/server/modules/voice_agents_module/voice_agents_module/services/voice_agent_service.py index 7cbd32e4..26ab4a80 100644 --- a/wavefront/server/modules/voice_agents_module/voice_agents_module/services/voice_agent_service.py +++ b/wavefront/server/modules/voice_agents_module/voice_agents_module/services/voice_agent_service.py @@ -1,6 +1,6 @@ import json import uuid -from typing import List, Optional +from typing import List, Optional, Tuple from uuid import UUID from common_module.log.logger import logger @@ -24,6 +24,12 @@ invalidate_call_processing_cache, ) from voice_agents_module.utils.storage_utils import generate_welcome_message_key +from voice_agents_module.utils.language_validation import ( + validate_languages_for_configs, + validate_default_language, + format_language_prompt, +) +from voice_agents_module.utils.phone_validation import validate_phone_numbers class VoiceAgentService: @@ -114,16 +120,97 @@ async def _validate_foreign_keys( return True, None + async def _validate_language_and_phone_config( + self, + inbound_numbers: List[str], + outbound_numbers: List[str], + supported_languages: List[str], + default_language: str, + tts_config_id: UUID, + stt_config_id: UUID, + agent_id: Optional[UUID] = None, + ) -> Tuple[bool, Optional[str]]: + """ + Validate language and phone number configuration. 
+ + Args: + inbound_numbers: List of inbound phone numbers + outbound_numbers: List of outbound phone numbers + supported_languages: List of language codes + default_language: Default language code + tts_config_id: TTS config ID + stt_config_id: STT config ID + agent_id: Agent ID (for update operations, to exclude self from uniqueness check) + + Returns: + Tuple of (is_valid, error_message). error_message is None if valid. + """ + # Validate phone numbers format + is_valid, error = validate_phone_numbers(inbound_numbers, 'inbound_numbers') + if not is_valid: + return False, error + + is_valid, error = validate_phone_numbers(outbound_numbers, 'outbound_numbers') + if not is_valid: + return False, error + + # Validate inbound number uniqueness + for number in inbound_numbers: + # Query all agents and check if number is already assigned + all_agents = await self.voice_agent_repository.find(is_deleted=False) + for agent in all_agents: + # Skip self when updating + if agent_id and agent.id == agent_id: + continue + + agent_dict = agent.to_dict() + if number in agent_dict.get('inbound_numbers', []): + return ( + False, + f"Inbound number {number} is already assigned to agent '{agent.name}' (ID: {agent.id})", + ) + + # Validate default language in supported languages + is_valid, error = validate_default_language( + default_language, supported_languages + ) + if not is_valid: + return False, error + + # Fetch TTS and STT configs to get providers + tts_config = await self.tts_config_service.get_config(tts_config_id) + stt_config = await self.stt_config_service.get_config(stt_config_id) + + if not tts_config or not stt_config: + return False, 'TTS or STT config not found' + + # Validate languages against provider capabilities + is_valid, error = validate_languages_for_configs( + supported_languages, tts_config['provider'], stt_config['provider'] + ) + if not is_valid: + return False, error + + return True, None + async def _generate_and_upload_welcome_audio( - self, 
welcome_message: str, tts_config_id: UUID, agent_id: UUID + self, + welcome_message: str, + tts_config_id: UUID, + agent_id: UUID, + supported_languages: List[str], + default_language: str, ) -> None: """ - Generate TTS audio for welcome message and upload to cloud storage + Generate TTS audio for welcome message and upload to cloud storage. + If multiple languages supported, concatenates language selection prompt. Args: welcome_message: Text of the welcome message tts_config_id: TTS config ID to use for generation agent_id: Voice agent ID (used for generating storage key) + supported_languages: List of supported language codes + default_language: Default language code for audio generation Raises: Exception: If TTS generation or upload fails @@ -135,10 +222,29 @@ async def _generate_and_upload_welcome_audio( if not tts_config: raise ValueError(f'TTS config {tts_config_id} not found') + # Build welcome audio text + audio_text = welcome_message + + # If multiple languages supported, append language selection prompt + if len(supported_languages) > 1: + language_list = format_language_prompt(supported_languages) + audio_text = ( + f'{welcome_message}. ' + f'Which language would you like to continue in? {language_list}.' 
+ ) + + logger.info(f'Welcome audio text: {audio_text}') + + # Update TTS config to use default language + tts_config_with_lang = {**tts_config} + if 'parameters' not in tts_config_with_lang: + tts_config_with_lang['parameters'] = {} + tts_config_with_lang['parameters']['language'] = default_language + # Generate audio using TTS service try: audio_bytes = await self.tts_generator_service.generate_audio( - welcome_message, tts_config + audio_text, tts_config_with_lang ) logger.info(f'Generated audio: {len(audio_bytes)} bytes') except Exception as e: @@ -175,9 +281,13 @@ async def create_agent( description: Optional[str] = None, conversation_config: Optional[dict] = None, status: str = 'inactive', + inbound_numbers: Optional[List[str]] = None, + outbound_numbers: Optional[List[str]] = None, + supported_languages: Optional[List[str]] = None, + default_language: str = 'en', ) -> dict: """ - Create a new voice agent + Create a new voice agent with inbound/outbound numbers and language support Args: name: Name of the voice agent @@ -190,16 +300,25 @@ async def create_agent( description: Description of the agent (optional) conversation_config: Conversation configuration (optional) status: Agent status (default: inactive) + inbound_numbers: Phone numbers for receiving inbound calls (E.164 format) + outbound_numbers: Phone numbers for making outbound calls (E.164 format) + supported_languages: List of supported language codes (e.g., ["en", "es", "hi"]) + default_language: Default language code (must be in supported_languages) Returns: Created voice agent as dict Raises: - ValueError: If any foreign key validation fails + ValueError: If any validation fails Exception: If TTS generation or upload fails """ logger.info(f'Creating voice agent: {name}') + # Set defaults + inbound_numbers = inbound_numbers or [] + outbound_numbers = outbound_numbers or [] + supported_languages = supported_languages or ['en'] + # Validate all foreign keys is_valid, error_message = await 
self._validate_foreign_keys( llm_config_id, tts_config_id, stt_config_id, telephony_config_id @@ -208,13 +327,30 @@ async def create_agent( logger.error(f'FK validation failed: {error_message}') raise ValueError(error_message) + # Validate language and phone configuration + is_valid, error_message = await self._validate_language_and_phone_config( + inbound_numbers, + outbound_numbers, + supported_languages, + default_language, + tts_config_id, + stt_config_id, + ) + if not is_valid: + logger.error(f'Language/phone validation failed: {error_message}') + raise ValueError(error_message) + # Generate agent ID first agent_id = uuid.uuid4() # Generate and upload welcome message audio BEFORE creating agent # If this fails, no agent record is created await self._generate_and_upload_welcome_audio( - welcome_message, tts_config_id, agent_id + welcome_message, + tts_config_id, + agent_id, + supported_languages, + default_language, ) # Create agent only if audio generation succeeded @@ -232,6 +368,10 @@ async def create_agent( else None, welcome_message=welcome_message, status=status, + inbound_numbers=inbound_numbers, + outbound_numbers=outbound_numbers, + supported_languages=supported_languages, + default_language=default_language, ) # Convert to dict @@ -250,6 +390,10 @@ async def create_agent( # Invalidate cache in call_processing await invalidate_call_processing_cache('voice_agent', agent.id, 'create') + # Invalidate inbound number cache for each number + for number in inbound_numbers: + await self._invalidate_inbound_number_cache(number) + logger.info(f'Successfully created voice agent with id: {agent.id}') return agent_dict @@ -326,7 +470,7 @@ async def update_agent(self, agent_id: UUID, **update_data) -> Optional[dict]: Updated agent as dict or None if not found Raises: - ValueError: If any foreign key validation fails + ValueError: If any validation fails Exception: If TTS generation or upload fails """ logger.info(f'Updating voice agent: {agent_id}') @@ -337,14 
+481,67 @@ async def update_agent(self, agent_id: UUID, **update_data) -> Optional[dict]: if not existing_agent: return None - # Check if welcome_message is being updated - welcome_message_changed = False + existing_dict = existing_agent.to_dict() + + # Track old inbound numbers for cache invalidation + old_inbound_numbers = existing_dict.get('inbound_numbers', []) + new_inbound_numbers = update_data.get('inbound_numbers', old_inbound_numbers) + + # Check if language/phone config is being updated + language_phone_fields = [ + 'inbound_numbers', + 'outbound_numbers', + 'supported_languages', + 'default_language', + ] + if any(key in update_data for key in language_phone_fields): + # Build full config (use existing if not being updated) + inbound_numbers = update_data.get('inbound_numbers', old_inbound_numbers) + outbound_numbers = update_data.get( + 'outbound_numbers', existing_dict.get('outbound_numbers', []) + ) + supported_languages = update_data.get( + 'supported_languages', existing_dict.get('supported_languages', ['en']) + ) + default_language = update_data.get( + 'default_language', existing_dict.get('default_language', 'en') + ) + tts_config_id = update_data.get( + 'tts_config_id', existing_agent.tts_config_id + ) + stt_config_id = update_data.get( + 'stt_config_id', existing_agent.stt_config_id + ) + + # Validate language/phone config (pass agent_id to exclude self from uniqueness check) + is_valid, error_message = await self._validate_language_and_phone_config( + inbound_numbers, + outbound_numbers, + supported_languages, + default_language, + tts_config_id, + stt_config_id, + agent_id=agent_id, + ) + if not is_valid: + logger.error(f'Language/phone validation failed: {error_message}') + raise ValueError(error_message) + + # Check if welcome_message or language config changed (requires audio regeneration) + audio_regeneration_needed = False if ( 'welcome_message' in update_data and update_data['welcome_message'] != existing_agent.welcome_message ): - 
welcome_message_changed = True - new_welcome_message = update_data['welcome_message'] + audio_regeneration_needed = True + if 'supported_languages' in update_data and update_data[ + 'supported_languages' + ] != existing_dict.get('supported_languages'): + audio_regeneration_needed = True + if 'default_language' in update_data and update_data[ + 'default_language' + ] != existing_dict.get('default_language'): + audio_regeneration_needed = True # If any FK fields are being updated, validate them if any( @@ -377,16 +574,33 @@ async def update_agent(self, agent_id: UUID, **update_data) -> Optional[dict]: logger.error(f'FK validation failed: {error_message}') raise ValueError(error_message) - # If welcome message changed, regenerate audio - if welcome_message_changed: - logger.info('Welcome message changed, regenerating audio') + # Regenerate welcome audio if needed + if audio_regeneration_needed: + logger.info( + 'Welcome message or language config changed, regenerating audio' + ) try: - # Use updated tts_config_id if provided, otherwise use existing + # Use updated values if provided, otherwise use existing + welcome_message = update_data.get( + 'welcome_message', existing_agent.welcome_message + ) tts_config_id = update_data.get( 'tts_config_id', existing_agent.tts_config_id ) + supported_languages = update_data.get( + 'supported_languages', + existing_dict.get('supported_languages', ['en']), + ) + default_language = update_data.get( + 'default_language', existing_dict.get('default_language', 'en') + ) + await self._generate_and_upload_welcome_audio( - new_welcome_message, tts_config_id, agent_id + welcome_message, + tts_config_id, + agent_id, + supported_languages, + default_language, ) except Exception as e: logger.error(f'Failed to regenerate welcome audio: {str(e)}') @@ -406,6 +620,16 @@ async def update_agent(self, agent_id: UUID, **update_data) -> Optional[dict]: # Invalidate cache in call_processing await invalidate_call_processing_cache('voice_agent', 
agent_id, 'update') + # Invalidate inbound number cache if numbers changed + if old_inbound_numbers != new_inbound_numbers: + # Invalidate old numbers + for number in old_inbound_numbers: + if number not in new_inbound_numbers: + await self._invalidate_inbound_number_cache(number) + # Invalidate new numbers + for number in new_inbound_numbers: + await self._invalidate_inbound_number_cache(number) + logger.info(f'Successfully updated voice agent: {agent_id}') return updated_agent.to_dict() @@ -488,3 +712,45 @@ async def delete_agent(self, agent_id: UUID) -> bool: logger.info(f'Successfully deleted voice agent: {agent_id}') return True + + async def _invalidate_inbound_number_cache(self, phone_number: str): + """Invalidate cache for inbound number lookup in call_processing""" + # Invalidate local cache + cache_key = f'inbound_number:{phone_number}' + self.cache_manager.remove(cache_key) + + # Also invalidate in call_processing + await invalidate_call_processing_cache('inbound_number', phone_number, 'update') + + async def get_agent_by_inbound_number(self, phone_number: str) -> Optional[dict]: + """ + Get voice agent by inbound phone number (with caching). 
+ + Args: + phone_number: E.164 formatted phone number + + Returns: + Voice agent dict or None if not found + """ + cache_key = f'inbound_number:{phone_number}' + + # Try cache first + cached_agent_id = self.cache_manager.get_str(cache_key) + if cached_agent_id: + logger.info(f'Cache hit for inbound number: {phone_number}') + return await self.get_agent(UUID(cached_agent_id)) + + # Cache miss - query database + logger.info(f'Cache miss - fetching agent by inbound number: {phone_number}') + all_agents = await self.voice_agent_repository.find(is_deleted=False) + + for agent in all_agents: + agent_dict = agent.to_dict() + if phone_number in agent_dict.get('inbound_numbers', []): + # Cache the mapping + self.cache_manager.add( + cache_key, str(agent.id), expiry=self.voice_agent_cache_time + ) + return agent_dict + + return None diff --git a/wavefront/server/modules/voice_agents_module/voice_agents_module/utils/cache_invalidation.py b/wavefront/server/modules/voice_agents_module/voice_agents_module/utils/cache_invalidation.py index 099a91c4..42723197 100644 --- a/wavefront/server/modules/voice_agents_module/voice_agents_module/utils/cache_invalidation.py +++ b/wavefront/server/modules/voice_agents_module/voice_agents_module/utils/cache_invalidation.py @@ -8,15 +8,15 @@ async def invalidate_call_processing_cache( config_type: str, - config_id: UUID, + config_id: UUID | str, operation: str = 'update', ) -> bool: """ Invalidate cache in call_processing app Args: - config_type: Type of config (voice_agent, tts_config, stt_config, telephony_config) - config_id: UUID of the config + config_type: Type of config (voice_agent, tts_config, stt_config, telephony_config, inbound_number) + config_id: UUID of the config or string identifier (e.g., phone number for inbound_number type) operation: Operation type (create, update, or delete) Returns: @@ -38,7 +38,9 @@ async def invalidate_call_processing_cache( 'Content-Type': 'application/json', 'X-Passthrough': passthrough_secret, } - 
payload = {'config_type': config_type, 'config_id': str(config_id)} + # Convert UUID to string, keep strings as-is + resource_id = str(config_id) if isinstance(config_id, UUID) else config_id + payload = {'config_type': config_type, 'config_id': resource_id} try: async with httpx.AsyncClient(timeout=10.0) as client: diff --git a/wavefront/server/modules/voice_agents_module/voice_agents_module/utils/cache_utils.py b/wavefront/server/modules/voice_agents_module/voice_agents_module/utils/cache_utils.py index 688f23a6..6a3dbef1 100644 --- a/wavefront/server/modules/voice_agents_module/voice_agents_module/utils/cache_utils.py +++ b/wavefront/server/modules/voice_agents_module/voice_agents_module/utils/cache_utils.py @@ -44,3 +44,8 @@ def get_voice_agents_list_cache_key() -> str: def get_welcome_message_url_cache_key(agent_id: UUID) -> str: """Generate cache key for a voice agent's welcome message presigned URL""" return f'voice_agent_welcome_url:{agent_id}' + + +def get_inbound_number_cache_key(phone_number: str) -> str: + """Generate cache key for inbound number to agent ID mapping""" + return f'inbound_number:{phone_number}' diff --git a/wavefront/server/modules/voice_agents_module/voice_agents_module/utils/language_validation.py b/wavefront/server/modules/voice_agents_module/voice_agents_module/utils/language_validation.py new file mode 100644 index 00000000..fe509d48 --- /dev/null +++ b/wavefront/server/modules/voice_agents_module/voice_agents_module/utils/language_validation.py @@ -0,0 +1,337 @@ +"""Language validation utilities for voice agents""" + +from typing import List, Dict, Set, Optional, Tuple + +# Language code to human-readable name mapping +LANGUAGE_NAMES: Dict[str, str] = { + 'ar': 'Arabic', + 'bg': 'Bulgarian', + 'bn': 'Bengali', + 'cs': 'Czech', + 'da': 'Danish', + 'de': 'German', + 'el': 'Greek', + 'en': 'English', + 'es': 'Spanish', + 'fi': 'Finnish', + 'fil': 'Filipino', + 'fr': 'French', + 'gu': 'Gujarati', + 'he': 'Hebrew', + 'hi': 'Hindi', + 
'hr': 'Croatian', + 'hu': 'Hungarian', + 'id': 'Indonesian', + 'it': 'Italian', + 'ja': 'Japanese', + 'ka': 'Georgian', + 'kn': 'Kannada', + 'ko': 'Korean', + 'ml': 'Malayalam', + 'mr': 'Marathi', + 'ms': 'Malay', + 'nl': 'Dutch', + 'no': 'Norwegian', + 'pa': 'Punjabi', + 'pl': 'Polish', + 'pt': 'Portuguese', + 'ro': 'Romanian', + 'ru': 'Russian', + 'sk': 'Slovak', + 'sv': 'Swedish', + 'ta': 'Tamil', + 'te': 'Telugu', + 'th': 'Thai', + 'tl': 'Tagalog', + 'tr': 'Turkish', + 'uk': 'Ukrainian', + 'vi': 'Vietnamese', + 'zh': 'Chinese', +} + +# Provider language support (extracted from pipecat language mappings) +ELEVENLABS_LANGUAGES: Set[str] = { + 'ar', + 'bg', + 'cs', + 'da', + 'de', + 'el', + 'en', + 'es', + 'fi', + 'fil', + 'fr', + 'hi', + 'hr', + 'hu', + 'id', + 'it', + 'ja', + 'ko', + 'ms', + 'nl', + 'no', + 'pl', + 'pt', + 'ro', + 'ru', + 'sk', + 'sv', + 'ta', + 'tr', + 'uk', + 'vi', + 'zh', +} + +CARTESIA_LANGUAGES: Set[str] = { + 'ar', + 'bg', + 'bn', + 'cs', + 'da', + 'de', + 'en', + 'el', + 'es', + 'fi', + 'fr', + 'gu', + 'he', + 'hi', + 'hr', + 'hu', + 'id', + 'it', + 'ja', + 'ka', + 'kn', + 'ko', + 'ml', + 'mr', + 'ms', + 'nl', + 'no', + 'pa', + 'pl', + 'pt', + 'ro', + 'ru', + 'sk', + 'sv', + 'ta', + 'te', + 'th', + 'tl', + 'tr', + 'uk', + 'vi', + 'zh', +} + +# Deepgram STT supports 40+ languages +DEEPGRAM_STT_LANGUAGES: Set[str] = { + 'ar', + 'bg', + 'ca', + 'cs', + 'da', + 'de', + 'el', + 'en', + 'es', + 'et', + 'fi', + 'fr', + 'hi', + 'hu', + 'id', + 'it', + 'ja', + 'ko', + 'lt', + 'lv', + 'ms', + 'nl', + 'no', + 'pl', + 'pt', + 'ro', + 'ru', + 'sk', + 'sv', + 'ta', + 'te', + 'th', + 'tr', + 'uk', + 'vi', + 'zh', +} + + +def get_tts_supported_languages(provider: str) -> Set[str]: + """ + Get supported languages for a TTS provider. + + Args: + provider: TTS provider name (elevenlabs, cartesia, deepgram, etc.) 
+ + Returns: + Set of supported language codes + + Raises: + ValueError: If provider is unknown + """ + provider = provider.lower() + + if provider == 'elevenlabs': + return ELEVENLABS_LANGUAGES + elif provider == 'cartesia': + return CARTESIA_LANGUAGES + elif provider == 'deepgram': + # Deepgram TTS: language is implicit in voice_id, no explicit language param + # Return empty set to indicate validation should be skipped + return set() + elif provider in ['azure', 'google', 'aws']: + # For providers not yet fully implemented, skip validation + return set() + else: + raise ValueError(f'Unknown TTS provider: {provider}') + + +def get_stt_supported_languages(provider: str) -> Set[str]: + """ + Get supported languages for an STT provider. + + Args: + provider: STT provider name (deepgram, assemblyai, whisper, etc.) + + Returns: + Set of supported language codes + + Raises: + ValueError: If provider is unknown + """ + provider = provider.lower() + + if provider == 'deepgram': + return DEEPGRAM_STT_LANGUAGES + elif provider in ['assemblyai', 'whisper', 'google', 'azure']: + # For providers not yet fully implemented, skip validation + return set() + else: + raise ValueError(f'Unknown STT provider: {provider}') + + +def validate_languages_for_configs( + supported_languages: List[str], tts_provider: str, stt_provider: str +) -> Tuple[bool, Optional[str]]: + """ + Validate that all supported languages are available in both TTS and STT providers. + + Args: + supported_languages: List of language codes to validate + tts_provider: TTS provider name + stt_provider: STT provider name + + Returns: + Tuple of (is_valid, error_message). error_message is None if valid. 
+ """ + if not supported_languages: + return False, 'supported_languages cannot be empty' + + # Get provider capabilities + try: + tts_langs = get_tts_supported_languages(tts_provider) + stt_langs = get_stt_supported_languages(stt_provider) + except ValueError as e: + return False, str(e) + + # Skip TTS validation if provider doesn't support explicit language param + if not tts_langs: + tts_langs = set(supported_languages) # Assume all are valid + + # Skip STT validation if not implemented + if not stt_langs: + stt_langs = set(supported_languages) # Assume all are valid + + # Validate each language + unsupported_tts = [] + unsupported_stt = [] + + for lang in supported_languages: + if lang not in tts_langs: + unsupported_tts.append(lang) + if lang not in stt_langs: + unsupported_stt.append(lang) + + if unsupported_tts or unsupported_stt: + errors = [] + if unsupported_tts: + errors.append( + f"TTS provider '{tts_provider}' does not support: {', '.join(unsupported_tts)}" + ) + if unsupported_stt: + errors.append( + f"STT provider '{stt_provider}' does not support: {', '.join(unsupported_stt)}" + ) + return False, '; '.join(errors) + + return True, None + + +def validate_default_language( + default_language: str, supported_languages: List[str] +) -> Tuple[bool, Optional[str]]: + """ + Validate that default_language is in supported_languages. + + Args: + default_language: Default language code + supported_languages: List of supported language codes + + Returns: + Tuple of (is_valid, error_message). error_message is None if valid. + """ + if default_language not in supported_languages: + return ( + False, + f"default_language '{default_language}' must be in supported_languages", + ) + return True, None + + +def get_language_names(language_codes: List[str]) -> List[str]: + """ + Convert language codes to human-readable names. 
+ + Args: + language_codes: List of language codes (e.g., ['en', 'es', 'hi']) + + Returns: + List of language names (e.g., ['English', 'Spanish', 'Hindi']) + """ + return [LANGUAGE_NAMES.get(code, code) for code in language_codes] + + +def format_language_prompt(supported_languages: List[str]) -> str: + """ + Format language list for welcome audio prompt. + + Args: + supported_languages: List of language codes + + Returns: + Formatted string for audio prompt (e.g., "English, Spanish, or Hindi") + """ + names = get_language_names(supported_languages) + + if len(names) == 1: + return names[0] + elif len(names) == 2: + return f'{names[0]} or {names[1]}' + else: + # Oxford comma style: "English, Spanish, or Hindi" + return ', '.join(names[:-1]) + f', or {names[-1]}' diff --git a/wavefront/server/modules/voice_agents_module/voice_agents_module/utils/phone_validation.py b/wavefront/server/modules/voice_agents_module/voice_agents_module/utils/phone_validation.py new file mode 100644 index 00000000..40684dc8 --- /dev/null +++ b/wavefront/server/modules/voice_agents_module/voice_agents_module/utils/phone_validation.py @@ -0,0 +1,67 @@ +"""Phone number validation utilities""" + +import re +from typing import List, Tuple, Optional + +# E.164 phone number format: +[country code][subscriber number] +# Total length: 1-15 digits (including country code) +# Must start with + followed by country code (1-3 digits, cannot start with 0) +E164_PATTERN = re.compile(r'^\+[1-9]\d{1,14}$') + + +def validate_e164_format(phone_number: str) -> bool: + """ + Validate that a phone number is in E.164 format. 
+ + E.164 format: + - Starts with + + - Followed by country code (1-3 digits, cannot start with 0) + - Followed by subscriber number + - Total length: 1-15 digits (excluding +) + - Examples: +12025551234 (US), +442071838750 (UK), +919876543210 (India) + + Args: + phone_number: Phone number string to validate + + Returns: + True if valid E.164 format, False otherwise + """ + return bool(E164_PATTERN.match(phone_number)) + + +def validate_phone_numbers( + phone_numbers: List[str], field_name: str = 'phone_numbers' +) -> Tuple[bool, Optional[str]]: + """ + Validate that all phone numbers in a list are in E.164 format. + + Args: + phone_numbers: List of phone numbers to validate + field_name: Name of the field for error messages + + Returns: + Tuple of (is_valid, error_message). error_message is None if valid. + """ + if not isinstance(phone_numbers, list): + return False, f'{field_name} must be a list' + + if not phone_numbers: + # Empty list is valid (agent might not have inbound/outbound numbers yet) + return True, None + + invalid_numbers = [] + for number in phone_numbers: + if not isinstance(number, str): + invalid_numbers.append(str(number)) + elif not validate_e164_format(number): + invalid_numbers.append(number) + + if invalid_numbers: + return False, ( + f"{field_name} contains invalid E.164 format numbers: " + f"{', '.join(invalid_numbers)}. 
" + f"Format must be +[country][number] with 1-15 digits total, " + f"e.g., +12025551234 (US), +442071838750 (UK), +919876543210 (India)" + ) + + return True, None From d449663efb01d5fae126abc0912a3ed4ea7adb25 Mon Sep 17 00:00:00 2001 From: rootflo-hardik Date: Fri, 9 Jan 2026 11:53:40 +0530 Subject: [PATCH 2/8] fe changes - inbound support, enhancements --- wavefront/client/src/constants/languages.ts | 65 +++++++ .../voice-agents/CreateVoiceAgentDialog.tsx | 168 ++++++++++++++++++ .../voice-agents/EditVoiceAgentDialog.tsx | 168 ++++++++++++++++++ .../voice-agents/OutboundCallDialog.tsx | 9 +- .../pages/apps/[appId]/voice-agents/index.tsx | 19 +- .../CreateTelephonyConfigDialog.tsx | 90 ---------- .../EditTelephonyConfigDialog.tsx | 89 ---------- .../voice-agents/telephony-configs/index.tsx | 7 +- .../client/src/types/telephony-config.ts | 3 - wavefront/client/src/types/voice-agent.ts | 12 ++ 10 files changed, 435 insertions(+), 195 deletions(-) create mode 100644 wavefront/client/src/constants/languages.ts diff --git a/wavefront/client/src/constants/languages.ts b/wavefront/client/src/constants/languages.ts new file mode 100644 index 00000000..8de0af4f --- /dev/null +++ b/wavefront/client/src/constants/languages.ts @@ -0,0 +1,65 @@ +/** + * Language constants for voice agent multi-language support + * Matches backend language validation + */ + +export interface LanguageOption { + code: string; + name: string; + nativeName?: string; +} + +export const SUPPORTED_LANGUAGES: LanguageOption[] = [ + { code: 'en', name: 'English' }, + { code: 'es', name: 'Spanish', nativeName: 'Español' }, + { code: 'hi', name: 'Hindi', nativeName: 'हिंदी' }, + { code: 'te', name: 'Telugu', nativeName: 'తెలుగు' }, + { code: 'ta', name: 'Tamil', nativeName: 'தமிழ்' }, + { code: 'kn', name: 'Kannada', nativeName: 'ಕನ್ನಡ' }, + { code: 'ml', name: 'Malayalam', nativeName: 'മലയാളം' }, + { code: 'mr', name: 'Marathi', nativeName: 'मराठी' }, + { code: 'gu', name: 'Gujarati', nativeName: 
'ગુજરાતી' }, + { code: 'pa', name: 'Punjabi', nativeName: 'ਪੰਜਾਬੀ' }, + { code: 'bn', name: 'Bengali', nativeName: 'বাংলা' }, + { code: 'zh', name: 'Chinese', nativeName: '中文' }, + { code: 'ja', name: 'Japanese', nativeName: '日本語' }, + { code: 'ko', name: 'Korean', nativeName: '한국어' }, + { code: 'fr', name: 'French', nativeName: 'Français' }, + { code: 'de', name: 'German', nativeName: 'Deutsch' }, + { code: 'it', name: 'Italian', nativeName: 'Italiano' }, + { code: 'pt', name: 'Portuguese', nativeName: 'Português' }, + { code: 'ru', name: 'Russian', nativeName: 'Русский' }, + { code: 'ar', name: 'Arabic', nativeName: 'العربية' }, + { code: 'tr', name: 'Turkish', nativeName: 'Türkçe' }, + { code: 'vi', name: 'Vietnamese', nativeName: 'Tiếng Việt' }, + { code: 'th', name: 'Thai', nativeName: 'ภาษาไทย' }, + { code: 'id', name: 'Indonesian', nativeName: 'Bahasa Indonesia' }, + { code: 'ms', name: 'Malay', nativeName: 'Bahasa Melayu' }, + { code: 'nl', name: 'Dutch', nativeName: 'Nederlands' }, + { code: 'pl', name: 'Polish', nativeName: 'Polski' }, + { code: 'uk', name: 'Ukrainian', nativeName: 'Українська' }, + { code: 'ro', name: 'Romanian', nativeName: 'Română' }, + { code: 'cs', name: 'Czech', nativeName: 'Čeština' }, + { code: 'sv', name: 'Swedish', nativeName: 'Svenska' }, + { code: 'da', name: 'Danish', nativeName: 'Dansk' }, + { code: 'no', name: 'Norwegian', nativeName: 'Norsk' }, + { code: 'fi', name: 'Finnish', nativeName: 'Suomi' }, + { code: 'el', name: 'Greek', nativeName: 'Ελληνικά' }, + { code: 'he', name: 'Hebrew', nativeName: 'עברית' }, + { code: 'hu', name: 'Hungarian', nativeName: 'Magyar' }, + { code: 'sk', name: 'Slovak', nativeName: 'Slovenčina' }, + { code: 'bg', name: 'Bulgarian', nativeName: 'Български' }, + { code: 'hr', name: 'Croatian', nativeName: 'Hrvatski' }, + { code: 'fil', name: 'Filipino' }, +]; + +export const getLanguageName = (code: string): string => { + const lang = SUPPORTED_LANGUAGES.find((l) => l.code === code); + return 
lang ? lang.name : code; +}; + +export const getLanguageDisplayName = (code: string): string => { + const lang = SUPPORTED_LANGUAGES.find((l) => l.code === code); + if (!lang) return code; + return lang.nativeName ? `${lang.name} (${lang.nativeName})` : lang.name; +}; diff --git a/wavefront/client/src/pages/apps/[appId]/voice-agents/CreateVoiceAgentDialog.tsx b/wavefront/client/src/pages/apps/[appId]/voice-agents/CreateVoiceAgentDialog.tsx index a7d91e68..c3d7f7fc 100644 --- a/wavefront/client/src/pages/apps/[appId]/voice-agents/CreateVoiceAgentDialog.tsx +++ b/wavefront/client/src/pages/apps/[appId]/voice-agents/CreateVoiceAgentDialog.tsx @@ -19,6 +19,7 @@ import { } from '@app/components/ui/form'; import { Input } from '@app/components/ui/input'; import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@app/components/ui/select'; +import { Checkbox } from '@app/components/ui/checkbox'; import { useGetLLMConfigs, useGetSttConfigs, @@ -28,6 +29,7 @@ import { import { extractErrorMessage } from '@app/lib/utils'; import { useDashboardStore, useNotifyStore } from '@app/store'; import { CreateVoiceAgentRequest } from '@app/types/voice-agent'; +import { SUPPORTED_LANGUAGES, getLanguageDisplayName } from '@app/constants/languages'; import { zodResolver } from '@hookform/resolvers/zod'; import { langs } from '@uiw/codemirror-extensions-langs'; import CodeMirror from '@uiw/react-codemirror'; @@ -36,6 +38,8 @@ import { useForm } from 'react-hook-form'; import { useNavigate } from 'react-router'; import { z } from 'zod'; +const E164_REGEX = /^\+[1-9]\d{1,14}$/; + const createVoiceAgentSchema = z.object({ name: z.string().min(1, 'Name is required').max(100, 'Name must be 100 characters or less'), description: z.string().max(500, 'Description must be 500 characters or less').optional(), @@ -47,6 +51,10 @@ const createVoiceAgentSchema = z.object({ welcome_message: z.string().min(1, 'Welcome message is required'), conversation_config: z.string().optional(), 
status: z.enum(['active', 'inactive']), + inbound_numbers: z.string().optional(), + outbound_numbers: z.string().optional(), + supported_languages: z.array(z.string()).min(1, 'At least one language is required'), + default_language: z.string().min(1, 'Default language is required'), }); type CreateVoiceAgentInput = z.infer; @@ -83,6 +91,10 @@ const CreateVoiceAgentDialog: React.FC = ({ isOpen, welcome_message: '', conversation_config: '{}', status: 'inactive', + inbound_numbers: '', + outbound_numbers: '', + supported_languages: ['en'], + default_language: 'en', }, }); @@ -100,6 +112,10 @@ const CreateVoiceAgentDialog: React.FC = ({ isOpen, welcome_message: '', conversation_config: '{}', status: 'inactive', + inbound_numbers: '', + outbound_numbers: '', + supported_languages: ['en'], + default_language: 'en', }); } }, [isOpen, form]); @@ -116,6 +132,38 @@ const CreateVoiceAgentDialog: React.FC = ({ isOpen, } } + // Parse phone numbers (comma-separated) + const parsePhoneNumbers = (input: string): string[] => { + if (!input.trim()) return []; + return input + .split(',') + .map((num) => num.trim()) + .filter((num) => num); + }; + + const inboundNumbers = parsePhoneNumbers(data.inbound_numbers || ''); + const outboundNumbers = parsePhoneNumbers(data.outbound_numbers || ''); + + // Validate E.164 format + const invalidInbound = inboundNumbers.filter((num) => !E164_REGEX.test(num)); + const invalidOutbound = outboundNumbers.filter((num) => !E164_REGEX.test(num)); + + if (invalidInbound.length > 0) { + notifyError(`Invalid inbound phone numbers (must be E.164 format): ${invalidInbound.join(', ')}`); + return; + } + + if (invalidOutbound.length > 0) { + notifyError(`Invalid outbound phone numbers (must be E.164 format): ${invalidOutbound.join(', ')}`); + return; + } + + // Validate default language is in supported languages + if (!data.supported_languages.includes(data.default_language)) { + notifyError('Default language must be one of the supported languages'); + 
return; + } + setCreating(true); try { const requestData: CreateVoiceAgentRequest = { @@ -129,6 +177,10 @@ const CreateVoiceAgentDialog: React.FC = ({ isOpen, welcome_message: data.welcome_message.trim(), conversation_config: conversationConfig, status: data.status, + inbound_numbers: inboundNumbers.length > 0 ? inboundNumbers : undefined, + outbound_numbers: outboundNumbers.length > 0 ? outboundNumbers : undefined, + supported_languages: data.supported_languages, + default_language: data.default_language, }; const response = await floConsoleService.voiceAgentService.createVoiceAgent(requestData); @@ -368,6 +420,122 @@ const CreateVoiceAgentDialog: React.FC = ({ isOpen, + {/* Phone Numbers */} +
+

Phone Numbers

+
+ ( + + Inbound Phone Numbers + + + + + Phone numbers for receiving inbound calls (E.164 format, comma-separated, globally unique) + + + + )} + /> + + ( + + Outbound Phone Numbers + + + + + Phone numbers for making outbound calls (E.164 format, comma-separated) + + + + )} + /> +
+
+ + {/* Language Configuration */} +
+

Language Configuration

+ ( + + + Supported Languages* + +
+
+ {SUPPORTED_LANGUAGES.map((lang) => ( +
+ { + const current = field.value || []; + if (checked) { + field.onChange([...current, lang.code]); + } else { + field.onChange(current.filter((l) => l !== lang.code)); + } + }} + /> + +
+ ))} +
+
+ + Select languages this agent can converse in. If multiple languages are selected, the agent will + detect the caller's language. + + +
+ )} + /> + + ( + + + Default Language* + + + + Language used if detection fails or for single-language agents. Must be one of the supported + languages. + + + + )} + /> +
+ {/* Behavior */}

Behavior

diff --git a/wavefront/client/src/pages/apps/[appId]/voice-agents/EditVoiceAgentDialog.tsx b/wavefront/client/src/pages/apps/[appId]/voice-agents/EditVoiceAgentDialog.tsx index 245d7b59..56c1846d 100644 --- a/wavefront/client/src/pages/apps/[appId]/voice-agents/EditVoiceAgentDialog.tsx +++ b/wavefront/client/src/pages/apps/[appId]/voice-agents/EditVoiceAgentDialog.tsx @@ -19,6 +19,7 @@ import { } from '@app/components/ui/form'; import { Input } from '@app/components/ui/input'; import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@app/components/ui/select'; +import { Checkbox } from '@app/components/ui/checkbox'; import { useGetLLMConfigs, useGetSttConfigs, @@ -28,6 +29,7 @@ import { import { extractErrorMessage } from '@app/lib/utils'; import { useDashboardStore, useNotifyStore } from '@app/store'; import { UpdateVoiceAgentRequest, VoiceAgent } from '@app/types/voice-agent'; +import { SUPPORTED_LANGUAGES, getLanguageDisplayName } from '@app/constants/languages'; import { zodResolver } from '@hookform/resolvers/zod'; import { langs } from '@uiw/codemirror-extensions-langs'; import CodeMirror from '@uiw/react-codemirror'; @@ -35,6 +37,8 @@ import React, { useEffect, useState } from 'react'; import { useForm } from 'react-hook-form'; import { z } from 'zod'; +const E164_REGEX = /^\+[1-9]\d{1,14}$/; + const updateVoiceAgentSchema = z.object({ name: z.string().min(1, 'Name is required').max(100, 'Name must be 100 characters or less'), description: z.string().max(500, 'Description must be 500 characters or less').optional(), @@ -46,6 +50,10 @@ const updateVoiceAgentSchema = z.object({ welcome_message: z.string().min(1, 'Welcome message is required'), conversation_config: z.string().optional(), status: z.enum(['active', 'inactive']), + inbound_numbers: z.string().optional(), + outbound_numbers: z.string().optional(), + supported_languages: z.array(z.string()).min(1, 'At least one language is required'), + default_language: z.string().min(1, 
'Default language is required'), }); type UpdateVoiceAgentInput = z.infer; @@ -88,6 +96,10 @@ const EditVoiceAgentDialog: React.FC = ({ welcome_message: agent.welcome_message, conversation_config: agent.conversation_config ? JSON.stringify(agent.conversation_config, null, 2) : '{}', status: agent.status, + inbound_numbers: agent.inbound_numbers?.join(', ') || '', + outbound_numbers: agent.outbound_numbers?.join(', ') || '', + supported_languages: agent.supported_languages || ['en'], + default_language: agent.default_language || 'en', }, }); @@ -105,6 +117,10 @@ const EditVoiceAgentDialog: React.FC = ({ welcome_message: agent.welcome_message, conversation_config: agent.conversation_config ? JSON.stringify(agent.conversation_config, null, 2) : '{}', status: agent.status, + inbound_numbers: agent.inbound_numbers?.join(', ') || '', + outbound_numbers: agent.outbound_numbers?.join(', ') || '', + supported_languages: agent.supported_languages || ['en'], + default_language: agent.default_language || 'en', }); } }, [isOpen, agent, form]); @@ -121,6 +137,38 @@ const EditVoiceAgentDialog: React.FC = ({ } } + // Parse phone numbers (comma-separated) + const parsePhoneNumbers = (input: string): string[] => { + if (!input.trim()) return []; + return input + .split(',') + .map((num) => num.trim()) + .filter((num) => num); + }; + + const inboundNumbers = parsePhoneNumbers(data.inbound_numbers || ''); + const outboundNumbers = parsePhoneNumbers(data.outbound_numbers || ''); + + // Validate E.164 format + const invalidInbound = inboundNumbers.filter((num) => !E164_REGEX.test(num)); + const invalidOutbound = outboundNumbers.filter((num) => !E164_REGEX.test(num)); + + if (invalidInbound.length > 0) { + notifyError(`Invalid inbound phone numbers (must be E.164 format): ${invalidInbound.join(', ')}`); + return; + } + + if (invalidOutbound.length > 0) { + notifyError(`Invalid outbound phone numbers (must be E.164 format): ${invalidOutbound.join(', ')}`); + return; + } + + // Validate 
default language is in supported languages + if (!data.supported_languages.includes(data.default_language)) { + notifyError('Default language must be one of the supported languages'); + return; + } + setUpdating(true); try { const requestData: UpdateVoiceAgentRequest = { @@ -134,6 +182,10 @@ const EditVoiceAgentDialog: React.FC = ({ welcome_message: data.welcome_message.trim(), conversation_config: conversationConfig, status: data.status, + inbound_numbers: inboundNumbers, + outbound_numbers: outboundNumbers, + supported_languages: data.supported_languages, + default_language: data.default_language, }; await floConsoleService.voiceAgentService.updateVoiceAgent(agent.id, requestData); @@ -366,6 +418,122 @@ const EditVoiceAgentDialog: React.FC = ({
+ {/* Phone Numbers */} +
+

Phone Numbers

+
+ ( + + Inbound Phone Numbers + + + + + Phone numbers for receiving inbound calls (E.164 format, comma-separated, globally unique) + + + + )} + /> + + ( + + Outbound Phone Numbers + + + + + Phone numbers for making outbound calls (E.164 format, comma-separated) + + + + )} + /> +
+
+ + {/* Language Configuration */} +
+

Language Configuration

+ ( + + + Supported Languages* + +
+
+ {SUPPORTED_LANGUAGES.map((lang) => ( +
+ { + const current = field.value || []; + if (checked) { + field.onChange([...current, lang.code]); + } else { + field.onChange(current.filter((l) => l !== lang.code)); + } + }} + /> + +
+ ))} +
+
+ + Select languages this agent can converse in. If multiple languages are selected, the agent will + detect the caller's language. + + +
+ )} + /> + + ( + + + Default Language* + + + + Language used if detection fails or for single-language agents. Must be one of the supported + languages. + + + + )} + /> +
+ {/* Behavior */}

Behavior

diff --git a/wavefront/client/src/pages/apps/[appId]/voice-agents/OutboundCallDialog.tsx b/wavefront/client/src/pages/apps/[appId]/voice-agents/OutboundCallDialog.tsx index 1cf00cc2..dee80587 100644 --- a/wavefront/client/src/pages/apps/[appId]/voice-agents/OutboundCallDialog.tsx +++ b/wavefront/client/src/pages/apps/[appId]/voice-agents/OutboundCallDialog.tsx @@ -11,7 +11,6 @@ import { import { Input } from '@app/components/ui/input'; import { Label } from '@app/components/ui/label'; import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@app/components/ui/select'; -import { useGetTelephonyConfig } from '@app/hooks/data/fetch-hooks'; import { extractErrorMessage } from '@app/lib/utils'; import { useNotifyStore } from '@app/store'; import { VoiceAgent } from '@app/types/voice-agent'; @@ -25,19 +24,17 @@ interface CallInfo { interface OutboundCallDialogProps { isOpen: boolean; onOpenChange: (open: boolean) => void; - appId: string; agent: VoiceAgent; } -const OutboundCallDialog: React.FC = ({ isOpen, onOpenChange, appId, agent }) => { +const OutboundCallDialog: React.FC = ({ isOpen, onOpenChange, agent }) => { const { notifySuccess, notifyError } = useNotifyStore(); const [toNumber, setToNumber] = useState(''); const [fromNumber, setFromNumber] = useState(''); const [callLoading, setCallLoading] = useState(false); - // Fetch the specific telephony config to get phone numbers for call initiation - const { data: currentTelephonyConfig } = useGetTelephonyConfig(appId, agent.telephony_config_id); - const availablePhoneNumbers = currentTelephonyConfig?.phone_numbers || []; + // Get outbound phone numbers from the voice agent (not telephony config) + const availablePhoneNumbers = agent.outbound_numbers || []; // E.164 phone number validation const isValidE164PhoneNumber = (phoneNumber: string): boolean => { diff --git a/wavefront/client/src/pages/apps/[appId]/voice-agents/index.tsx b/wavefront/client/src/pages/apps/[appId]/voice-agents/index.tsx 
index a37858ed..64b44495 100644 --- a/wavefront/client/src/pages/apps/[appId]/voice-agents/index.tsx +++ b/wavefront/client/src/pages/apps/[appId]/voice-agents/index.tsx @@ -9,6 +9,7 @@ import { getVoiceAgentsKey } from '@app/hooks/data/query-keys'; import { extractErrorMessage } from '@app/lib/utils'; import { useNotifyStore } from '@app/store'; import { VoiceAgent } from '@app/types/voice-agent'; +import { getLanguageName } from '@app/constants/languages'; import { useQueryClient } from '@tanstack/react-query'; import { Pencil, Phone, Trash2 } from 'lucide-react'; import React, { useState } from 'react'; @@ -133,6 +134,9 @@ const VoiceAgentsPage: React.FC = () => { Name Description + Inbound #s + Outbound #s + Languages Status Created Actions @@ -143,6 +147,20 @@ const VoiceAgentsPage: React.FC = () => { {agent.name} {agent.description || '-'} + + {agent.inbound_numbers?.length || 0} + + + {agent.outbound_numbers?.length || 0} + + + + {agent.supported_languages?.length || 1} ({agent.default_language || 'en'}) + + { !open && setCallItem(null)} - appId={app} agent={callItem} /> )} diff --git a/wavefront/client/src/pages/apps/[appId]/voice-agents/telephony-configs/CreateTelephonyConfigDialog.tsx b/wavefront/client/src/pages/apps/[appId]/voice-agents/telephony-configs/CreateTelephonyConfigDialog.tsx index 95a3424a..34120d76 100644 --- a/wavefront/client/src/pages/apps/[appId]/voice-agents/telephony-configs/CreateTelephonyConfigDialog.tsx +++ b/wavefront/client/src/pages/apps/[appId]/voice-agents/telephony-configs/CreateTelephonyConfigDialog.tsx @@ -18,14 +18,12 @@ import { FormMessage, } from '@app/components/ui/form'; import { Input } from '@app/components/ui/input'; -import { Label } from '@app/components/ui/label'; import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@app/components/ui/select'; import { Textarea } from '@app/components/ui/textarea'; import { getConnectionTypeOptions, getTelephonyProviderConfig, 
getTelephonyProviderOptions, - isValidE164PhoneNumber, requiresSipConfig, } from '@app/config/telephony-providers'; import { extractErrorMessage } from '@app/lib/utils'; @@ -63,7 +61,6 @@ const CreateTelephonyConfigDialog: React.FC = }) => { const { notifySuccess, notifyError } = useNotifyStore(); - const [phoneNumbers, setPhoneNumbers] = useState(['']); const [loading, setLoading] = useState(false); const form = useForm({ @@ -107,53 +104,10 @@ const CreateTelephonyConfigDialog: React.FC = sip_port: undefined, sip_transport: undefined, }); - setPhoneNumbers(['']); } }, [isOpen, form]); - const handleAddPhoneNumber = () => { - setPhoneNumbers([...phoneNumbers, '']); - }; - - const handleRemovePhoneNumber = (index: number) => { - if (phoneNumbers.length === 1) { - notifyError('At least one phone number is required'); - return; - } - setPhoneNumbers(phoneNumbers.filter((_, i) => i !== index)); - }; - - const handlePhoneNumberChange = (index: number, value: string) => { - const newPhoneNumbers = [...phoneNumbers]; - newPhoneNumbers[index] = value; - setPhoneNumbers(newPhoneNumbers); - }; - - const validatePhoneNumbers = (): boolean => { - const filledPhoneNumbers = phoneNumbers.filter((p) => p.trim()); - - if (filledPhoneNumbers.length === 0) { - notifyError('At least one phone number is required'); - return false; - } - - for (const phone of filledPhoneNumbers) { - if (!isValidE164PhoneNumber(phone.trim())) { - notifyError( - `Invalid phone number format: ${phone}. 
Phone numbers must be in E.164 format (e.g., +14155551234)` - ); - return false; - } - } - - return true; - }; - const onSubmit = async (data: CreateTelephonyConfigInput) => { - if (!validatePhoneNumbers()) { - return; - } - // Validate SIP config if required if (requiresSipConfig(data.provider as TelephonyProvider, data.connection_type)) { if (!data.sip_domain?.trim()) { @@ -162,8 +116,6 @@ const CreateTelephonyConfigDialog: React.FC = } } - const filledPhoneNumbers = phoneNumbers.filter((p) => p.trim()).map((p) => p.trim()); - setLoading(true); try { const requestData: { @@ -172,7 +124,6 @@ const CreateTelephonyConfigDialog: React.FC = provider: TelephonyProvider; connection_type: ConnectionType; credentials: { account_sid: string; auth_token: string }; - phone_numbers: string[]; webhook_config: null; sip_config?: { sip_domain: string; port?: number; transport?: SipTransport }; } = { @@ -184,7 +135,6 @@ const CreateTelephonyConfigDialog: React.FC = account_sid: data.account_sid.trim(), auth_token: data.auth_token.trim(), }, - phone_numbers: filledPhoneNumbers, webhook_config: null, }; @@ -351,46 +301,6 @@ const CreateTelephonyConfigDialog: React.FC = />
-
-
- - -
- -
- {phoneNumbers.map((phone, index) => ( -
- handlePhoneNumberChange(index, e.target.value)} - placeholder="+14155551234" - className="flex-1" - /> - -
- ))} -
- -

- Phone numbers must be in E.164 format (e.g., +14155551234). Include country code with + prefix. -

-
- {showSipConfig && (

SIP Configuration

diff --git a/wavefront/client/src/pages/apps/[appId]/voice-agents/telephony-configs/EditTelephonyConfigDialog.tsx b/wavefront/client/src/pages/apps/[appId]/voice-agents/telephony-configs/EditTelephonyConfigDialog.tsx index 0feb1c41..c83fbbcd 100644 --- a/wavefront/client/src/pages/apps/[appId]/voice-agents/telephony-configs/EditTelephonyConfigDialog.tsx +++ b/wavefront/client/src/pages/apps/[appId]/voice-agents/telephony-configs/EditTelephonyConfigDialog.tsx @@ -18,14 +18,12 @@ import { FormMessage, } from '@app/components/ui/form'; import { Input } from '@app/components/ui/input'; -import { Label } from '@app/components/ui/label'; import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@app/components/ui/select'; import { Textarea } from '@app/components/ui/textarea'; import { getConnectionTypeOptions, getTelephonyProviderConfig, getTelephonyProviderOptions, - isValidE164PhoneNumber, requiresSipConfig, } from '@app/config/telephony-providers'; import { extractErrorMessage } from '@app/lib/utils'; @@ -70,7 +68,6 @@ const EditTelephonyConfigDialog: React.FC = ({ }) => { const { notifySuccess, notifyError } = useNotifyStore(); - const [phoneNumbers, setPhoneNumbers] = useState(['']); const [loading, setLoading] = useState(false); const form = useForm({ @@ -105,7 +102,6 @@ const EditTelephonyConfigDialog: React.FC = ({ sip_port: config.sip_config?.port, sip_transport: config.sip_config?.transport, }); - setPhoneNumbers(config.phone_numbers.length > 0 ? 
config.phone_numbers : ['']); } }, [isOpen, config, form]); @@ -118,49 +114,7 @@ const EditTelephonyConfigDialog: React.FC = ({ } }, [watchedConnectionType, form]); - const handleAddPhoneNumber = () => { - setPhoneNumbers([...phoneNumbers, '']); - }; - - const handleRemovePhoneNumber = (index: number) => { - if (phoneNumbers.length === 1) { - notifyError('At least one phone number is required'); - return; - } - setPhoneNumbers(phoneNumbers.filter((_, i) => i !== index)); - }; - - const handlePhoneNumberChange = (index: number, value: string) => { - const newPhoneNumbers = [...phoneNumbers]; - newPhoneNumbers[index] = value; - setPhoneNumbers(newPhoneNumbers); - }; - - const validatePhoneNumbers = (): boolean => { - const filledPhoneNumbers = phoneNumbers.filter((p) => p.trim()); - - if (filledPhoneNumbers.length === 0) { - notifyError('At least one phone number is required'); - return false; - } - - for (const phone of filledPhoneNumbers) { - if (!isValidE164PhoneNumber(phone.trim())) { - notifyError( - `Invalid phone number format: ${phone}. 
Phone numbers must be in E.164 format (e.g., +14155551234)` - ); - return false; - } - } - - return true; - }; - const onSubmit = async (data: UpdateTelephonyConfigInput) => { - if (!validatePhoneNumbers()) { - return; - } - // Validate SIP config if required if (requiresSipConfig(data.provider as TelephonyProvider, data.connection_type)) { if (!data.sip_domain?.trim()) { @@ -169,8 +123,6 @@ const EditTelephonyConfigDialog: React.FC = ({ } } - const filledPhoneNumbers = phoneNumbers.filter((p) => p.trim()).map((p) => p.trim()); - setLoading(true); try { const updateData: UpdateTelephonyConfigRequest = { @@ -178,7 +130,6 @@ const EditTelephonyConfigDialog: React.FC = ({ description: data.description?.trim() || null, provider: data.provider as TelephonyProvider, connection_type: data.connection_type, - phone_numbers: filledPhoneNumbers, }; // Only include credentials if they were changed @@ -356,46 +307,6 @@ const EditTelephonyConfigDialog: React.FC = ({ />
-
-
- - -
- -
- {phoneNumbers.map((phone, index) => ( -
- handlePhoneNumberChange(index, e.target.value)} - placeholder="+14155551234" - className="flex-1" - /> - -
- ))} -
- -

- Phone numbers must be in E.164 format (e.g., +14155551234). Include country code with + prefix. -

-
- {showSipConfig && (

SIP Configuration

diff --git a/wavefront/client/src/pages/apps/[appId]/voice-agents/telephony-configs/index.tsx b/wavefront/client/src/pages/apps/[appId]/voice-agents/telephony-configs/index.tsx index 71bc2dc2..f4874680 100644 --- a/wavefront/client/src/pages/apps/[appId]/voice-agents/telephony-configs/index.tsx +++ b/wavefront/client/src/pages/apps/[appId]/voice-agents/telephony-configs/index.tsx @@ -81,8 +81,7 @@ const TelephonyConfigsPage: React.FC = () => { config.display_name.toLowerCase().includes(query) || (config.description && config.description.toLowerCase().includes(query)) || config.provider.toLowerCase().includes(query) || - config.connection_type.toLowerCase().includes(query) || - config.phone_numbers.some((phone) => phone.toLowerCase().includes(query)) + config.connection_type.toLowerCase().includes(query) ); }); @@ -131,7 +130,6 @@ const TelephonyConfigsPage: React.FC = () => { Display Name Provider Connection Type - Phone Numbers Description Created Actions @@ -143,9 +141,6 @@ const TelephonyConfigsPage: React.FC = () => { {config.display_name} {config.provider} {config.connection_type} - - {config.phone_numbers.length} number{config.phone_numbers.length !== 1 ? 
's' : ''} - {config.description || '-'} {new Date(config.created_at).toLocaleDateString()} e.stopPropagation()}> diff --git a/wavefront/client/src/types/telephony-config.ts b/wavefront/client/src/types/telephony-config.ts index f3acea32..a33cbc55 100644 --- a/wavefront/client/src/types/telephony-config.ts +++ b/wavefront/client/src/types/telephony-config.ts @@ -35,7 +35,6 @@ export interface TelephonyConfig { provider: TelephonyProvider; connection_type: ConnectionType; credentials: TelephonyCredentials; - phone_numbers: string[]; webhook_config?: WebhookConfig | null; sip_config?: SipConfig | null; is_deleted: boolean; @@ -50,7 +49,6 @@ export interface CreateTelephonyConfigRequest { provider: TelephonyProvider; connection_type: ConnectionType; credentials: TelephonyCredentials; - phone_numbers: string[]; webhook_config?: WebhookConfig | null; sip_config?: SipConfig | null; } @@ -62,7 +60,6 @@ export interface UpdateTelephonyConfigRequest { provider?: TelephonyProvider; connection_type?: ConnectionType; credentials?: TelephonyCredentials; - phone_numbers?: string[]; webhook_config?: WebhookConfig | null; sip_config?: SipConfig | null; } diff --git a/wavefront/client/src/types/voice-agent.ts b/wavefront/client/src/types/voice-agent.ts index bca1a3bc..7a703ed1 100644 --- a/wavefront/client/src/types/voice-agent.ts +++ b/wavefront/client/src/types/voice-agent.ts @@ -15,6 +15,10 @@ export interface VoiceAgent { welcome_message: string; conversation_config: Record | null; status: 'active' | 'inactive'; + inbound_numbers: string[]; + outbound_numbers: string[]; + supported_languages: string[]; + default_language: string; is_deleted: boolean; created_at: string; updated_at: string; @@ -34,6 +38,10 @@ export interface CreateVoiceAgentRequest { welcome_message: string; conversation_config?: Record | null; status?: 'active' | 'inactive'; + inbound_numbers?: string[]; + outbound_numbers?: string[]; + supported_languages?: string[]; + default_language?: string; } /** @@ -51,6 
+59,10 @@ export interface UpdateVoiceAgentRequest { welcome_message?: string; conversation_config?: Record | null; status?: 'active' | 'inactive'; + inbound_numbers?: string[]; + outbound_numbers?: string[]; + supported_languages?: string[]; + default_language?: string; } /** From d2a86284c1026b472bf3f3f2af94c0479e068979 Mon Sep 17 00:00:00 2001 From: rootflo-hardik Date: Fri, 9 Jan 2026 16:52:19 +0530 Subject: [PATCH 3/8] call_processing - inbound, language switch, enhancement --- .../constants/language_config.py | 100 +++++++ .../controllers/cache_controller.py | 49 +++- .../controllers/webhook_controller.py | 102 ++++++- .../services/floware_http_client.py | 91 ++++++ .../services/pipecat_service.py | 272 ++++++++++++++++-- .../services/voice_agent_cache_service.py | 134 +++++++++ 6 files changed, 711 insertions(+), 37 deletions(-) create mode 100644 wavefront/server/apps/call_processing/call_processing/constants/language_config.py diff --git a/wavefront/server/apps/call_processing/call_processing/constants/language_config.py b/wavefront/server/apps/call_processing/call_processing/constants/language_config.py new file mode 100644 index 00000000..563d3200 --- /dev/null +++ b/wavefront/server/apps/call_processing/call_processing/constants/language_config.py @@ -0,0 +1,100 @@ +""" +Language detection and configuration constants + +Contains keyword mappings and instructions for multi-language support +""" + +from typing import Dict, List + + +# Language keyword mappings for detection +LANGUAGE_KEYWORDS: Dict[str, List[str]] = { + 'en': ['english', 'inglés', 'inglese'], + 'es': ['spanish', 'español', 'espanol', 'castellano'], + 'hi': ['hindi', 'हिंदी', 'हिन्दी'], + 'te': ['telugu', 'తెలుగు'], + 'ta': ['tamil', 'தமிழ்'], + 'kn': ['kannada', 'ಕನ್ನಡ'], + 'ml': ['malayalam', 'മലയാളം'], + 'mr': ['marathi', 'मराठी'], + 'gu': ['gujarati', 'ગુજરાતી'], + 'pa': ['punjabi', 'ਪੰਜਾਬੀ'], + 'bn': ['bengali', 'বাংলা'], + 'zh': ['chinese', 'mandarin', '中文', '普通话'], + 'ja': 
['japanese', '日本語'], + 'ko': ['korean', '한국어'], + 'fr': ['french', 'français', 'francais'], + 'de': ['german', 'deutsch'], + 'it': ['italian', 'italiano'], + 'pt': ['portuguese', 'português', 'portugues'], + 'ru': ['russian', 'русский'], + 'ar': ['arabic', 'عربي', 'العربية'], + 'tr': ['turkish', 'türkçe', 'turkce'], + 'fil': ['filipino', 'pilipino', 'tagalog'], + 'tl': ['tagalog', 'filipino'], + 'fi': ['finnish', 'suomi'], + 'cs': ['czech', 'čeština', 'cestina'], + 'da': ['danish', 'dansk'], + 'el': ['greek', 'ελληνικά', 'ellinika'], + 'he': ['hebrew', 'עברית', 'ivrit'], + 'hr': ['croatian', 'hrvatski'], + 'hu': ['hungarian', 'magyar'], + 'id': ['indonesian', 'bahasa indonesia'], + 'ka': ['georgian', 'ქართული', 'kartuli'], + 'ms': ['malay', 'bahasa melayu'], + 'nl': ['dutch', 'nederlands'], + 'no': ['norwegian', 'norsk'], + 'pl': ['polish', 'polski'], + 'ro': ['romanian', 'română', 'romana'], + 'sk': ['slovak', 'slovenčina', 'slovencina'], + 'sv': ['swedish', 'svenska'], + 'th': ['thai', 'ภาษาไทย', 'phasa thai'], + 'uk': ['ukrainian', 'українська', 'ukrainska'], + 'vi': ['vietnamese', 'tiếng việt', 'tieng viet'], +} + +# Language-specific LLM instructions +LANGUAGE_INSTRUCTIONS: Dict[str, str] = { + 'en': 'Respond in English.', + 'es': 'Respond in Spanish (Responde en español).', + 'hi': 'Respond in Hindi (हिंदी में जवाब दें).', + 'te': 'Respond in Telugu (తెలుగులో సమాధానం ఇవ్వండి).', + 'ta': 'Respond in Tamil (தமிழில் பதிலளிக்கவும்).', + 'kn': 'Respond in Kannada (ಕನ್ನಡದಲ್ಲಿ ಉತ್ತರಿಸಿ).', + 'ml': 'Respond in Malayalam (മലയാളത്തിൽ മറുപടി നൽകുക).', + 'mr': 'Respond in Marathi (मराठीत उत्तर द्या).', + 'gu': 'Respond in Gujarati (ગુજરાતીમાં જવાબ આપો).', + 'pa': 'Respond in Punjabi (ਪੰਜਾਬੀ ਵਿੱਚ ਜਵਾਬ ਦਿਓ).', + 'bn': 'Respond in Bengali (বাংলায় উত্তর দিন).', + 'zh': 'Respond in Chinese (用中文回复).', + 'ja': 'Respond in Japanese (日本語で応答してください).', + 'ko': 'Respond in Korean (한국어로 답변하세요).', + 'fr': 'Respond in French (Répondez en français).', + 'de': 'Respond in German (Auf 
Deutsch antworten).', + 'it': 'Respond in Italian (Rispondi in italiano).', + 'pt': 'Respond in Portuguese (Responda em português).', + 'ru': 'Respond in Russian (Отвечайте на русском).', + 'ar': 'Respond in Arabic (الرد بالعربية).', + 'tr': 'Respond in Turkish (Türkçe cevap verin).', + 'fil': 'Respond in Filipino (Tumugon sa Filipino).', + 'tl': 'Respond in Tagalog (Tumugon sa Tagalog).', + 'fi': 'Respond in Finnish (Vastaa suomeksi).', + 'cs': 'Respond in Czech (Odpovězte česky).', + 'da': 'Respond in Danish (Svar på dansk).', + 'el': 'Respond in Greek (Απαντήστε στα ελληνικά).', + 'he': 'Respond in Hebrew (ענה בעברית).', + 'hr': 'Respond in Croatian (Odgovorite na hrvatskom).', + 'hu': 'Respond in Hungarian (Válaszoljon magyarul).', + 'id': 'Respond in Indonesian (Jawab dalam bahasa Indonesia).', + 'ka': 'Respond in Georgian (უპასუხეთ ქართულად).', + 'ms': 'Respond in Malay (Jawab dalam bahasa Melayu).', + 'nl': 'Respond in Dutch (Antwoord in het Nederlands).', + 'no': 'Respond in Norwegian (Svar på norsk).', + 'pl': 'Respond in Polish (Odpowiedz po polsku).', + 'ro': 'Respond in Romanian (Răspundeți în română).', + 'sk': 'Respond in Slovak (Odpovedzte po slovensky).', + 'sv': 'Respond in Swedish (Svara på svenska).', + 'th': 'Respond in Thai (ตอบเป็นภาษาไทย).', + 'uk': 'Respond in Ukrainian (Відповідайте українською).', + 'vi': 'Respond in Vietnamese (Trả lời bằng tiếng Việt).', +} diff --git a/wavefront/server/apps/call_processing/call_processing/controllers/cache_controller.py b/wavefront/server/apps/call_processing/call_processing/controllers/cache_controller.py index eaae7c1a..28822b77 100644 --- a/wavefront/server/apps/call_processing/call_processing/controllers/cache_controller.py +++ b/wavefront/server/apps/call_processing/call_processing/controllers/cache_controller.py @@ -27,7 +27,7 @@ class InvalidateCacheRequest(BaseModel): """Request body for cache invalidation""" config_type: str - config_id: UUID + config_id: str # Can be UUID string or phone 
number for inbound_number type def verify_passthrough_auth(x_passthrough: Optional[str] = Header(None)) -> None: @@ -102,19 +102,50 @@ async def invalidate_cache( HTTPException: If config_type is invalid or API fetch fails """ config_type = request.config_type - config_id = request.config_id + config_id_str = request.config_id - # Validate config type - if config_type not in VALID_CONFIG_TYPES: + # Validate config type (including inbound_number) + valid_types = list(VALID_CONFIG_TYPES) + ['inbound_number'] + if config_type not in valid_types: logger.error(f'Invalid config type: {config_type}') raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, - detail=f'Invalid config_type. Must be one of: {", ".join(VALID_CONFIG_TYPES)}', + detail=f'Invalid config_type. Must be one of: {", ".join(valid_types)}', ) - logger.info(f'Invalidating cache for {config_type} {config_id}') + logger.info(f'Invalidating cache for {config_type} {config_id_str}') + + # Step 1: Handle inbound_number separately (simple string-based cache key) + if config_type == 'inbound_number': + # For inbound numbers, just remove the cache key + cache_key = f'inbound_number:{config_id_str}' + removed = voice_agent_cache_service.cache_manager.remove(cache_key) + + if removed: + logger.info(f'Removed inbound number cache for {config_id_str}') + else: + logger.warning(f'Inbound number {config_id_str} was not in cache') + + return JSONResponse( + status_code=status.HTTP_200_OK, + content={ + 'message': f'Cache invalidated for inbound_number {config_id_str}', + 'config_type': config_type, + 'config_id': config_id_str, + }, + ) + + # Step 2: Get the appropriate cache key for config types + # Convert config_id string to UUID for UUID-based configs + try: + config_id = UUID(config_id_str) + except ValueError: + logger.error(f'Invalid UUID format for config_id: {config_id_str}') + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f'config_id must be a valid UUID for 
config_type={config_type}', + ) - # Step 1: Get the appropriate cache key cache_key_funcs = { 'voice_agent': get_voice_agent_cache_key, 'llm_inference_config': get_llm_config_cache_key, @@ -133,14 +164,14 @@ async def invalidate_cache( cache_key = cache_key_func(config_id) - # Step 2: Remove from cache + # Step 3: Remove from cache removed = voice_agent_cache_service.cache_manager.remove(cache_key) if removed: logger.info(f'Removed {config_type} {config_id} from cache') else: logger.warning(f'{config_type} {config_id} was not in cache') - # Step 3: Fetch fresh config from floware API + # Step 4: Fetch fresh config from floware API try: if not voice_agent_cache_service.floware_http_client: raise HTTPException( diff --git a/wavefront/server/apps/call_processing/call_processing/controllers/webhook_controller.py b/wavefront/server/apps/call_processing/call_processing/controllers/webhook_controller.py index 8a34655f..fae95511 100644 --- a/wavefront/server/apps/call_processing/call_processing/controllers/webhook_controller.py +++ b/wavefront/server/apps/call_processing/call_processing/controllers/webhook_controller.py @@ -6,7 +6,7 @@ import os from uuid import UUID -from fastapi import APIRouter, WebSocket, Query, Depends +from fastapi import APIRouter, WebSocket, Query, Depends, Form from fastapi.responses import Response from twilio.twiml.voice_response import VoiceResponse, Connect, Stream from call_processing.log.logger import logger @@ -33,6 +33,100 @@ webhook_router = APIRouter() +@webhook_router.post('/inbound') +@inject +async def inbound_webhook( + From: str = Form(...), + To: str = Form(...), + CallSid: str = Form(...), + voice_agent_cache_service: VoiceAgentCacheService = Depends( + Provide[ApplicationContainer.voice_agent_cache_service] + ), +): + """ + Twilio inbound webhook endpoint + + Called by Twilio when an inbound call is received. + Looks up the voice agent by inbound phone number and redirects to TwiML endpoint. 
+ + Form params (from Twilio): + From: Caller's phone number (E.164 format) + To: Called phone number (E.164 format, the inbound number) + CallSid: Twilio call identifier + """ + logger.info(f'Inbound call received: From={From}, To={To}, CallSid={CallSid}') + + # Look up agent by inbound number + agent = await voice_agent_cache_service.get_agent_by_inbound_number(To) + + if not agent: + logger.error(f'No voice agent found for inbound number: {To}') + # Return TwiML with error message + response = VoiceResponse() + response.say('Sorry, this number is not configured for voice services.') + response.hangup() + return Response(content=str(response), media_type='application/xml') + + agent_id = agent['id'] + logger.info(f'Agent found for inbound number {To}: {agent_id} ({agent["name"]})') + + # Generate welcome message audio URL + welcome_message_audio_url = '' + if agent.get('welcome_message'): + try: + # Note: This assumes voice_agent_cache_service has this method + # We'll implement it in the next step + welcome_message_audio_url = ( + await voice_agent_cache_service.get_welcome_message_audio_url(agent_id) + ) + except Exception as e: + logger.error(f'Failed to get welcome message URL: {e}') + # Continue without welcome message + + # Build WebSocket URL + base_url = os.getenv('CALL_PROCESSING_BASE_URL', 'http://localhost:8003') + + # Convert https:// to wss:// (or http:// to ws://) + if base_url.startswith('https://'): + websocket_url = base_url.replace('https://', 'wss://') + elif base_url.startswith('http://'): + websocket_url = base_url.replace('http://', 'ws://') + else: + websocket_url = f'wss://{base_url}' + + websocket_url = f'{websocket_url}/webhooks/ws' + + logger.info(f'WebSocket URL: {websocket_url}') + + # Generate TwiML response + response = VoiceResponse() + + # Play welcome message audio if URL is provided + if welcome_message_audio_url: + response.play(welcome_message_audio_url) + else: + logger.warning( + 'No welcome message audio URL provided, 
skipping welcome message' + ) + + connect = Connect() + stream = Stream(url=websocket_url) + + # Pass parameters to WebSocket stream + stream.parameter(name='voice_agent_id', value=agent_id) + + connect.append(stream) + response.append(connect) + + # Pause for 60 seconds before auto-hangup (adjust as needed) + response.pause(length=60) + + twiml_xml = str(response) + logger.info(f'Returning TwiML: {twiml_xml}') + + return Response(content=twiml_xml, media_type='application/xml') + + @webhook_router.post('/twiml') async def twiml_endpoint( voice_agent_id: str = Query(...), @@ -41,7 +135,7 @@ async def twiml_endpoint( """ Twilio TwiML endpoint - Called by Twilio when call connects. + Called by Twilio when call connects (directly or via inbound webhook redirect). Returns TwiML XML with WebSocket connection instructions. Query params: @@ -80,7 +174,7 @@ async def twiml_endpoint( connect = Connect() stream = Stream(url=websocket_url) - # Pass voice_agent_id as stream parameter + # Pass parameters to WebSocket stream stream.parameter(name='voice_agent_id', value=voice_agent_id) connect.append(stream) @@ -121,7 +215,7 @@ async def websocket_endpoint( logger.info(f'Auto-detected transport: {transport_type}') logger.info(f'Call data: {call_data}') - # Extract voice_agent_id from stream parameters + # Extract parameters from stream body_data = call_data.get('body', {}) voice_agent_id = body_data.get('voice_agent_id') diff --git a/wavefront/server/apps/call_processing/call_processing/services/floware_http_client.py b/wavefront/server/apps/call_processing/call_processing/services/floware_http_client.py index cd8fe97f..361de52a 100644 --- a/wavefront/server/apps/call_processing/call_processing/services/floware_http_client.py +++ b/wavefront/server/apps/call_processing/call_processing/services/floware_http_client.py @@ -123,3 +123,94 @@ async def fetch_config( except httpx.RequestError as e: logger.error(f'Request error fetching {config_type} {config_id}: {e}') raise + + async 
def get_agent_by_inbound_number( + self, phone_number: str + ) -> Optional[Dict[str, Any]]: + """ + Get voice agent by inbound phone number. + + Args: + phone_number: E.164 formatted phone number + + Returns: + Voice agent dict if found, None otherwise + + Raises: + httpx.HTTPStatusError: If API returns 4xx/5xx error (except 404) + httpx.RequestError: If request fails (network error, timeout, etc.) + """ + url = ( + f'{self.base_url}/floware/v1/voice-agents/by-inbound-number/{phone_number}' + ) + + async with httpx.AsyncClient(timeout=self.timeout) as client: + try: + response = await client.get(url, headers=self._get_headers()) + response.raise_for_status() + + # Extract agent from response + data = response.json() + if 'data' in data: + return data['data'] + return data + + except httpx.HTTPStatusError as e: + if e.response.status_code == 404: + logger.info(f'No agent found for inbound number: {phone_number}') + return None + logger.error( + f'HTTP error fetching agent by inbound number {phone_number}: ' + f'status={e.response.status_code}' + ) + raise + except httpx.RequestError as e: + logger.error( + f'Request error fetching agent by inbound number {phone_number}: {e}' + ) + raise + + async def get_welcome_message_audio_url(self, agent_id: str) -> str: + """ + Get welcome message audio presigned URL for a voice agent. + + Args: + agent_id: Voice agent UUID (string) + + Returns: + Presigned URL for welcome message audio, or empty string if not available + + Raises: + httpx.HTTPStatusError: If API returns 4xx/5xx error (except 404) + httpx.RequestError: If request fails (network error, timeout, etc.) 
+ """ + url = f'{self.base_url}/floware/v1/voice-agents/{agent_id}/welcome-audio-url' + + async with httpx.AsyncClient(timeout=self.timeout) as client: + try: + response = await client.get(url, headers=self._get_headers()) + response.raise_for_status() + + # Extract URL from response + data = response.json() + if 'data' in data: + # Handle response_formatter wrapped response + if isinstance(data['data'], dict): + return data['data'].get('url', '') + return data['data'] if isinstance(data['data'], str) else '' + return data.get('url', '') if isinstance(data, dict) else '' + + except httpx.HTTPStatusError as e: + if e.response.status_code == 404: + logger.info(f'Welcome message URL not found for agent: {agent_id}') + return '' + logger.error( + f'HTTP error fetching welcome message URL for agent {agent_id}: ' + f'status={e.response.status_code}' + ) + raise + except httpx.RequestError as e: + logger.error( + f'Request error fetching welcome message URL for agent {agent_id}: {e}' + ) + raise diff --git a/wavefront/server/apps/call_processing/call_processing/services/pipecat_service.py b/wavefront/server/apps/call_processing/call_processing/services/pipecat_service.py index 1eaabea1..0f3f1bd0 100644 --- a/wavefront/server/apps/call_processing/call_processing/services/pipecat_service.py +++ b/wavefront/server/apps/call_processing/call_processing/services/pipecat_service.py @@ -4,7 +4,8 @@ Creates and runs the voice conversation pipeline using configured STT/LLM/TTS services """ -from typing import Dict, Any +from typing import Dict, Any, List +from copy import deepcopy from call_processing.log.logger import logger # Pipecat core imports @@ -12,7 +13,12 @@ from pipecat.audio.interruptions.min_words_interruption_strategy import ( MinWordsInterruptionStrategy, ) -from pipecat.frames.frames import TTSSpeakFrame, EndTaskFrame +from pipecat.frames.frames import ( + TTSSpeakFrame, + EndTaskFrame, + ManuallySwitchServiceFrame, + LLMMessagesUpdateFrame, +) from 
pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask @@ -22,11 +28,23 @@ ) from pipecat.processors.frame_processor import FrameProcessor, FrameDirection from pipecat.processors.user_idle_processor import UserIdleProcessor +from pipecat.processors.transcript_processor import ( + TranscriptProcessor, + TranscriptionMessage, +) +from pipecat.pipeline.service_switcher import ( + ServiceSwitcher, + ServiceSwitcherStrategyManual, +) from pipecat.transports.base_transport import BaseTransport from pipecat.services.llm_service import FunctionCallParams from call_processing.services.stt_service import STTServiceFactory from call_processing.services.tts_service import TTSServiceFactory from call_processing.services.llm_service import LLMServiceFactory +from call_processing.constants.language_config import ( + LANGUAGE_KEYWORDS, + LANGUAGE_INSTRUCTIONS, +) # Advanced handler with retry logic @@ -50,12 +68,6 @@ async def handle_user_idle(processor: FrameProcessor, retry_count): return False # Stop monitoring -user_idle = UserIdleProcessor( - callback=handle_user_idle, # Your callback function - timeout=4.0, # Seconds of inactivity before triggering -) - - async def evaluate_completion_criteria(params: FunctionCallParams): """ Check if the last user message contains goodbye-related phrases. 
@@ -136,17 +148,90 @@ async def run_conversation( Args: transport: Pipecat transport (e.g., WebSocket transport from Twilio) - agent_config: Voice agent configuration including system_prompt + agent_config: Voice agent configuration including system_prompt, + supported_languages, and default_language llm_config: LLM provider configuration tts_config: TTS provider configuration stt_config: STT provider configuration """ + # Extract language configuration from agent_config + supported_languages = agent_config.get('supported_languages', ['en']) + default_language = agent_config.get('default_language', 'en') + is_multi_language = len(supported_languages) > 1 + logger.info(f"Starting conversation for agent: {agent_config['name']}") + logger.info( + f'Language config - supported: {supported_languages}, ' + f'default: {default_language}, multi-language: {is_multi_language}' + ) - # Create services using factories - stt = STTServiceFactory.create_stt_service(stt_config) + # Create LLM service (language-agnostic) llm = LLMServiceFactory.create_llm_service(llm_config) - tts = TTSServiceFactory.create_tts_service(tts_config) + + # Create STT/TTS services with multi-language support if needed + stt_services = {} + tts_services = {} + + if is_multi_language: + logger.info( + f'Multi-language mode enabled for languages: {supported_languages}' + ) + + # Create STT/TTS services for each supported language + for lang_code in supported_languages: + # Deep clone configs to avoid mutating original configs + stt_config_lang = deepcopy(stt_config) + tts_config_lang = deepcopy(tts_config) + + # Update language in parameters + if 'parameters' not in stt_config_lang: + stt_config_lang['parameters'] = {} + stt_config_lang['parameters']['language'] = lang_code + + if 'parameters' not in tts_config_lang: + tts_config_lang['parameters'] = {} + tts_config_lang['parameters']['language'] = lang_code + + # Create services + stt_services[lang_code] = STTServiceFactory.create_stt_service( + 
stt_config_lang + ) + tts_services[lang_code] = TTSServiceFactory.create_tts_service( + tts_config_lang + ) + + logger.info(f'Created STT/TTS services for language: {lang_code}') + + # Create service switchers with manual strategy + # Order services list with default language first (ServiceSwitcher uses first as initial) + stt_services_list = [] + tts_services_list = [] + + # Add default language service first + if default_language in stt_services: + stt_services_list.append(stt_services[default_language]) + tts_services_list.append(tts_services[default_language]) + + # Add remaining services + for lang_code in supported_languages: + if lang_code != default_language: + stt_services_list.append(stt_services[lang_code]) + tts_services_list.append(tts_services[lang_code]) + + stt = ServiceSwitcher( + services=stt_services_list, strategy_type=ServiceSwitcherStrategyManual + ) + tts = ServiceSwitcher( + services=tts_services_list, strategy_type=ServiceSwitcherStrategyManual + ) + + logger.info(f'Initialized with default language: {default_language}') + else: + logger.info('Single language mode - no language detection needed') + + # Create single STT/TTS services + stt = STTServiceFactory.create_stt_service(stt_config) + tts = TTSServiceFactory.create_tts_service(tts_config) # Create initial messages with system prompt messages = [ @@ -166,20 +251,35 @@ async def run_conversation( context = LLMContext(messages, tools=tools) context_aggregator = LLMContextAggregatorPair(context) - # Create pipeline - pipeline = Pipeline( - [ - transport.input(), # Audio input from Twilio - stt, # Speech-to-Text - user_idle, - context_aggregator.user(), # Add user message to context - llm, # LLM processing - tts, # Text-to-Speech - transport.output(), # Audio output to Twilio - context_aggregator.assistant(), # Add assistant response to context - ] + # Create transcript processor for language detection + transcript = TranscriptProcessor() + + # Track current language detection state (only 
for multi-language) + language_detected = {'detected': False, 'current_language': default_language} + + # Create user idle processor (fresh instance for each conversation) + user_idle = UserIdleProcessor( + callback=handle_user_idle, + timeout=4.0, ) + # Build pipeline components list + pipeline_components = [ + transport.input(), # Audio input from Twilio + stt, # Speech-to-Text (ServiceSwitcher for multi-lang, direct for single) + transcript.user(), # Transcript processor for user messages + user_idle, # User idle detection + context_aggregator.user(), # Add user message to context + llm, # LLM processing + tts, # Text-to-Speech (ServiceSwitcher for multi-lang, direct for single) + transport.output(), # Audio output to Twilio + transcript.assistant(), # Transcript processor for assistant messages + context_aggregator.assistant(), # Add assistant response to context + ] + + # Create pipeline + pipeline = Pipeline(pipeline_components) + # Create pipeline task with Twilio-specific parameters task = PipelineTask( pipeline, @@ -195,6 +295,130 @@ async def run_conversation( idle_timeout_secs=20, # Safety net - allows UserIdleProcessor to complete 3 retries (4s each = 12s total) ) + # Multi-language detection event handler + if is_multi_language: + + @transcript.event_handler('on_transcript_update') + async def handle_language_detection(processor, frame): + """Detect language from first user message and switch services""" + + # Only detect once + if language_detected['detected']: + return + + messages: List[TranscriptionMessage] = frame.messages + + # Look for user messages + for message in messages: + if message.role == 'user': + message_content = message.content.lower().strip() + + # Skip empty messages + if not message_content: + continue + + logger.info( + f"Analyzing message for language detection: '{message_content}'" + ) + + # Check for language keywords + detected_lang = None + for lang_code in supported_languages: + keywords = LANGUAGE_KEYWORDS.get(lang_code, 
[]) + for keyword in keywords: + if keyword.lower() in message_content: + detected_lang = lang_code + logger.info( + f'Language detected: {detected_lang} ' + f"(matched keyword: '{keyword}')" + ) + break + if detected_lang: + break + + # Use detected language or fallback to default + target_language = detected_lang or default_language + + if not detected_lang: + logger.info( + f'No language detected, using default: {default_language}' + ) + + # Mark detection as complete + language_detected['detected'] = True + language_detected['current_language'] = target_language + + # Only switch services if target language is different from default + if target_language != default_language: + if ( + target_language in stt_services + and target_language in tts_services + ): + target_stt = stt_services[target_language] + target_tts = tts_services[target_language] + + try: + await task.queue_frames( + [ + ManuallySwitchServiceFrame( + service=target_stt + ), + ManuallySwitchServiceFrame( + service=target_tts + ), + ] + ) + logger.info( + f'Switched STT/TTS services to language: {target_language}' + ) + except Exception as e: + logger.error( + f'Error switching services: {e}', exc_info=True + ) + else: + logger.info( + f'Language {target_language} is default, no service switch needed' + ) + + # Update LLM system prompt with language instruction + language_instruction = LANGUAGE_INSTRUCTIONS.get( + target_language, + LANGUAGE_INSTRUCTIONS.get('en', 'Respond in English.'), + ) + + # Get current system prompt and append language instruction + current_messages = context.get_messages() + if current_messages and len(current_messages) > 0: + system_message = current_messages[0] + else: + system_message = { + 'role': 'system', + 'content': agent_config['system_prompt'], + } + + updated_content = ( + f"{system_message['content']}\n\n{language_instruction}" + ) + updated_system_message = { + 'role': 'system', + 'content': updated_content, + } + + # Update context with new system message + 
new_messages = [updated_system_message] + current_messages[1:] + await task.queue_frame( + LLMMessagesUpdateFrame(new_messages, run_llm=False) + ) + + logger.info( + f'Updated LLM context with language instruction for {target_language}' + ) + + # Exit after first detection + break + + logger.info('Language detection event handler registered') + # Register event handlers @transport.event_handler('on_client_connected') async def on_client_connected(transport, client): diff --git a/wavefront/server/apps/call_processing/call_processing/services/voice_agent_cache_service.py b/wavefront/server/apps/call_processing/call_processing/services/voice_agent_cache_service.py index c3c84890..8d928b07 100644 --- a/wavefront/server/apps/call_processing/call_processing/services/voice_agent_cache_service.py +++ b/wavefront/server/apps/call_processing/call_processing/services/voice_agent_cache_service.py @@ -262,3 +262,137 @@ async def get_all_agent_configs(self, agent_id: UUID) -> Dict[str, Any]: 'stt_config': stt_config, 'telephony_config': telephony_config, } + + async def get_agent_by_inbound_number( + self, phone_number: str + ) -> Optional[Dict[str, Any]]: + """ + Get voice agent by inbound phone number (with caching). + + Strategy: + 1. Try cache first using inbound_number:{phone_number} key + 2. If cache miss, fetch from floware API + 3. Cache the agent ID mapping with 24-hour TTL + 4. 
Return full agent dict + + Args: + phone_number: E.164 formatted phone number + + Returns: + Voice agent dict or None if not found + """ + cache_key = f'inbound_number:{phone_number}' + + # Try cache first + cached_agent_id = self.cache_manager.get_str(cache_key) + if cached_agent_id: + logger.info( + f'Cache hit for inbound number: {phone_number} -> agent {cached_agent_id}' + ) + + # Fetch agent from cache or API + agent_key = get_voice_agent_cache_key(UUID(cached_agent_id)) + agent = self.cache_manager.get_json(agent_key) + + if agent: + return agent + else: + # Agent not in cache, fetch from API + if self.floware_http_client: + try: + agent = await self.floware_http_client.fetch_voice_agent( + UUID(cached_agent_id) + ) + if agent: + # Cache the agent + self.cache_manager.set_json( + agent_key, agent, expiry=self.cache_ttl + ) + return agent + except Exception as e: + logger.error( + f'Failed to fetch agent {cached_agent_id} from API: {e}' + ) + + # Cache miss - fetch from floware API + logger.info(f'Cache miss - fetching agent by inbound number: {phone_number}') + + if not self.floware_http_client: + logger.error('No HTTP client configured for inbound number lookup') + return None + + try: + agent = await self.floware_http_client.get_agent_by_inbound_number( + phone_number + ) + + if agent: + agent_id = agent.get('id') + # Cache the inbound number -> agent ID mapping + self.cache_manager.add(cache_key, str(agent_id), expiry=self.cache_ttl) + + # Cache the agent itself + agent_key = get_voice_agent_cache_key(UUID(agent_id)) + self.cache_manager.set_json(agent_key, agent, expiry=self.cache_ttl) + + logger.info(f'Cached inbound number {phone_number} -> agent {agent_id}') + return agent + else: + logger.warning(f'No agent found for inbound number: {phone_number}') + return None + + except Exception as e: + logger.error( + f'Failed to fetch agent by inbound number {phone_number}: {e}', + exc_info=True, + ) + return None + + async def 
get_welcome_message_audio_url(self, agent_id: str) -> str: + """ + Get welcome message audio URL for an agent (with caching). + + Strategy: + 1. Try cache first using voice_agent_welcome_url:{agent_id} key + 2. If cache miss, fetch from floware API + 3. Cache the URL with ~2 hour TTL (same as floware) + + Args: + agent_id: Voice agent UUID (string) + + Returns: + Presigned URL for welcome message audio or empty string if not available + """ + cache_key = f'voice_agent_welcome_url:{agent_id}' + + # Try cache first + cached_url = self.cache_manager.get_str(cache_key) + if cached_url: + logger.info(f'Cache hit for welcome message URL: {agent_id}') + return cached_url + + # Cache miss - fetch from floware API + logger.info(f'Cache miss - fetching welcome message URL for agent: {agent_id}') + + if not self.floware_http_client: + logger.error('No HTTP client configured for welcome message URL fetch') + return '' + + try: + url = await self.floware_http_client.get_welcome_message_audio_url(agent_id) + + if url: + # Cache URL with ~2 hour TTL (7100 seconds - matches floware) + self.cache_manager.add(cache_key, url, expiry=7100) + logger.info(f'Cached welcome message URL for agent {agent_id}') + return url + else: + logger.warning(f'No welcome message URL for agent: {agent_id}') + return '' + + except Exception as e: + logger.error( + f'Failed to fetch welcome message URL for agent {agent_id}: {e}', + exc_info=True, + ) + return '' From 20a79668f18c855253a81a8938fcef2ffd53facf Mon Sep 17 00:00:00 2001 From: rootflo-hardik Date: Mon, 12 Jan 2026 15:32:50 +0530 Subject: [PATCH 4/8] floware - moved lang, params from stt, tts to voice_agent --- ...49da528_add_inbound_voice_agent_support.py | 173 +++++++++++++----- .../db_repo_module/models/stt_config.py | 11 +- .../db_repo_module/models/tts_config.py | 12 +- .../db_repo_module/models/voice_agent.py | 7 + .../controllers/stt_config_controller.py | 32 +--- .../controllers/tts_config_controller.py | 35 +--- 
.../controllers/voice_agent_controller.py | 9 + .../voice_agents_module/models/stt_schemas.py | 14 +- .../voice_agents_module/models/tts_schemas.py | 16 +- .../models/voice_agent_schemas.py | 13 ++ .../services/stt_config_service.py | 6 - .../services/tts_config_service.py | 11 +- .../services/voice_agent_service.py | 112 +++++++++++- 13 files changed, 273 insertions(+), 178 deletions(-) diff --git a/wavefront/server/modules/db_repo_module/db_repo_module/alembic/versions/2026_01_08_1547-6010e49da528_add_inbound_voice_agent_support.py b/wavefront/server/modules/db_repo_module/db_repo_module/alembic/versions/2026_01_08_1547-6010e49da528_add_inbound_voice_agent_support.py index 20a189ec..62b3ed1b 100644 --- a/wavefront/server/modules/db_repo_module/db_repo_module/alembic/versions/2026_01_08_1547-6010e49da528_add_inbound_voice_agent_support.py +++ b/wavefront/server/modules/db_repo_module/db_repo_module/alembic/versions/2026_01_08_1547-6010e49da528_add_inbound_voice_agent_support.py @@ -1,4 +1,4 @@ -"""add_inbound_voice_agent_support +"""add_inbound_voice_agent_support_and_refactor_tts_stt Revision ID: 6010e49da528 Revises: f7572bcd9510 @@ -6,12 +6,12 @@ """ +import json from typing import Sequence, Union from alembic import op import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - +from sqlalchemy import text # revision identifiers, used by Alembic. 
revision: str = '6010e49da528' @@ -22,57 +22,131 @@ def upgrade() -> None: # Add new columns to voice_agents table (initially nullable) - op.add_column( - 'voice_agents', - sa.Column( - 'inbound_numbers', postgresql.JSONB(astext_type=sa.Text()), nullable=True - ), - ) - op.add_column( - 'voice_agents', - sa.Column( - 'outbound_numbers', postgresql.JSONB(astext_type=sa.Text()), nullable=True - ), - ) - op.add_column( - 'voice_agents', - sa.Column( - 'supported_languages', - postgresql.JSONB(astext_type=sa.Text()), - nullable=True, - ), - ) - op.add_column( - 'voice_agents', - sa.Column('default_language', sa.String(length=10), nullable=True), - ) + # Use raw SQL with IF NOT EXISTS for idempotency + op.execute(""" + ALTER TABLE voice_agents + ADD COLUMN IF NOT EXISTS inbound_numbers JSONB, + ADD COLUMN IF NOT EXISTS outbound_numbers JSONB, + ADD COLUMN IF NOT EXISTS supported_languages JSONB, + ADD COLUMN IF NOT EXISTS default_language VARCHAR(10), + ADD COLUMN IF NOT EXISTS tts_voice_id VARCHAR(255), + ADD COLUMN IF NOT EXISTS tts_parameters JSONB, + ADD COLUMN IF NOT EXISTS stt_parameters JSONB + """) # Set defaults for existing agents op.execute(""" UPDATE voice_agents SET - inbound_numbers = '[]'::jsonb, - outbound_numbers = '[]'::jsonb, - supported_languages = '["en"]'::jsonb, - default_language = 'en' + inbound_numbers = COALESCE(inbound_numbers, '[]'::jsonb), + outbound_numbers = COALESCE(outbound_numbers, '[]'::jsonb), + supported_languages = COALESCE(supported_languages, '["en"]'::jsonb), + default_language = COALESCE(default_language, 'en') WHERE inbound_numbers IS NULL + OR outbound_numbers IS NULL + OR supported_languages IS NULL + OR default_language IS NULL """) - # Make columns non-nullable after setting defaults - op.alter_column('voice_agents', 'inbound_numbers', nullable=False) - op.alter_column('voice_agents', 'outbound_numbers', nullable=False) - op.alter_column('voice_agents', 'supported_languages', nullable=False) - 
op.alter_column('voice_agents', 'default_language', nullable=False) - - # Create GIN index for fast inbound number lookups (JSONB containment queries) + # Migrate TTS/STT data from configs to voice_agents + connection = op.get_bind() + + # Fetch all non-deleted voice agents with their TTS/STT configs (using LEFT JOINs) + agents = connection.execute( + text(""" + SELECT + va.id as agent_id, + tc.voice_id as tts_voice_id, + tc.parameters as tts_params, + sc.parameters as stt_params + FROM voice_agents va + LEFT JOIN tts_configs tc ON va.tts_config_id = tc.id + LEFT JOIN stt_configs sc ON va.stt_config_id = sc.id + WHERE va.is_deleted = false + """) + ).fetchall() + + # Migrate data for each agent + for agent in agents: + # Parse JSON parameters (may be None) + tts_params_dict = json.loads(agent.tts_params) if agent.tts_params else {} + stt_params_dict = json.loads(agent.stt_params) if agent.stt_params else {} + + # Use default voice_id if not available + tts_voice_id = agent.tts_voice_id if agent.tts_voice_id else 'default' + + # Update agent with TTS/STT data + connection.execute( + text(""" + UPDATE voice_agents + SET + tts_voice_id = :voice_id, + tts_parameters = :tts_params, + stt_parameters = :stt_params + WHERE id = :agent_id + """), + { + 'voice_id': tts_voice_id, + 'tts_params': json.dumps(tts_params_dict), + 'stt_params': json.dumps(stt_params_dict), + 'agent_id': str(agent.agent_id), + }, + ) + + # Safety check: Set default tts_voice_id for any remaining NULL values + # This handles cases where agents don't have associated configs or were missed op.execute(""" - CREATE INDEX idx_voice_agents_inbound_numbers_gin - ON voice_agents USING gin (inbound_numbers jsonb_path_ops) + UPDATE voice_agents + SET tts_voice_id = 'default' + WHERE tts_voice_id IS NULL """) + # Make columns non-nullable after setting defaults (idempotent) + connection = op.get_bind() + + # Only alter if currently nullable + for col in [ + 'inbound_numbers', + 'outbound_numbers', + 
'supported_languages', + 'default_language', + 'tts_voice_id', + ]: + result = connection.execute( + text(f""" + SELECT is_nullable FROM information_schema.columns + WHERE table_name = 'voice_agents' AND column_name = '{col}' + """) + ) + row = result.fetchone() + if row and row[0] == 'YES': + connection.execute( + text(f'ALTER TABLE voice_agents ALTER COLUMN {col} SET NOT NULL') + ) + # Remove phone_numbers column from telephony_configs table # Phone numbers are now managed at the voice_agent level - op.drop_column('telephony_configs', 'phone_numbers') + # Use conditional drop to make it idempotent + op.execute(""" + ALTER TABLE telephony_configs + DROP COLUMN IF EXISTS phone_numbers + """) + + # Remove provider-specific columns from tts_configs + # These are now stored in voice_agents + op.execute(""" + ALTER TABLE tts_configs + DROP COLUMN IF EXISTS voice_id, + DROP COLUMN IF EXISTS language, + DROP COLUMN IF EXISTS parameters + """) + + # Remove provider-specific columns from stt_configs + op.execute(""" + ALTER TABLE stt_configs + DROP COLUMN IF EXISTS language, + DROP COLUMN IF EXISTS parameters + """) def downgrade() -> None: @@ -81,11 +155,24 @@ def downgrade() -> None: 'telephony_configs', sa.Column('phone_numbers', sa.Text(), nullable=True) ) - # Drop GIN index - op.drop_index('idx_voice_agents_inbound_numbers_gin', table_name='voice_agents') + # Restore TTS/STT config columns + op.add_column( + 'tts_configs', sa.Column('voice_id', sa.String(length=255), nullable=True) + ) + op.add_column( + 'tts_configs', sa.Column('language', sa.String(length=64), nullable=True) + ) + op.add_column('tts_configs', sa.Column('parameters', sa.Text(), nullable=True)) + op.add_column( + 'stt_configs', sa.Column('language', sa.String(length=64), nullable=True) + ) + op.add_column('stt_configs', sa.Column('parameters', sa.Text(), nullable=True)) # Drop columns from voice_agents op.drop_column('voice_agents', 'default_language') op.drop_column('voice_agents', 
'supported_languages') op.drop_column('voice_agents', 'outbound_numbers') op.drop_column('voice_agents', 'inbound_numbers') + op.drop_column('voice_agents', 'stt_parameters') + op.drop_column('voice_agents', 'tts_parameters') + op.drop_column('voice_agents', 'tts_voice_id') diff --git a/wavefront/server/modules/db_repo_module/db_repo_module/models/stt_config.py b/wavefront/server/modules/db_repo_module/db_repo_module/models/stt_config.py index 96569899..823daf16 100644 --- a/wavefront/server/modules/db_repo_module/db_repo_module/models/stt_config.py +++ b/wavefront/server/modules/db_repo_module/db_repo_module/models/stt_config.py @@ -1,9 +1,8 @@ -import json import uuid from datetime import datetime from typing import Optional -from sqlalchemy import String, Text, func +from sqlalchemy import String, func from sqlalchemy.orm import Mapped, mapped_column from ..database.base import Base @@ -21,8 +20,6 @@ class SttConfig(Base): ) provider: Mapped[str] = mapped_column(String(length=64), nullable=False) api_key: Mapped[str] = mapped_column(String(length=512), nullable=False) - language: Mapped[Optional[str]] = mapped_column(String(length=64), nullable=True) - parameters: Mapped[Optional[str]] = mapped_column(Text, nullable=True) is_deleted: Mapped[bool] = mapped_column(default=False, nullable=False) created_at: Mapped[datetime] = mapped_column(default=func.now()) updated_at: Mapped[datetime] = mapped_column( @@ -45,12 +42,6 @@ def to_dict(self, exclude_api_key: bool = True): result[column.name] = str(value) elif isinstance(value, datetime): result[column.name] = value.isoformat() - elif column.name == 'parameters' and value: - # Parse JSON field - try: - result[column.name] = json.loads(value) - except (json.JSONDecodeError, TypeError): - result[column.name] = value else: result[column.name] = value return result diff --git a/wavefront/server/modules/db_repo_module/db_repo_module/models/tts_config.py 
b/wavefront/server/modules/db_repo_module/db_repo_module/models/tts_config.py index ff5f4f88..0c855d2c 100644 --- a/wavefront/server/modules/db_repo_module/db_repo_module/models/tts_config.py +++ b/wavefront/server/modules/db_repo_module/db_repo_module/models/tts_config.py @@ -1,9 +1,8 @@ -import json import uuid from datetime import datetime from typing import Optional -from sqlalchemy import String, Text, func +from sqlalchemy import String, func from sqlalchemy.orm import Mapped, mapped_column from ..database.base import Base @@ -20,10 +19,7 @@ class TtsConfig(Base): String(length=500), nullable=True ) provider: Mapped[str] = mapped_column(String(length=64), nullable=False) - voice_id: Mapped[str] = mapped_column(String(length=255), nullable=False) api_key: Mapped[str] = mapped_column(String(length=512), nullable=False) - language: Mapped[Optional[str]] = mapped_column(String(length=64), nullable=True) - parameters: Mapped[Optional[str]] = mapped_column(Text, nullable=True) is_deleted: Mapped[bool] = mapped_column(default=False, nullable=False) created_at: Mapped[datetime] = mapped_column(default=func.now()) updated_at: Mapped[datetime] = mapped_column( @@ -46,12 +42,6 @@ def to_dict(self, exclude_api_key: bool = True): result[column.name] = str(value) elif isinstance(value, datetime): result[column.name] = value.isoformat() - elif column.name == 'parameters' and value: - # Parse JSON field - try: - result[column.name] = json.loads(value) - except (json.JSONDecodeError, TypeError): - result[column.name] = value else: result[column.name] = value return result diff --git a/wavefront/server/modules/db_repo_module/db_repo_module/models/voice_agent.py b/wavefront/server/modules/db_repo_module/db_repo_module/models/voice_agent.py index 495acaef..c4774616 100644 --- a/wavefront/server/modules/db_repo_module/db_repo_module/models/voice_agent.py +++ b/wavefront/server/modules/db_repo_module/db_repo_module/models/voice_agent.py @@ -35,6 +35,11 @@ class VoiceAgent(Base): 
welcome_message: Mapped[str] = mapped_column(Text, nullable=False) status: Mapped[str] = mapped_column(String(length=64), nullable=False) + # TTS/STT configuration + tts_voice_id: Mapped[str] = mapped_column(String(length=255), nullable=False) + tts_parameters: Mapped[Optional[dict]] = mapped_column(JSONB, nullable=True) + stt_parameters: Mapped[Optional[dict]] = mapped_column(JSONB, nullable=True) + # Multi-language and phone number support inbound_numbers: Mapped[list] = mapped_column(JSONB, nullable=False, default=list) outbound_numbers: Mapped[list] = mapped_column(JSONB, nullable=False, default=list) @@ -68,6 +73,8 @@ def to_dict(self): 'inbound_numbers', 'outbound_numbers', 'supported_languages', + 'tts_parameters', + 'stt_parameters', ]: # Parse JSON/JSONB fields if value: diff --git a/wavefront/server/modules/voice_agents_module/voice_agents_module/controllers/stt_config_controller.py b/wavefront/server/modules/voice_agents_module/voice_agents_module/controllers/stt_config_controller.py index 484f32f8..d0c94078 100644 --- a/wavefront/server/modules/voice_agents_module/voice_agents_module/controllers/stt_config_controller.py +++ b/wavefront/server/modules/voice_agents_module/voice_agents_module/controllers/stt_config_controller.py @@ -1,4 +1,3 @@ -import json from uuid import UUID from dependency_injector.wiring import Provide, inject @@ -10,7 +9,6 @@ from voice_agents_module.models.stt_schemas import ( CreateSttConfigPayload, UpdateSttConfigPayload, - SttProvider, UNSET, ) from voice_agents_module.services.stt_config_service import SttConfigService @@ -34,10 +32,10 @@ async def create_stt_config( """ Create a new STT configuration - Creates a Speech-to-Text provider configuration. + Creates a Speech-to-Text provider configuration with credentials only. Args: - payload: Configuration details including provider, api_key, etc. 
+ payload: Configuration details including provider and api_key Returns: JSONResponse: Created configuration (api_key excluded) @@ -47,8 +45,6 @@ async def create_stt_config( description=payload.description, provider=payload.provider.value, api_key=payload.api_key, - language=payload.language, - parameters=payload.parameters, ) return JSONResponse( @@ -153,32 +149,8 @@ async def update_stt_config( update_data['display_name'] = payload.display_name if payload.description is not UNSET: update_data['description'] = payload.description - if payload.provider is not UNSET: - if hasattr(payload.provider, 'value'): - # It's an enum object - update_data['provider'] = payload.provider.value - elif isinstance(payload.provider, str) and payload.provider in [ - e.value for e in SttProvider - ]: - # It's a valid enum value string - update_data['provider'] = payload.provider - else: - # Invalid value - valid_values = [e.value for e in SttProvider] - return JSONResponse( - status_code=status.HTTP_400_BAD_REQUEST, - content=response_formatter.buildErrorResponse( - f'Invalid provider value. 
Must be one of: {valid_values}' - ), - ) if payload.api_key is not UNSET: update_data['api_key'] = payload.api_key - if payload.language is not UNSET: - update_data['language'] = payload.language - if payload.parameters is not UNSET: - update_data['parameters'] = ( - json.dumps(payload.parameters) if payload.parameters else None - ) if not update_data: return JSONResponse( diff --git a/wavefront/server/modules/voice_agents_module/voice_agents_module/controllers/tts_config_controller.py b/wavefront/server/modules/voice_agents_module/voice_agents_module/controllers/tts_config_controller.py index b4801dfb..63a49266 100644 --- a/wavefront/server/modules/voice_agents_module/voice_agents_module/controllers/tts_config_controller.py +++ b/wavefront/server/modules/voice_agents_module/voice_agents_module/controllers/tts_config_controller.py @@ -1,4 +1,3 @@ -import json from uuid import UUID from dependency_injector.wiring import Provide, inject @@ -10,7 +9,6 @@ from voice_agents_module.models.tts_schemas import ( CreateTtsConfigPayload, UpdateTtsConfigPayload, - TtsProvider, UNSET, ) from voice_agents_module.services.tts_config_service import TtsConfigService @@ -34,10 +32,10 @@ async def create_tts_config( """ Create a new TTS configuration - Creates a Text-to-Speech provider configuration with voice settings. + Creates a Text-to-Speech provider configuration with credentials only. Args: - payload: Configuration details including provider, voice_id, api_key, etc. 
+ payload: Configuration details including provider and api_key Returns: JSONResponse: Created configuration (api_key excluded) @@ -46,10 +44,7 @@ async def create_tts_config( display_name=payload.display_name, description=payload.description, provider=payload.provider.value, - voice_id=payload.voice_id, api_key=payload.api_key, - language=payload.language, - parameters=payload.parameters, ) return JSONResponse( @@ -154,34 +149,8 @@ async def update_tts_config( update_data['display_name'] = payload.display_name if payload.description is not UNSET: update_data['description'] = payload.description - if payload.provider is not UNSET: - if hasattr(payload.provider, 'value'): - # It's an enum object - update_data['provider'] = payload.provider.value - elif isinstance(payload.provider, str) and payload.provider in [ - e.value for e in TtsProvider - ]: - # It's a valid enum value string - update_data['provider'] = payload.provider - else: - # Invalid value - valid_values = [e.value for e in TtsProvider] - return JSONResponse( - status_code=status.HTTP_400_BAD_REQUEST, - content=response_formatter.buildErrorResponse( - f'Invalid provider value. 
Must be one of: {valid_values}' - ), - ) - if payload.voice_id is not UNSET: - update_data['voice_id'] = payload.voice_id if payload.api_key is not UNSET: update_data['api_key'] = payload.api_key - if payload.language is not UNSET: - update_data['language'] = payload.language - if payload.parameters is not UNSET: - update_data['parameters'] = ( - json.dumps(payload.parameters) if payload.parameters else None - ) if not update_data: return JSONResponse( diff --git a/wavefront/server/modules/voice_agents_module/voice_agents_module/controllers/voice_agent_controller.py b/wavefront/server/modules/voice_agents_module/voice_agents_module/controllers/voice_agent_controller.py index 98305652..7ec584b3 100644 --- a/wavefront/server/modules/voice_agents_module/voice_agents_module/controllers/voice_agent_controller.py +++ b/wavefront/server/modules/voice_agents_module/voice_agents_module/controllers/voice_agent_controller.py @@ -55,6 +55,9 @@ async def create_voice_agent( telephony_config_id=payload.telephony_config_id, system_prompt=payload.system_prompt, welcome_message=payload.welcome_message, + tts_voice_id=payload.tts_voice_id, + tts_parameters=payload.tts_parameters, + stt_parameters=payload.stt_parameters, conversation_config=payload.conversation_config, status=payload.status.value, inbound_numbers=payload.inbound_numbers, @@ -178,6 +181,12 @@ async def update_voice_agent( update_data['system_prompt'] = payload.system_prompt if payload.welcome_message is not UNSET: update_data['welcome_message'] = payload.welcome_message + if payload.tts_voice_id is not UNSET: + update_data['tts_voice_id'] = payload.tts_voice_id + if payload.tts_parameters is not UNSET: + update_data['tts_parameters'] = payload.tts_parameters + if payload.stt_parameters is not UNSET: + update_data['stt_parameters'] = payload.stt_parameters if payload.conversation_config is not UNSET: update_data['conversation_config'] = ( json.dumps(payload.conversation_config) diff --git 
a/wavefront/server/modules/voice_agents_module/voice_agents_module/models/stt_schemas.py b/wavefront/server/modules/voice_agents_module/voice_agents_module/models/stt_schemas.py index c74a3da4..219011e2 100644 --- a/wavefront/server/modules/voice_agents_module/voice_agents_module/models/stt_schemas.py +++ b/wavefront/server/modules/voice_agents_module/voice_agents_module/models/stt_schemas.py @@ -1,5 +1,5 @@ from pydantic import BaseModel, Field -from typing import Optional, Union, Any, Dict +from typing import Optional, Union, Any from enum import Enum from datetime import datetime import uuid @@ -30,22 +30,12 @@ class CreateSttConfigPayload(BaseModel): ) provider: SttProvider = Field(..., description='STT provider') api_key: str = Field(..., description='API key for the STT provider') - language: Optional[str] = Field( - None, - description='ISO 639-1 language code (optional, most providers auto-detect)', - ) - parameters: Optional[Dict[str, Any]] = Field( - None, description='Provider-specific parameters as JSON object (optional)' - ) class UpdateSttConfigPayload(BaseModel): display_name: Union[str, Any] = Field(default=UNSET) description: Union[str, None, Any] = Field(default=UNSET) - provider: Union[SttProvider, Any] = Field(default=UNSET) api_key: Union[str, Any] = Field(default=UNSET) - language: Union[str, None, Any] = Field(default=UNSET) - parameters: Union[Dict[str, Any], None, Any] = Field(default=UNSET) class SttConfigResponse(BaseModel): @@ -53,8 +43,6 @@ class SttConfigResponse(BaseModel): display_name: str description: Optional[str] provider: str - language: Optional[str] - parameters: Optional[Dict[str, Any]] is_deleted: bool created_at: datetime updated_at: datetime diff --git a/wavefront/server/modules/voice_agents_module/voice_agents_module/models/tts_schemas.py b/wavefront/server/modules/voice_agents_module/voice_agents_module/models/tts_schemas.py index 090c2ccf..f0e702da 100644 --- 
a/wavefront/server/modules/voice_agents_module/voice_agents_module/models/tts_schemas.py +++ b/wavefront/server/modules/voice_agents_module/voice_agents_module/models/tts_schemas.py @@ -1,5 +1,5 @@ from pydantic import BaseModel, Field -from typing import Optional, Union, Any, Dict +from typing import Optional, Union, Any from enum import Enum from datetime import datetime import uuid @@ -30,24 +30,13 @@ class CreateTtsConfigPayload(BaseModel): description='Optional description of the TTS configuration', ) provider: TtsProvider = Field(..., description='TTS provider') - voice_id: str = Field(..., description='Provider-specific voice identifier') api_key: str = Field(..., description='API key for the TTS provider') - language: Optional[str] = Field( - None, description='ISO 639-1 language code (optional, for multi-lingual voices)' - ) - parameters: Optional[Dict[str, Any]] = Field( - None, description='Provider-specific parameters as JSON object (optional)' - ) class UpdateTtsConfigPayload(BaseModel): display_name: Union[str, Any] = Field(default=UNSET) description: Union[str, None, Any] = Field(default=UNSET) - provider: Union[TtsProvider, Any] = Field(default=UNSET) - voice_id: Union[str, Any] = Field(default=UNSET) api_key: Union[str, Any] = Field(default=UNSET) - language: Union[str, None, Any] = Field(default=UNSET) - parameters: Union[Dict[str, Any], None, Any] = Field(default=UNSET) class TtsConfigResponse(BaseModel): @@ -55,9 +44,6 @@ class TtsConfigResponse(BaseModel): display_name: str description: Optional[str] provider: str - voice_id: str - language: Optional[str] - parameters: Optional[Dict[str, Any]] is_deleted: bool created_at: datetime updated_at: datetime diff --git a/wavefront/server/modules/voice_agents_module/voice_agents_module/models/voice_agent_schemas.py b/wavefront/server/modules/voice_agents_module/voice_agents_module/models/voice_agent_schemas.py index 27ae09d6..000bc720 100644 --- 
a/wavefront/server/modules/voice_agents_module/voice_agents_module/models/voice_agent_schemas.py +++ b/wavefront/server/modules/voice_agents_module/voice_agents_module/models/voice_agent_schemas.py @@ -30,6 +30,13 @@ class CreateVoiceAgentPayload(BaseModel): ..., description='Welcome message to play at call start (will be converted to audio)', ) + tts_voice_id: str = Field(..., description='TTS voice identifier') + tts_parameters: Optional[Dict[str, Any]] = Field( + None, description='Provider-specific TTS parameters (model, stability, etc.)' + ) + stt_parameters: Optional[Dict[str, Any]] = Field( + None, description='Provider-specific STT parameters (model, endpointing, etc.)' + ) status: VoiceAgentStatus = Field( default=VoiceAgentStatus.INACTIVE, description='Agent status (active or inactive)', @@ -62,6 +69,9 @@ class UpdateVoiceAgentPayload(BaseModel): system_prompt: Union[str, Any] = Field(default=UNSET) conversation_config: Union[Dict[str, Any], None, Any] = Field(default=UNSET) welcome_message: Union[str, Any] = Field(default=UNSET) + tts_voice_id: Union[str, Any] = Field(default=UNSET) + tts_parameters: Union[Dict[str, Any], None, Any] = Field(default=UNSET) + stt_parameters: Union[Dict[str, Any], None, Any] = Field(default=UNSET) status: Union[VoiceAgentStatus, Any] = Field(default=UNSET) inbound_numbers: Union[List[str], Any] = Field(default=UNSET) outbound_numbers: Union[List[str], Any] = Field(default=UNSET) @@ -80,6 +90,9 @@ class VoiceAgentResponse(BaseModel): system_prompt: str conversation_config: Optional[Dict[str, Any]] welcome_message: str + tts_voice_id: str + tts_parameters: Optional[Dict[str, Any]] + stt_parameters: Optional[Dict[str, Any]] status: str inbound_numbers: List[str] outbound_numbers: List[str] diff --git a/wavefront/server/modules/voice_agents_module/voice_agents_module/services/stt_config_service.py b/wavefront/server/modules/voice_agents_module/voice_agents_module/services/stt_config_service.py index c9c452b0..b55c441a 100644 
--- a/wavefront/server/modules/voice_agents_module/voice_agents_module/services/stt_config_service.py +++ b/wavefront/server/modules/voice_agents_module/voice_agents_module/services/stt_config_service.py @@ -40,8 +40,6 @@ async def create_config( description: Optional[str] = None, provider: str = None, api_key: str = None, - language: Optional[str] = None, - parameters: Optional[dict] = None, ) -> dict: """ Create a new STT configuration @@ -51,8 +49,6 @@ async def create_config( description: Optional description provider: STT provider api_key: API key for the STT provider - language: ISO 639-1 language code (optional) - parameters: Provider-specific parameters (optional) Returns: Created STT config as dict @@ -66,8 +62,6 @@ async def create_config( description=description, provider=provider, api_key=api_key, - language=language, - parameters=json.dumps(parameters) if parameters else None, ) # Convert to dict diff --git a/wavefront/server/modules/voice_agents_module/voice_agents_module/services/tts_config_service.py b/wavefront/server/modules/voice_agents_module/voice_agents_module/services/tts_config_service.py index 2678bf82..fa655cdb 100644 --- a/wavefront/server/modules/voice_agents_module/voice_agents_module/services/tts_config_service.py +++ b/wavefront/server/modules/voice_agents_module/voice_agents_module/services/tts_config_service.py @@ -39,10 +39,7 @@ async def create_config( display_name: str, description: Optional[str] = None, provider: str = None, - voice_id: str = None, api_key: str = None, - language: Optional[str] = None, - parameters: Optional[dict] = None, ) -> dict: """ Create a new TTS configuration @@ -51,26 +48,20 @@ async def create_config( display_name: Display name for the configuration description: Optional description provider: TTS provider - voice_id: Provider-specific voice identifier api_key: API key for the TTS provider - language: ISO 639-1 language code (optional) - parameters: Provider-specific parameters (optional) Returns: 
Created TTS config as dict """ logger.info( - f'Creating TTS config - display_name: {display_name}, provider: {provider}, voice_id: {voice_id}' + f'Creating TTS config - display_name: {display_name}, provider: {provider}' ) config = await self.tts_config_repository.create( display_name=display_name, description=description, provider=provider, - voice_id=voice_id, api_key=api_key, - language=language, - parameters=json.dumps(parameters) if parameters else None, ) # Convert to dict diff --git a/wavefront/server/modules/voice_agents_module/voice_agents_module/services/voice_agent_service.py b/wavefront/server/modules/voice_agents_module/voice_agents_module/services/voice_agent_service.py index 26ab4a80..79c826a3 100644 --- a/wavefront/server/modules/voice_agents_module/voice_agents_module/services/voice_agent_service.py +++ b/wavefront/server/modules/voice_agents_module/voice_agents_module/services/voice_agent_service.py @@ -120,6 +120,37 @@ async def _validate_foreign_keys( return True, None + def _validate_tts_stt_parameters( + self, + tts_voice_id: str, + tts_parameters: Optional[dict] = None, + stt_parameters: Optional[dict] = None, + ) -> Tuple[bool, Optional[str]]: + """ + Validate TTS/STT parameters. + + Args: + tts_voice_id: TTS voice identifier + tts_parameters: Provider-specific TTS parameters + stt_parameters: Provider-specific STT parameters + + Returns: + Tuple of (is_valid, error_message). error_message is None if valid. 
+ """ + # Validate TTS voice_id is not empty + if not tts_voice_id or not tts_voice_id.strip(): + return False, 'TTS voice_id is required and cannot be empty' + + # Validate TTS parameters is a dict if provided + if tts_parameters is not None and not isinstance(tts_parameters, dict): + return False, 'TTS parameters must be a dictionary' + + # Validate STT parameters is a dict if provided + if stt_parameters is not None and not isinstance(stt_parameters, dict): + return False, 'STT parameters must be a dictionary' + + return True, None + async def _validate_language_and_phone_config( self, inbound_numbers: List[str], @@ -197,6 +228,8 @@ async def _generate_and_upload_welcome_audio( self, welcome_message: str, tts_config_id: UUID, + tts_voice_id: str, + tts_parameters: Optional[dict], agent_id: UUID, supported_languages: List[str], default_language: str, @@ -208,6 +241,8 @@ async def _generate_and_upload_welcome_audio( Args: welcome_message: Text of the welcome message tts_config_id: TTS config ID to use for generation + tts_voice_id: Voice ID for TTS + tts_parameters: Provider-specific TTS parameters agent_id: Voice agent ID (used for generating storage key) supported_languages: List of supported language codes default_language: Default language code for audio generation @@ -217,7 +252,7 @@ async def _generate_and_upload_welcome_audio( """ logger.info(f'Generating welcome message audio for agent {agent_id}') - # Fetch TTS config + # Fetch TTS config (credentials only) tts_config = await self.tts_config_service.get_config(tts_config_id) if not tts_config: raise ValueError(f'TTS config {tts_config_id} not found') @@ -235,16 +270,21 @@ async def _generate_and_upload_welcome_audio( logger.info(f'Welcome audio text: {audio_text}') - # Update TTS config to use default language - tts_config_with_lang = {**tts_config} - if 'parameters' not in tts_config_with_lang: - tts_config_with_lang['parameters'] = {} - tts_config_with_lang['parameters']['language'] = default_language + 
# Merge config credentials with agent's voice and parameters + tts_config_with_params = { + 'provider': tts_config['provider'], + 'api_key': tts_config['api_key'], + 'voice_id': tts_voice_id, + 'parameters': tts_parameters or {}, + } + + # Add language to parameters for TTS generation + tts_config_with_params['parameters']['language'] = default_language # Generate audio using TTS service try: audio_bytes = await self.tts_generator_service.generate_audio( - audio_text, tts_config_with_lang + audio_text, tts_config_with_params ) logger.info(f'Generated audio: {len(audio_bytes)} bytes') except Exception as e: @@ -278,6 +318,7 @@ async def create_agent( telephony_config_id: UUID, system_prompt: str, welcome_message: str, + tts_voice_id: str, description: Optional[str] = None, conversation_config: Optional[dict] = None, status: str = 'inactive', @@ -285,6 +326,8 @@ async def create_agent( outbound_numbers: Optional[List[str]] = None, supported_languages: Optional[List[str]] = None, default_language: str = 'en', + tts_parameters: Optional[dict] = None, + stt_parameters: Optional[dict] = None, ) -> dict: """ Create a new voice agent with inbound/outbound numbers and language support @@ -297,6 +340,7 @@ async def create_agent( telephony_config_id: Telephony config ID system_prompt: System prompt for the agent welcome_message: Welcome message text (will be converted to audio) + tts_voice_id: TTS voice identifier description: Description of the agent (optional) conversation_config: Conversation configuration (optional) status: Agent status (default: inactive) @@ -304,6 +348,8 @@ async def create_agent( outbound_numbers: Phone numbers for making outbound calls (E.164 format) supported_languages: List of supported language codes (e.g., ["en", "es", "hi"]) default_language: Default language code (must be in supported_languages) + tts_parameters: Provider-specific TTS parameters (optional) + stt_parameters: Provider-specific STT parameters (optional) Returns: Created voice agent 
as dict @@ -327,6 +373,14 @@ async def create_agent( logger.error(f'FK validation failed: {error_message}') raise ValueError(error_message) + # Validate TTS/STT parameters + is_valid, error_message = self._validate_tts_stt_parameters( + tts_voice_id, tts_parameters, stt_parameters + ) + if not is_valid: + logger.error(f'TTS/STT validation failed: {error_message}') + raise ValueError(error_message) + # Validate language and phone configuration is_valid, error_message = await self._validate_language_and_phone_config( inbound_numbers, @@ -348,6 +402,8 @@ async def create_agent( await self._generate_and_upload_welcome_audio( welcome_message, tts_config_id, + tts_voice_id, + tts_parameters, agent_id, supported_languages, default_language, @@ -368,6 +424,9 @@ async def create_agent( else None, welcome_message=welcome_message, status=status, + tts_voice_id=tts_voice_id, + tts_parameters=tts_parameters, + stt_parameters=stt_parameters, inbound_numbers=inbound_numbers, outbound_numbers=outbound_numbers, supported_languages=supported_languages, @@ -527,6 +586,24 @@ async def update_agent(self, agent_id: UUID, **update_data) -> Optional[dict]: logger.error(f'Language/phone validation failed: {error_message}') raise ValueError(error_message) + # Validate TTS/STT parameters if being updated + tts_stt_fields = ['tts_voice_id', 'tts_parameters', 'stt_parameters'] + if any(key in update_data for key in tts_stt_fields): + tts_voice_id = update_data.get('tts_voice_id', existing_agent.tts_voice_id) + tts_parameters = update_data.get( + 'tts_parameters', existing_dict.get('tts_parameters') + ) + stt_parameters = update_data.get( + 'stt_parameters', existing_dict.get('stt_parameters') + ) + + is_valid, error_message = self._validate_tts_stt_parameters( + tts_voice_id, tts_parameters, stt_parameters + ) + if not is_valid: + logger.error(f'TTS/STT validation failed: {error_message}') + raise ValueError(error_message) + # Check if welcome_message or language config changed (requires audio 
regeneration) audio_regeneration_needed = False if ( @@ -542,6 +619,19 @@ async def update_agent(self, agent_id: UUID, **update_data) -> Optional[dict]: 'default_language' ] != existing_dict.get('default_language'): audio_regeneration_needed = True + if ( + 'tts_voice_id' in update_data + and update_data['tts_voice_id'] != existing_agent.tts_voice_id + ): + audio_regeneration_needed = True + if 'tts_parameters' in update_data and update_data[ + 'tts_parameters' + ] != existing_dict.get('tts_parameters'): + audio_regeneration_needed = True + if 'tts_config_id' in update_data and update_data[ + 'tts_config_id' + ] != existing_dict.get('tts_config_id'): + audio_regeneration_needed = True # If any FK fields are being updated, validate them if any( @@ -587,6 +677,12 @@ async def update_agent(self, agent_id: UUID, **update_data) -> Optional[dict]: tts_config_id = update_data.get( 'tts_config_id', existing_agent.tts_config_id ) + tts_voice_id = update_data.get( + 'tts_voice_id', existing_agent.tts_voice_id + ) + tts_parameters = update_data.get( + 'tts_parameters', existing_dict.get('tts_parameters') + ) supported_languages = update_data.get( 'supported_languages', existing_dict.get('supported_languages', ['en']), @@ -598,6 +694,8 @@ async def update_agent(self, agent_id: UUID, **update_data) -> Optional[dict]: await self._generate_and_upload_welcome_audio( welcome_message, tts_config_id, + tts_voice_id, + tts_parameters, agent_id, supported_languages, default_language, From a815b5ea619b207d6c094b00485cad435e31b42c Mon Sep 17 00:00:00 2001 From: rootflo-hardik Date: Mon, 12 Jan 2026 18:11:13 +0530 Subject: [PATCH 5/8] call_processing changes for tts, stt schema change --- .../services/pipecat_service.py | 38 ++++++++++++++----- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/wavefront/server/apps/call_processing/call_processing/services/pipecat_service.py b/wavefront/server/apps/call_processing/call_processing/services/pipecat_service.py index 
0f3f1bd0..ad97e0af 100644 --- a/wavefront/server/apps/call_processing/call_processing/services/pipecat_service.py +++ b/wavefront/server/apps/call_processing/call_processing/services/pipecat_service.py @@ -149,16 +149,21 @@ async def run_conversation( Args: transport: Pipecat transport (e.g., WebSocket transport from Twilio) agent_config: Voice agent configuration including system_prompt, - supported_languages, and default_language + supported_languages, default_language, tts_voice_id, tts_parameters, stt_parameters llm_config: LLM provider configuration - tts_config: TTS provider configuration - stt_config: STT provider configuration + tts_config: TTS provider configuration (credentials only) + stt_config: STT provider configuration (credentials only) """ # Extract language configuration from agent_config supported_languages = agent_config.get('supported_languages', ['en']) default_language = agent_config.get('default_language', 'en') is_multi_language = len(supported_languages) > 1 + # Extract TTS/STT parameters from agent + tts_voice_id = agent_config.get('tts_voice_id') + tts_parameters = agent_config.get('tts_parameters', {}) + stt_parameters = agent_config.get('stt_parameters', {}) + logger.info(f"Starting conversation for agent: {agent_config['name']}") logger.info( f'Language config - supported: {supported_languages}, ' @@ -168,6 +173,21 @@ async def run_conversation( # Create LLM service (language-agnostic) llm = LLMServiceFactory.create_llm_service(llm_config) + # Merge TTS config credentials with agent's voice and parameters + tts_config_with_params = { + 'provider': tts_config['provider'], + 'api_key': tts_config['api_key'], + 'voice_id': tts_voice_id, + 'parameters': tts_parameters or {}, + } + + # Merge STT config credentials with agent's parameters + stt_config_with_params = { + 'provider': stt_config['provider'], + 'api_key': stt_config['api_key'], + 'parameters': stt_parameters or {}, + } + # Create STT/TTS services with multi-language support if 
needed stt_services = {} tts_services = {} @@ -180,8 +200,8 @@ async def run_conversation( # Create STT/TTS services for each supported language for lang_code in supported_languages: # Deep clone configs to avoid mutating original configs - stt_config_lang = deepcopy(stt_config) - tts_config_lang = deepcopy(tts_config) + stt_config_lang = deepcopy(stt_config_with_params) + tts_config_lang = deepcopy(tts_config_with_params) # Update language in parameters if 'parameters' not in stt_config_lang: @@ -229,9 +249,9 @@ async def run_conversation( else: logger.info('Single language mode - no language detection needed') - # Create single STT/TTS services - stt = STTServiceFactory.create_stt_service(stt_config) - tts = TTSServiceFactory.create_tts_service(tts_config) + # Create single STT/TTS services using merged configs + stt = STTServiceFactory.create_stt_service(stt_config_with_params) + tts = TTSServiceFactory.create_tts_service(tts_config_with_params) # Create initial messages with system prompt messages = [ @@ -289,7 +309,7 @@ async def run_conversation( enable_metrics=True, # enable_usage_metrics=True, allow_interruptions=True, - interruption_strategies=[MinWordsInterruptionStrategy(min_words=2)], + interruption_strategies=[MinWordsInterruptionStrategy(min_words=3)], # report_only_initial_ttfb=True ), idle_timeout_secs=20, # Safety net - allows UserIdleProcessor to complete 3 retries (4s each = 12s total) From 27e281bfa5a80c72f3b0ec3672a8c2abfc22716b Mon Sep 17 00:00:00 2001 From: rootflo-hardik Date: Mon, 12 Jan 2026 18:28:47 +0530 Subject: [PATCH 6/8] fe - lang, tts, stt params in voice_agent --- .../voice-agents/CreateVoiceAgentDialog.tsx | 327 ++++++++++++++ .../voice-agents/EditVoiceAgentDialog.tsx | 407 +++++++++++++++++- .../stt-configs/CreateSttConfigDialog.tsx | 236 +--------- .../stt-configs/EditSttConfigDialog.tsx | 234 +--------- .../voice-agents/stt-configs/index.tsx | 5 +- .../tts-configs/CreateTtsConfigDialog.tsx | 314 +------------- 
.../tts-configs/EditTtsConfigDialog.tsx | 255 +---------- .../voice-agents/tts-configs/index.tsx | 8 +- wavefront/client/src/types/stt-config.ts | 7 - wavefront/client/src/types/tts-config.ts | 10 - wavefront/client/src/types/voice-agent.ts | 9 + 11 files changed, 776 insertions(+), 1036 deletions(-) diff --git a/wavefront/client/src/pages/apps/[appId]/voice-agents/CreateVoiceAgentDialog.tsx b/wavefront/client/src/pages/apps/[appId]/voice-agents/CreateVoiceAgentDialog.tsx index c3d7f7fc..7929eb74 100644 --- a/wavefront/client/src/pages/apps/[appId]/voice-agents/CreateVoiceAgentDialog.tsx +++ b/wavefront/client/src/pages/apps/[appId]/voice-agents/CreateVoiceAgentDialog.tsx @@ -20,6 +20,8 @@ import { import { Input } from '@app/components/ui/input'; import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@app/components/ui/select'; import { Checkbox } from '@app/components/ui/checkbox'; +import { Label } from '@app/components/ui/label'; +import { Slider } from '@app/components/ui/slider'; import { useGetLLMConfigs, useGetSttConfigs, @@ -30,6 +32,13 @@ import { extractErrorMessage } from '@app/lib/utils'; import { useDashboardStore, useNotifyStore } from '@app/store'; import { CreateVoiceAgentRequest } from '@app/types/voice-agent'; import { SUPPORTED_LANGUAGES, getLanguageDisplayName } from '@app/constants/languages'; +import { getProviderConfig, initializeParameters } from '@app/config/voice-providers'; +import { + getBooleanParameterWithDefault, + getNumberOrStringParameter, + getNumberParameterWithDefault, + getStringParameter, +} from '@app/utils/parameter-helpers'; import { zodResolver } from '@hookform/resolvers/zod'; import { langs } from '@uiw/codemirror-extensions-langs'; import CodeMirror from '@uiw/react-codemirror'; @@ -47,6 +56,7 @@ const createVoiceAgentSchema = z.object({ tts_config_id: z.string().min(1, 'TTS configuration is required'), stt_config_id: z.string().min(1, 'STT configuration is required'), telephony_config_id: 
z.string().min(1, 'Telephony configuration is required'), + tts_voice_id: z.string().min(1, 'TTS Voice ID is required'), system_prompt: z.string().min(1, 'System prompt is required'), welcome_message: z.string().min(1, 'Welcome message is required'), conversation_config: z.string().optional(), @@ -71,6 +81,8 @@ const CreateVoiceAgentDialog: React.FC = ({ isOpen, const { notifySuccess, notifyError } = useNotifyStore(); const { selectedApp } = useDashboardStore(); const [creating, setCreating] = useState(false); + const [ttsParameters, setTtsParameters] = useState>({}); + const [sttParameters, setSttParameters] = useState>({}); // Fetch configs for dropdowns const { data: llmConfigs = [] } = useGetLLMConfigs(appId); @@ -87,6 +99,7 @@ const CreateVoiceAgentDialog: React.FC = ({ isOpen, tts_config_id: '', stt_config_id: '', telephony_config_id: '', + tts_voice_id: '', system_prompt: '', welcome_message: '', conversation_config: '{}', @@ -98,6 +111,27 @@ const CreateVoiceAgentDialog: React.FC = ({ isOpen, }, }); + // Watch config selections to determine providers + const watchedTtsConfigId = form.watch('tts_config_id'); + const watchedSttConfigId = form.watch('stt_config_id'); + + // Get selected providers + const selectedTtsProvider = ttsConfigs.find((c) => c.id === watchedTtsConfigId)?.provider; + const selectedSttProvider = sttConfigs.find((c) => c.id === watchedSttConfigId)?.provider; + + // Initialize parameters when provider changes + useEffect(() => { + if (isOpen && selectedTtsProvider) { + setTtsParameters(initializeParameters('tts', selectedTtsProvider)); + } + }, [selectedTtsProvider, isOpen]); + + useEffect(() => { + if (isOpen && selectedSttProvider) { + setSttParameters(initializeParameters('stt', selectedSttProvider)); + } + }, [selectedSttProvider, isOpen]); + // Reset form when dialog closes useEffect(() => { if (!isOpen) { @@ -108,6 +142,7 @@ const CreateVoiceAgentDialog: React.FC = ({ isOpen, tts_config_id: '', stt_config_id: '', telephony_config_id: 
'', + tts_voice_id: '', system_prompt: '', welcome_message: '', conversation_config: '{}', @@ -117,6 +152,8 @@ const CreateVoiceAgentDialog: React.FC = ({ isOpen, supported_languages: ['en'], default_language: 'en', }); + setTtsParameters({}); + setSttParameters({}); } }, [isOpen, form]); @@ -132,6 +169,22 @@ const CreateVoiceAgentDialog: React.FC = ({ isOpen, } } + // Build TTS parameters (filter out empty values) + const builtTtsParameters: Record = {}; + Object.entries(ttsParameters).forEach(([key, value]) => { + if (value !== '' && value !== undefined && value !== null) { + builtTtsParameters[key] = value; + } + }); + + // Build STT parameters (filter out empty values) + const builtSttParameters: Record = {}; + Object.entries(sttParameters).forEach(([key, value]) => { + if (value !== '' && value !== undefined && value !== null) { + builtSttParameters[key] = value; + } + }); + // Parse phone numbers (comma-separated) const parsePhoneNumbers = (input: string): string[] => { if (!input.trim()) return []; @@ -173,6 +226,9 @@ const CreateVoiceAgentDialog: React.FC = ({ isOpen, tts_config_id: data.tts_config_id.trim(), stt_config_id: data.stt_config_id.trim(), telephony_config_id: data.telephony_config_id.trim(), + tts_voice_id: data.tts_voice_id.trim(), + tts_parameters: Object.keys(builtTtsParameters).length > 0 ? builtTtsParameters : null, + stt_parameters: Object.keys(builtSttParameters).length > 0 ? 
builtSttParameters : null, system_prompt: data.system_prompt.trim(), welcome_message: data.welcome_message.trim(), conversation_config: conversationConfig, @@ -208,6 +264,231 @@ const CreateVoiceAgentDialog: React.FC = ({ isOpen, } }; + // Helper functions for parameter management + const setTtsParameter = (key: string, value: unknown) => { + setTtsParameters((prev) => ({ ...prev, [key]: value })); + }; + + const setSttParameter = (key: string, value: unknown) => { + setSttParameters((prev) => ({ ...prev, [key]: value })); + }; + + // Render TTS parameter field + const renderTtsParameterField = (key: string) => { + if (!selectedTtsProvider) return null; + const config = getProviderConfig('tts', selectedTtsProvider); + if (!config) return null; + + const paramConfig = config.parameters[key]; + if (!paramConfig) return null; + + switch (paramConfig.type) { + case 'boolean': + return ( +
+
+ setTtsParameter(key, checked)} + /> + +
+
+ ); + + case 'number': + if (paramConfig.min !== undefined && paramConfig.max !== undefined) { + const sliderValue = getNumberParameterWithDefault(ttsParameters, key, paramConfig.default, paramConfig.min); + return ( +
+ + setTtsParameter(key, values[0])} + /> +

+ {paramConfig.min} - {paramConfig.max} +

+
+ ); + } + + return ( +
+ + setTtsParameter(key, e.target.value ? parseFloat(e.target.value) : undefined)} + placeholder={paramConfig.placeholder} + step={paramConfig.step} + /> + {paramConfig.placeholder && ( +

e.g., {paramConfig.placeholder}

+ )} +
+ ); + + case 'string': + default: + if (key === 'language') return null; + + if (paramConfig.options && paramConfig.options.length > 0) { + const selectValue = + getStringParameter(ttsParameters, key) || (paramConfig.default ? String(paramConfig.default) : '') || ''; + return ( +
+ + +
+ ); + } + + return ( +
+ + setTtsParameter(key, e.target.value)} + placeholder={paramConfig.placeholder} + /> + {paramConfig.placeholder && ( +

Default: {paramConfig.placeholder}

+ )} +
+ ); + } + }; + + // Render STT parameter field + const renderSttParameterField = (key: string) => { + if (!selectedSttProvider) return null; + const config = getProviderConfig('stt', selectedSttProvider); + if (!config) return null; + + const paramConfig = config.parameters[key]; + if (!paramConfig) return null; + + switch (paramConfig.type) { + case 'boolean': + return ( +
+
+ setSttParameter(key, checked)} + /> + +
+
+ ); + + case 'number': + if (paramConfig.min !== undefined && paramConfig.max !== undefined) { + const sliderValue = getNumberParameterWithDefault(sttParameters, key, paramConfig.default, paramConfig.min); + return ( +
+ + setSttParameter(key, values[0])} + /> +

+ {paramConfig.min} - {paramConfig.max} +

+
+ ); + } + + return ( +
+ + setSttParameter(key, e.target.value ? parseInt(e.target.value) : undefined)} + placeholder={paramConfig.placeholder} + /> + {paramConfig.placeholder && ( +

e.g., {paramConfig.placeholder}

+ )} +
+ ); + + case 'string': + default: + if (key === 'language') return null; + + if (paramConfig.options && paramConfig.options.length > 0) { + const selectValue = + getStringParameter(sttParameters, key) || (paramConfig.default ? String(paramConfig.default) : '') || ''; + return ( +
+ + +
+ ); + } + + return ( +
+ + setSttParameter(key, e.target.value)} + placeholder={paramConfig.placeholder} + /> + {paramConfig.placeholder && ( +

Default: {paramConfig.placeholder}

+ )} +
+ ); + } + }; + + const ttsProviderConfig = selectedTtsProvider ? getProviderConfig('tts', selectedTtsProvider) : null; + const sttProviderConfig = selectedSttProvider ? getProviderConfig('stt', selectedSttProvider) : null; + return ( @@ -418,6 +699,52 @@ const CreateVoiceAgentDialog: React.FC = ({ isOpen, )} />
+ +
+

TTS Voice Settings

+ ( + + + TTS Voice ID* + + + + + + Provider-specific voice identifier (e.g., for Deepgram: aura-2-helena-en) + + + + )} + /> + + {ttsProviderConfig && + Object.keys(ttsProviderConfig.parameters).filter((key) => key !== 'language').length > 0 && ( +
+

TTS Parameters

+
+ {Object.keys(ttsProviderConfig.parameters) + .filter((key) => key !== 'language') + .map((key) => renderTtsParameterField(key))} +
+
+ )} + + {sttProviderConfig && + Object.keys(sttProviderConfig.parameters).filter((key) => key !== 'language').length > 0 && ( +
+

STT Parameters

+
+ {Object.keys(sttProviderConfig.parameters) + .filter((key) => key !== 'language') + .map((key) => renderSttParameterField(key))} +
+
+ )} +
{/* Phone Numbers */} diff --git a/wavefront/client/src/pages/apps/[appId]/voice-agents/EditVoiceAgentDialog.tsx b/wavefront/client/src/pages/apps/[appId]/voice-agents/EditVoiceAgentDialog.tsx index 56c1846d..21f358f4 100644 --- a/wavefront/client/src/pages/apps/[appId]/voice-agents/EditVoiceAgentDialog.tsx +++ b/wavefront/client/src/pages/apps/[appId]/voice-agents/EditVoiceAgentDialog.tsx @@ -18,6 +18,8 @@ import { FormMessage, } from '@app/components/ui/form'; import { Input } from '@app/components/ui/input'; +import { Label } from '@app/components/ui/label'; +import { Slider } from '@app/components/ui/slider'; import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@app/components/ui/select'; import { Checkbox } from '@app/components/ui/checkbox'; import { @@ -30,6 +32,8 @@ import { extractErrorMessage } from '@app/lib/utils'; import { useDashboardStore, useNotifyStore } from '@app/store'; import { UpdateVoiceAgentRequest, VoiceAgent } from '@app/types/voice-agent'; import { SUPPORTED_LANGUAGES, getLanguageDisplayName } from '@app/constants/languages'; +import { getProviderConfig } from '@app/config/voice-providers'; +import { getBooleanParameterWithDefault, getNumberParameterWithDefault } from '@app/utils/parameter-helpers'; import { zodResolver } from '@hookform/resolvers/zod'; import { langs } from '@uiw/codemirror-extensions-langs'; import CodeMirror from '@uiw/react-codemirror'; @@ -46,6 +50,7 @@ const updateVoiceAgentSchema = z.object({ tts_config_id: z.string().min(1, 'TTS configuration is required'), stt_config_id: z.string().min(1, 'STT configuration is required'), telephony_config_id: z.string().min(1, 'Telephony configuration is required'), + tts_voice_id: z.string().min(1, 'TTS Voice ID is required'), system_prompt: z.string().min(1, 'System prompt is required'), welcome_message: z.string().min(1, 'Welcome message is required'), conversation_config: z.string().optional(), @@ -83,6 +88,10 @@ const EditVoiceAgentDialog: React.FC 
= ({ const { data: sttConfigs = [] } = useGetSttConfigs(appId); const { data: telephonyConfigs = [] } = useGetTelephonyConfigs(appId); + // State for TTS/STT parameters (managed separately from form) + const [ttsParameters, setTtsParameters] = useState>({}); + const [sttParameters, setSttParameters] = useState>({}); + const form = useForm({ resolver: zodResolver(updateVoiceAgentSchema), defaultValues: { @@ -92,6 +101,7 @@ const EditVoiceAgentDialog: React.FC = ({ tts_config_id: agent.tts_config_id, stt_config_id: agent.stt_config_id, telephony_config_id: agent.telephony_config_id, + tts_voice_id: agent.tts_voice_id, system_prompt: agent.system_prompt, welcome_message: agent.welcome_message, conversation_config: agent.conversation_config ? JSON.stringify(agent.conversation_config, null, 2) : '{}', @@ -103,6 +113,16 @@ const EditVoiceAgentDialog: React.FC = ({ }, }); + // Watch for config changes to determine providers + const watchedTtsConfigId = form.watch('tts_config_id'); + const watchedSttConfigId = form.watch('stt_config_id'); + + const selectedTtsProvider = ttsConfigs.find((c) => c.id === watchedTtsConfigId)?.provider; + const selectedSttProvider = sttConfigs.find((c) => c.id === watchedSttConfigId)?.provider; + + const ttsProviderConfig = selectedTtsProvider ? getProviderConfig('tts', selectedTtsProvider) : null; + const sttProviderConfig = selectedSttProvider ? getProviderConfig('stt', selectedSttProvider) : null; + // Reset form when dialog opens with agent data useEffect(() => { if (isOpen && agent) { @@ -113,6 +133,7 @@ const EditVoiceAgentDialog: React.FC = ({ tts_config_id: agent.tts_config_id, stt_config_id: agent.stt_config_id, telephony_config_id: agent.telephony_config_id, + tts_voice_id: agent.tts_voice_id, system_prompt: agent.system_prompt, welcome_message: agent.welcome_message, conversation_config: agent.conversation_config ? 
JSON.stringify(agent.conversation_config, null, 2) : '{}', @@ -125,6 +146,29 @@ const EditVoiceAgentDialog: React.FC = ({ } }, [isOpen, agent, form]); + // Initialize TTS parameters when dialog opens - use only existing values, not merged with defaults + useEffect(() => { + if (isOpen) { + setTtsParameters(agent.tts_parameters || {}); + } + }, [isOpen, agent.tts_parameters]); + + // Initialize STT parameters when dialog opens - use only existing values, not merged with defaults + useEffect(() => { + if (isOpen) { + setSttParameters(agent.stt_parameters || {}); + } + }, [isOpen, agent.stt_parameters]); + + // Helper functions to update parameters + const setTtsParameter = (key: string, value: unknown) => { + setTtsParameters((prev) => ({ ...prev, [key]: value })); + }; + + const setSttParameter = (key: string, value: unknown) => { + setSttParameters((prev) => ({ ...prev, [key]: value })); + }; + const onSubmit = async (data: UpdateVoiceAgentInput) => { // Validate JSON if provided let conversationConfig = null; @@ -137,6 +181,22 @@ const EditVoiceAgentDialog: React.FC = ({ } } + // Build TTS parameters (filter out empty values) + const builtTtsParameters: Record = {}; + Object.entries(ttsParameters).forEach(([key, value]) => { + if (value !== '' && value !== undefined && value !== null) { + builtTtsParameters[key] = value; + } + }); + + // Build STT parameters (filter out empty values) + const builtSttParameters: Record = {}; + Object.entries(sttParameters).forEach(([key, value]) => { + if (value !== '' && value !== undefined && value !== null) { + builtSttParameters[key] = value; + } + }); + // Parse phone numbers (comma-separated) const parsePhoneNumbers = (input: string): string[] => { if (!input.trim()) return []; @@ -171,22 +231,91 @@ const EditVoiceAgentDialog: React.FC = ({ setUpdating(true); try { - const requestData: UpdateVoiceAgentRequest = { - name: data.name.trim(), - description: data.description?.trim() || null, - llm_config_id: 
data.llm_config_id.trim(), - tts_config_id: data.tts_config_id.trim(), - stt_config_id: data.stt_config_id.trim(), - telephony_config_id: data.telephony_config_id.trim(), - system_prompt: data.system_prompt.trim(), - welcome_message: data.welcome_message.trim(), - conversation_config: conversationConfig, - status: data.status, - inbound_numbers: inboundNumbers, - outbound_numbers: outboundNumbers, - supported_languages: data.supported_languages, - default_language: data.default_language, - }; + // Build partial update - only include changed fields + const requestData: Partial = {}; + + if (data.name.trim() !== agent.name) { + requestData.name = data.name.trim(); + } + + const newDescription = data.description?.trim() || null; + if (newDescription !== (agent.description || null)) { + requestData.description = newDescription; + } + + if (data.llm_config_id !== agent.llm_config_id) { + requestData.llm_config_id = data.llm_config_id; + } + + if (data.tts_config_id !== agent.tts_config_id) { + requestData.tts_config_id = data.tts_config_id; + } + + if (data.stt_config_id !== agent.stt_config_id) { + requestData.stt_config_id = data.stt_config_id; + } + + if (data.telephony_config_id !== agent.telephony_config_id) { + requestData.telephony_config_id = data.telephony_config_id; + } + + if (data.tts_voice_id.trim() !== agent.tts_voice_id) { + requestData.tts_voice_id = data.tts_voice_id.trim(); + } + + // Check if TTS parameters changed + const newTtsParams = Object.keys(builtTtsParameters).length > 0 ? builtTtsParameters : null; + if (JSON.stringify(newTtsParams) !== JSON.stringify(agent.tts_parameters || null)) { + requestData.tts_parameters = newTtsParams; + } + + // Check if STT parameters changed + const newSttParams = Object.keys(builtSttParameters).length > 0 ? 
builtSttParameters : null; + if (JSON.stringify(newSttParams) !== JSON.stringify(agent.stt_parameters || null)) { + requestData.stt_parameters = newSttParams; + } + + if (data.system_prompt.trim() !== agent.system_prompt) { + requestData.system_prompt = data.system_prompt.trim(); + } + + if (data.welcome_message.trim() !== agent.welcome_message) { + requestData.welcome_message = data.welcome_message.trim(); + } + + // Check if conversation config changed + if (JSON.stringify(conversationConfig) !== JSON.stringify(agent.conversation_config || null)) { + requestData.conversation_config = conversationConfig; + } + + if (data.status !== agent.status) { + requestData.status = data.status; + } + + // Check if phone numbers changed + if (JSON.stringify(inboundNumbers) !== JSON.stringify(agent.inbound_numbers || [])) { + requestData.inbound_numbers = inboundNumbers; + } + + if (JSON.stringify(outboundNumbers) !== JSON.stringify(agent.outbound_numbers || [])) { + requestData.outbound_numbers = outboundNumbers; + } + + // Check if languages changed + if (JSON.stringify(data.supported_languages) !== JSON.stringify(agent.supported_languages || ['en'])) { + requestData.supported_languages = data.supported_languages; + } + + if (data.default_language !== agent.default_language) { + requestData.default_language = data.default_language; + } + + // Only send request if there are changes + if (Object.keys(requestData).length === 0) { + notifySuccess('No changes to update'); + onOpenChange(false); + return; + } await floConsoleService.voiceAgentService.updateVoiceAgent(agent.id, requestData); @@ -206,6 +335,206 @@ const EditVoiceAgentDialog: React.FC = ({ } }; + // Render TTS parameter field based on type + const renderTtsParameterField = (key: string) => { + if (!selectedTtsProvider) return null; + const config = getProviderConfig('tts', selectedTtsProvider); + if (!config) return null; + + const paramConfig = config.parameters[key]; + if (!paramConfig) return null; + + switch 
(paramConfig.type) { + case 'boolean': + return ( +
+
+ setTtsParameter(key, checked)} + /> + +
+
+ ); + + case 'number': + if (paramConfig.min !== undefined && paramConfig.max !== undefined) { + const sliderValue = getNumberParameterWithDefault(ttsParameters, key, paramConfig.default, paramConfig.min); + return ( +
+ + setTtsParameter(key, values[0])} + /> +

+ {paramConfig.min} - {paramConfig.max} +

+
+ ); + } + // Number without range - use input + return ( +
+ + setTtsParameter(key, parseFloat(e.target.value) || 0)} + /> +
+ ); + + case 'string': + if (paramConfig.options && paramConfig.options.length > 0) { + // Dropdown for predefined options + const currentValue = String(ttsParameters[key] ?? paramConfig.default ?? ''); + return ( +
+ + +
+ ); + } + // Text input for free-form strings + return ( +
+ + setTtsParameter(key, e.target.value)} + /> +
+ ); + + default: + return null; + } + }; + + // Render STT parameter field based on type + const renderSttParameterField = (key: string) => { + if (!selectedSttProvider) return null; + const config = getProviderConfig('stt', selectedSttProvider); + if (!config) return null; + + const paramConfig = config.parameters[key]; + if (!paramConfig) return null; + + switch (paramConfig.type) { + case 'boolean': + return ( +
+
+ setSttParameter(key, checked)} + /> + +
+
+ ); + + case 'number': + if (paramConfig.min !== undefined && paramConfig.max !== undefined) { + const sliderValue = getNumberParameterWithDefault(sttParameters, key, paramConfig.default, paramConfig.min); + return ( +
+ + setSttParameter(key, values[0])} + /> +

+ {paramConfig.min} - {paramConfig.max} +

+
+ ); + } + // Number without range - use input + return ( +
+ + setSttParameter(key, parseFloat(e.target.value) || 0)} + /> +
+ ); + + case 'string': + if (paramConfig.options && paramConfig.options.length > 0) { + // Dropdown for predefined options + const currentValue = String(sttParameters[key] ?? paramConfig.default ?? ''); + return ( +
+ + +
+ ); + } + // Text input for free-form strings + return ( +
+ + setSttParameter(key, e.target.value)} + /> +
+ ); + + default: + return null; + } + }; + return ( @@ -416,6 +745,52 @@ const EditVoiceAgentDialog: React.FC = ({ )} /> + +
+

TTS Voice Settings

+ ( + + + TTS Voice ID* + + + + + + Provider-specific voice identifier (e.g., for Deepgram: aura-2-helena-en) + + + + )} + /> + + {ttsProviderConfig && + Object.keys(ttsProviderConfig.parameters).filter((key) => key !== 'language').length > 0 && ( +
+

TTS Parameters

+
+ {Object.keys(ttsProviderConfig.parameters) + .filter((key) => key !== 'language') + .map((key) => renderTtsParameterField(key))} +
+
+ )} + + {sttProviderConfig && + Object.keys(sttProviderConfig.parameters).filter((key) => key !== 'language').length > 0 && ( +
+

STT Parameters

+
+ {Object.keys(sttProviderConfig.parameters) + .filter((key) => key !== 'language') + .map((key) => renderSttParameterField(key))} +
+
+ )} +
{/* Phone Numbers */} diff --git a/wavefront/client/src/pages/apps/[appId]/voice-agents/stt-configs/CreateSttConfigDialog.tsx b/wavefront/client/src/pages/apps/[appId]/voice-agents/stt-configs/CreateSttConfigDialog.tsx index 10f67c80..5d233c91 100644 --- a/wavefront/client/src/pages/apps/[appId]/voice-agents/stt-configs/CreateSttConfigDialog.tsx +++ b/wavefront/client/src/pages/apps/[appId]/voice-agents/stt-configs/CreateSttConfigDialog.tsx @@ -1,7 +1,6 @@ import floConsoleService from '@app/api'; import { Alert, AlertDescription } from '@app/components/ui/alert'; import { Button } from '@app/components/ui/button'; -import { Checkbox } from '@app/components/ui/checkbox'; import { Dialog, DialogContent, @@ -20,19 +19,11 @@ import { FormMessage, } from '@app/components/ui/form'; import { Input } from '@app/components/ui/input'; -import { Label } from '@app/components/ui/label'; import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@app/components/ui/select'; -import { Slider } from '@app/components/ui/slider'; import { Textarea } from '@app/components/ui/textarea'; -import { VOICE_PROVIDERS_CONFIG, getProviderConfig, initializeParameters } from '@app/config/voice-providers'; +import { VOICE_PROVIDERS_CONFIG, getProviderConfig } from '@app/config/voice-providers'; import { extractErrorMessage } from '@app/lib/utils'; import { useNotifyStore } from '@app/store'; -import { - getBooleanParameterWithDefault, - getNumberOrStringParameter, - getNumberParameterWithDefault, - getStringParameter, -} from '@app/utils/parameter-helpers'; import { zodResolver } from '@hookform/resolvers/zod'; import React, { useEffect, useState } from 'react'; import { useForm } from 'react-hook-form'; @@ -43,7 +34,6 @@ const createSttConfigSchema = z.object({ description: z.string().max(500, 'Description must be 500 characters or less').optional(), provider: z.enum(['deepgram'] as [string, ...string[]]), api_key: z.string().min(1, 'API key is required'), - language: 
z.string().optional(), }); type CreateSttConfigInput = z.infer; @@ -56,8 +46,6 @@ interface CreateSttConfigDialogProps { const CreateSttConfigDialog: React.FC = ({ isOpen, onOpenChange, onSuccess }) => { const { notifySuccess, notifyError } = useNotifyStore(); - - const [parameters, setParameters] = useState>({}); const [loading, setLoading] = useState(false); const form = useForm({ @@ -67,19 +55,9 @@ const CreateSttConfigDialog: React.FC = ({ isOpen, o description: '', provider: 'deepgram', api_key: '', - language: '', }, }); - const watchedProvider = form.watch('provider'); - - // Reset parameters when provider changes - useEffect(() => { - if (isOpen && watchedProvider) { - setParameters(initializeParameters('stt', watchedProvider)); - } - }, [watchedProvider, isOpen]); - // Reset form when dialog closes useEffect(() => { if (!isOpen) { @@ -88,34 +66,10 @@ const CreateSttConfigDialog: React.FC = ({ isOpen, o description: '', provider: 'deepgram', api_key: '', - language: '', }); - setParameters({}); } }, [isOpen, form]); - const setParameter = (key: string, value: unknown) => { - setParameters((prev) => ({ ...prev, [key]: value })); - }; - - const buildParameters = () => { - const config = getProviderConfig('stt', watchedProvider); - if (!config) return null; - - const params: Record = {}; - - Object.entries(parameters).forEach(([key, value]) => { - const paramConfig = config.parameters[key]; - if (!paramConfig) return; - - if (value !== '' && value !== undefined) { - params[key] = value; - } - }); - - return Object.keys(params).length > 0 ? 
params : null; - }; - const onSubmit = async (data: CreateSttConfigInput) => { setLoading(true); try { @@ -124,8 +78,6 @@ const CreateSttConfigDialog: React.FC = ({ isOpen, o description: data.description?.trim() || null, provider: data.provider as 'deepgram', api_key: data.api_key.trim(), - language: data.language?.trim() || null, - parameters: buildParameters(), }); notifySuccess('STT configuration created successfully'); onSuccess?.(); @@ -138,142 +90,6 @@ const CreateSttConfigDialog: React.FC = ({ isOpen, o } }; - const renderParameterField = (key: string) => { - const config = getProviderConfig('stt', watchedProvider); - if (!config) return null; - - const paramConfig = config.parameters[key]; - if (!paramConfig) return null; - - switch (paramConfig.type) { - case 'boolean': - return ( -
-
- setParameter(key, checked)} - /> - -
-
- ); - - case 'number': - if (paramConfig.options && paramConfig.options.length > 0) { - const numValue = getNumberParameterWithDefault(parameters, key, paramConfig.default); - return ( -
- - -
- ); - } - - if (paramConfig.min !== undefined && paramConfig.max !== undefined) { - const sliderValue = getNumberParameterWithDefault(parameters, key, paramConfig.default, paramConfig.min); - return ( -
- - setParameter(key, values[0])} - /> - {paramConfig.description && ( -

{paramConfig.description}

- )} -
- ); - } - - return ( -
- - setParameter(key, e.target.value ? parseInt(e.target.value) : undefined)} - placeholder={paramConfig.placeholder} - /> - {paramConfig.placeholder && ( -

e.g., {paramConfig.placeholder}

- )} -
- ); - - case 'string': - default: - if (key === 'language') return null; - - if (paramConfig.options && paramConfig.options.length > 0) { - const selectValue = - getStringParameter(parameters, key) || (paramConfig.default ? String(paramConfig.default) : '') || ''; - return ( -
- - -
- ); - } - - return ( -
- - setParameter(key, e.target.value)} - placeholder={paramConfig.placeholder} - /> - {paramConfig.placeholder && ( -

Default: {paramConfig.placeholder}

- )} -
- ); - } - }; - - const providerConfig = getProviderConfig('stt', watchedProvider); - return ( @@ -294,7 +110,7 @@ const CreateSttConfigDialog: React.FC = ({ isOpen, o Display Name * - + {field.value?.length || 0}/100 characters @@ -337,7 +153,7 @@ const CreateSttConfigDialog: React.FC = ({ isOpen, o control={form.control} name="description" render={({ field }) => ( - + Description (Optional)