1+ const _ = require ( 'lodash' )
2+ const { createClient } = require ( '@deepgram/sdk' )
3+ const axios = require ( 'axios' )
4+ const debug = require ( 'debug' ) ( 'botium-speech-processing-deepgram-tts' )
5+
6+ const { deepgramOptions, ttsFilename } = require ( '../utils' )
7+
// Documentation page that is scraped for the currently published Aura-2 voice models.
const DEEPGRAM_TTS_MODELS_URL = 'https://developers.deepgram.com/docs/tts-models'
const DEEPGRAM_DOCS_TIMEOUT_MS = 5000

// Matches model ids such as "aura-2-asteria-en" (Aura-2 generation only):
// group 1 is the voice name, group 2 the language code.
// Only ever used through String.prototype.matchAll, which works on a clone of
// the regex, so sharing this global (/g) pattern between calls is safe.
const AURA2_VOICE_PATTERN = /aura-2-([a-z]+)-([a-z]{2,3})/g

// Voice names reported as "female"; every other name is reported as "male".
// NOTE(review): this is a name-based heuristic - voices missing from this list
// are labelled "male" regardless of how they actually sound.
const FEMALE_VOICE_NAMES = new Set([
  'asteria', 'luna', 'stella', 'athena', 'hera', 'esperanza', 'ramona', 'margot',
  'claire', 'liesel', 'greta', 'lucia', 'sofia', 'valentina', 'giulia', 'hina',
  'yuki', 'yuna', 'soo', 'xiaoxiao', 'mei', 'nova', 'emma', 'klara', 'katya',
  'natasha', 'zeynep', 'maya', 'astrid', 'ingrid', 'maja', 'aino', 'oksana',
  'tereza', 'zsofia', 'elena', 'maria', 'ana', 'milica', 'jana', 'meta', 'ausra',
  'liga', 'kadri', 'sarah', 'layla', 'siriporn', 'linh', 'sari', 'siti', 'priya',
  'rashida', 'fatima', 'maryam', 'amara'
])

// Language codes accepted from the scraped page. Anything else the pattern
// happens to capture (e.g. a three letter fragment of a longer word) is dropped.
const VALID_LANGUAGES = new Set([
  'ar', 'bg', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es', 'et',
  'fa', 'fi', 'fr', 'he', 'hi', 'hr', 'hu', 'id', 'it', 'ja',
  'ko', 'lt', 'lv', 'ms', 'nl', 'no', 'pl', 'pt', 'ro', 'ru',
  'sk', 'sl', 'sr', 'sv', 'sw', 'ta', 'th', 'tr', 'uk', 'ur',
  'vi', 'zh'
])

// Model used when the caller does not select a voice.
const DEFAULT_VOICE = 'aura-2-asteria-en'

// Static voice list (Aura-2, English) used when the documentation cannot be fetched or parsed.
const FALLBACK_VOICES = [
  { name: 'aura-2-asteria-en', gender: 'female', language: 'en' },
  { name: 'aura-2-luna-en', gender: 'female', language: 'en' },
  { name: 'aura-2-stella-en', gender: 'female', language: 'en' },
  { name: 'aura-2-athena-en', gender: 'female', language: 'en' },
  { name: 'aura-2-hera-en', gender: 'female', language: 'en' },
  { name: 'aura-2-orion-en', gender: 'male', language: 'en' },
  { name: 'aura-2-arcas-en', gender: 'male', language: 'en' },
  { name: 'aura-2-perseus-en', gender: 'male', language: 'en' },
  { name: 'aura-2-angus-en', gender: 'male', language: 'en' },
  { name: 'aura-2-orpheus-en', gender: 'male', language: 'en' },
  { name: 'aura-2-helios-en', gender: 'male', language: 'en' },
  { name: 'aura-2-zeus-en', gender: 'male', language: 'en' }
]

/**
 * Text-to-speech engine backed by the Deepgram speak API.
 */
class DeepgramTTS {
  /**
   * Scrapes the Deepgram TTS documentation page for Aura-2 model ids.
   *
   * Best effort: any failure (network error, timeout, non-text response,
   * no usable match) is logged via debug and reported as null.
   *
   * @returns {Promise<Array<{name: string, gender: string, language: string}>|null>}
   *   de-duplicated voices with an accepted language code, or null if none were found
   */
  async _fetchVoicesFromDocs () {
    try {
      const response = await axios.get(DEEPGRAM_TTS_MODELS_URL, {
        timeout: DEEPGRAM_DOCS_TIMEOUT_MS,
        headers: {
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
      })

      // axios hands back parsed objects for JSON responses - only text can be scanned
      if (typeof response.data !== 'string') {
        debug('Deepgram documentation response is not text, cannot parse voices')
        return null
      }

      const voices = []
      for (const [name, voiceName, language] of response.data.matchAll(AURA2_VOICE_PATTERN)) {
        const gender = FEMALE_VOICE_NAMES.has(voiceName) ? 'female' : 'male'
        voices.push({ name, gender, language })
      }

      // The same model id is usually mentioned several times on the page
      const filteredVoices = _.uniqBy(voices, 'name')
        .filter(voice => VALID_LANGUAGES.has(voice.language))

      debug(`Fetched ${filteredVoices.length} voices with valid languages from Deepgram documentation`)
      return filteredVoices.length > 0 ? filteredVoices : null
    } catch (err) {
      debug(`Failed to fetch voices from documentation: ${err.message}`)
      return null
    }
  }

  /**
   * Lists the available voices, preferring the live documentation and falling
   * back to a static Aura-2 list when the documentation yields nothing.
   *
   * @param {object} req - incoming request (not used for the lookup)
   * @returns {Promise<Array<{name: string, gender: string, language: string}>>}
   */
  async voices (req) {
    const docVoices = await this._fetchVoicesFromDocs()
    if (docVoices && docVoices.length > 0) {
      return docVoices
    }

    debug('Using fallback static voice list (Aura-2 only)')
    // Hand out copies so callers cannot alter the shared fallback list
    return FALLBACK_VOICES.map(voice => ({ ...voice }))
  }

  /**
   * Lists the distinct language codes of all available voices, sorted alphabetically.
   *
   * @param {object} req - incoming request, passed on to voices()
   * @returns {Promise<string[]>}
   */
  async languages (req) {
    const voicesList = await this.voices(req)
    return _.uniq(voicesList.map(v => v.language)).sort()
  }

  /**
   * Synthesizes speech for the given text with Deepgram.
   *
   * Speak options are layered: built-in defaults (linear16, 16 kHz), then the
   * JSON in BOTIUM_SPEECH_DEEPGRAM_TTS_CONFIG, then req.body.deepgram.config.
   *
   * @param {object} req - incoming request; used to resolve the API key and per-request config
   * @param {object} params
   * @param {string} [params.language] - not used; the language is implied by the voice model
   * @param {string} [params.voice] - Deepgram model id, defaults to "aura-2-asteria-en"
   * @param {string} params.text - text to synthesize
   * @returns {Promise<{buffer: Buffer, name: string}>} audio data and a file name ending in .wav
   * @throws {Error} if the API key is missing, the text is empty, the environment
   *   config is not valid JSON, or the Deepgram call fails (original error in `cause`)
   */
  async tts (req, { language, voice, text }) {
    const options = deepgramOptions(req)
    if (!options.apiKey) {
      throw new Error('Deepgram API key not configured')
    }
    // Reject before spending an API call on a request that cannot produce audio
    if (!text) {
      throw new Error('Deepgram TTS failed: no text given')
    }

    const deepgram = createClient(options.apiKey)

    const speakOptions = {
      model: voice || DEFAULT_VOICE,
      encoding: 'linear16',
      sample_rate: 16000
    }

    // Apply default config from environment
    if (process.env.BOTIUM_SPEECH_DEEPGRAM_TTS_CONFIG) {
      try {
        const defaultConfig = JSON.parse(process.env.BOTIUM_SPEECH_DEEPGRAM_TTS_CONFIG)
        Object.assign(speakOptions, defaultConfig)
      } catch (err) {
        throw new Error(`Deepgram TTS config in BOTIUM_SPEECH_DEEPGRAM_TTS_CONFIG invalid: ${err.message}`)
      }
    }

    // Apply request-specific config (wins over the environment defaults)
    if (req.body && req.body.deepgram && req.body.deepgram.config) {
      Object.assign(speakOptions, req.body.deepgram.config)
    }

    try {
      debug(`Calling Deepgram TTS API with options: ${JSON.stringify(speakOptions)}`)

      const response = await deepgram.speak.request({ text }, speakOptions)

      const stream = await response.getStream()
      if (!stream) {
        throw new Error('No audio stream received from Deepgram')
      }

      // Collect the streamed chunks into a single buffer
      const chunks = []
      for await (const chunk of stream) {
        chunks.push(chunk)
      }
      const buffer = Buffer.concat(chunks)

      debug(`Deepgram TTS response received, buffer size: ${buffer.length}`)

      return {
        buffer,
        name: `${ttsFilename(text)}.wav`
      }
    } catch (err) {
      debug(err)
      // Keep the original error reachable for callers that need the details
      throw new Error(`Deepgram TTS failed: ${err.message || err}`, { cause: err })
    }
  }
}
160+
161+ module . exports = DeepgramTTS
0 commit comments