1+ const _ = require ( 'lodash' )
2+ const { createClient } = require ( '@deepgram/sdk' )
3+ const { PassThrough } = require ( 'stream' )
4+ const EventEmitter = require ( 'events' )
5+ const debug = require ( 'debug' ) ( 'botium-speech-processing-deepgram-stt' )
6+
7+ const { deepgramOptions } = require ( '../utils' )
8+
// WebSocket readyState value at which the live connection accepts audio.
const DEEPGRAM_SOCKET_OPEN = 1

/**
 * Builds a Deepgram client from the API key that `deepgramOptions` resolves for the request.
 *
 * @param {object} req - incoming request, passed through to `deepgramOptions`
 * @returns {object} client returned by `createClient`
 * @throws {Error} when no API key is available
 */
function createDeepgramClient (req) {
  const options = deepgramOptions(req)
  if (!options.apiKey) {
    throw new Error('Deepgram API key not configured')
  }
  return createClient(options.apiKey)
}

/**
 * Layers configuration overrides onto `targetOptions` (mutated in place).
 * Order of precedence, lowest to highest: the given defaults, the JSON in the
 * BOTIUM_SPEECH_DEEPGRAM_CONFIG environment variable, `req.body.deepgram.config`.
 *
 * @param {object} targetOptions - option object to extend
 * @param {object} req - incoming request
 * @returns {object} the same `targetOptions` instance
 * @throws {Error} when the environment variable does not contain valid JSON
 */
function applyDeepgramConfig (targetOptions, req) {
  const envConfig = process.env.BOTIUM_SPEECH_DEEPGRAM_CONFIG
  if (envConfig) {
    try {
      Object.assign(targetOptions, JSON.parse(envConfig))
    } catch (err) {
      throw new Error(`Deepgram config in BOTIUM_SPEECH_DEEPGRAM_CONFIG invalid: ${err.message}`)
    }
  }
  if (req.body && req.body.deepgram && req.body.deepgram.config) {
    Object.assign(targetOptions, req.body.deepgram.config)
  }
  return targetOptions
}

/**
 * Speech-to-text adapter backed by the Deepgram SDK, offering one-shot
 * transcription (`stt`) and streaming transcription (`stt_OpenStream`).
 */
class DeepgramSTT {
  /**
   * Lists the language codes offered by this adapter.
   *
   * @param {object} req - incoming request (unused)
   * @returns {Promise<string[]>} hard-coded language codes, sorted
   */
  async languages (req) {
    // Hard-coded list; not queried from the Deepgram API
    return [
      'da', 'de', 'en', 'en-AU', 'en-GB', 'en-IN', 'en-NZ', 'en-US',
      'es', 'es-419', 'fi', 'fr', 'fr-CA', 'hi', 'id', 'it', 'ja',
      'ko', 'nl', 'no', 'pl', 'pt', 'pt-BR', 'pt-PT', 'ru', 'sv',
      'ta', 'th', 'tr', 'uk', 'zh', 'zh-CN', 'zh-TW'
    ].sort()
  }

  /**
   * Opens a live transcription connection and exposes it as a small stream facade.
   *
   * Events emitted on the returned `events` emitter:
   *  - `data`: `{ status: 'ok', text, final, debug, start?, end? }` for each
   *    non-empty transcript, or `{ status: 'error', err }` on connection errors
   *  - `close`: when the underlying connection closes
   *
   * @param {object} req - incoming request (API key and optional `body.deepgram.config`)
   * @param {object} params
   * @param {string} params.language - language code passed to Deepgram
   * @returns {Promise<{events: EventEmitter, write: Function, end: Function, close: Function, triggerHistoryEmit: Function}>}
   * @throws {Error} when the API key or configuration is missing/invalid, or setup fails
   */
  async stt_OpenStream (req, { language }) {
    const deepgram = createDeepgramClient(req)

    const streamOptions = applyDeepgramConfig({
      model: 'general',
      language,
      smart_format: true,
      punctuate: true,
      interim_results: true,
      utterance_end_ms: 1000,
      vad_events: true
    }, req)

    const events = new EventEmitter()
    let eventHistory = []
    let connection = null
    // Audio handed to write() before the socket has opened; flushed on 'open'
    let pendingChunks = []
    let hasOpened = false
    let endRequested = false

    // Emits an event and records it for triggerHistoryEmit() until close() is called
    const publish = (event) => {
      events.emit('data', event)
      if (eventHistory) {
        eventHistory.push(event)
      }
    }

    try {
      connection = deepgram.listen.live(streamOptions)

      connection.on('open', () => {
        debug('Deepgram WebSocket opened')
        hasOpened = true
        // close() may have been called while the socket was still connecting
        if (!connection) return
        for (const chunk of pendingChunks) {
          connection.send(chunk)
        }
        pendingChunks = []
        // end() was called before the socket opened: finish only after the queued audio went out
        if (endRequested) {
          connection.finish()
        }
      })

      connection.on('Results', (data) => {
        debug(data)
        const alternatives = data && data.channel && data.channel.alternatives
        const result = alternatives && alternatives[0]
        if (!result || !result.transcript) return

        const event = {
          status: 'ok',
          text: result.transcript,
          final: data.is_final || false,
          debug: data
        }

        // Numeric check instead of truthiness: a start offset of 0 is valid timing information
        if (Number.isFinite(data.start) && Number.isFinite(data.duration)) {
          event.start = _.round(data.start, 3)
          event.end = _.round(data.start + data.duration, 3)
        }

        publish(event)
      })

      connection.on('UtteranceEnd', () => {
        debug('Deepgram utterance end detected')
      })

      connection.on('error', (err) => {
        publish({
          status: 'error',
          err: `Deepgram STT failed: ${err.message || err}`
        })
      })

      connection.on('close', () => {
        debug('Deepgram WebSocket closed')
        pendingChunks = []
        events.emit('close')
      })
    } catch (err) {
      debug(err)
      throw new Error(`Deepgram STT streaming setup failed: ${err.message}`)
    }

    return {
      events,
      // Sends audio; chunks written before the socket opened are queued, chunks written after it closed are dropped
      write: (buffer) => {
        if (!connection) return
        if (connection.getReadyState() === DEEPGRAM_SOCKET_OPEN) {
          connection.send(buffer)
        } else if (!hasOpened) {
          pendingChunks.push(buffer)
        }
      },
      // Signals end of audio; deferred until the socket has opened so queued audio is sent first
      end: () => {
        if (!connection) return
        if (hasOpened) {
          connection.finish()
        } else {
          endRequested = true
        }
      },
      // Finishes the connection and releases the queued audio and the event history
      close: () => {
        if (connection) {
          connection.finish()
          connection = null
        }
        pendingChunks = []
        eventHistory = null
      },
      // Re-emits every recorded event; a no-op once close() has discarded the history
      triggerHistoryEmit: () => {
        for (const eh of eventHistory || []) {
          events.emit('data', eh)
        }
      }
    }
  }

  /**
   * Transcribes a complete audio buffer in a single request.
   *
   * @param {object} req - incoming request (API key and optional `body.deepgram.config`)
   * @param {object} params
   * @param {string} params.language - language code passed to Deepgram
   * @param {Buffer} params.buffer - audio data to transcribe
   * @param {string} [params.hint] - optional term passed as Deepgram `search` option
   * @returns {Promise<{text: string, debug: object}>} transcript of the first alternative
   *   of the first channel ('' if there is none) and the raw SDK response
   * @throws {Error} when the API key or configuration is missing/invalid, or the request fails
   */
  async stt (req, { language, buffer, hint }) {
    const deepgram = createDeepgramClient(req)

    const transcribeOptions = {
      model: 'general',
      language,
      smart_format: true,
      punctuate: true
    }

    // Add search terms if hint is provided
    if (hint && hint.length > 0) {
      transcribeOptions.search = [hint]
    }

    applyDeepgramConfig(transcribeOptions, req)

    try {
      debug(`Calling Deepgram API with options: ${JSON.stringify(transcribeOptions)}`)

      const response = await deepgram.listen.prerecorded.transcribeFile(
        buffer,
        transcribeOptions
      )

      debug(`Deepgram response: ${JSON.stringify(response, null, 2)}`)

      // The SDK resolves to { result, error }; API failures arrive in `error` instead of being thrown
      if (response && response.error) {
        throw response.error
      }

      // Unwrap `result`, but still accept a bare payload carrying `results` directly
      const payload = (response && response.result) || response
      const channels = payload && payload.results && payload.results.channels
      const channel = channels && channels[0]
      const alternative = channel && channel.alternatives && channel.alternatives[0]

      return {
        text: (alternative && alternative.transcript) || '',
        debug: response
      }
    } catch (err) {
      debug(err)
      throw new Error(`Deepgram STT failed: ${err.message || err}`)
    }
  }
}
207+
208+ module . exports = DeepgramSTT
0 commit comments