Skip to content

Commit b1d83bb

Browse files
authored
Merge pull request #38 from codeforequity-at/tts-streaming
Tts streaming
2 parents 45abbb1 + cded7eb commit b1d83bb

File tree

6 files changed

+1247
-18
lines changed

6 files changed

+1247
-18
lines changed

frontend/src/routes.js

Lines changed: 152 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,140 @@ router.post('/api/stt/:language', async (req, res, next) => {
486486
}
487487
}))
488488

489+
/**
490+
* @swagger
491+
* /api/ttsstream/{language}:
492+
* post:
493+
* description: Open a Websocket stream for converting a text stream to audio
494+
* security:
495+
* - ApiKeyAuth: []
496+
* produces:
497+
* - application/json
498+
* parameters:
499+
* - name: language
500+
* description: Language code (as returned from ttslanguages endpoint)
501+
* in: path
502+
* required: true
503+
* schema:
504+
* type: string
505+
* - name: tts
506+
* description: Text-to-speech backend
507+
* in: query
508+
* required: false
509+
* schema:
510+
* type: string
511+
* enum: [google, azure, polly, ibm, deepgram]
512+
* - name: voice
513+
* description: Voice name (as returned from ttsvoices endpoint)
514+
* in: query
515+
* required: false
516+
* schema:
517+
* type: string
518+
* responses:
519+
* 200:
520+
* description: Websocket URL to stream the text to, plus the URIs to check the stream status and to end the stream
521+
* schema:
522+
* properties:
523+
* wsUri:
524+
* type: string
525+
* statusUri:
526+
* type: string
527+
* endUri:
528+
* type: string
529+
*/
530+
// Handler for GET and POST /api/ttsstream/:language.
// Opens a streaming session on the selected TTS engine, registers it in wssStreams
// under a fresh id and answers with the websocket, status and end URIs for that id.
// Any failure (unknown provider, provider without streaming support, error while
// opening the stream) is forwarded to the Express error handler via next(err).
;[router.get.bind(router), router.post.bind(router)].forEach(m => m('/api/ttsstream/:language', async (req, res, next) => {
  try {
    // Provider comes from the "tts" query parameter, falling back to the configured default.
    const providerName = (req.query.tts && sanitize(req.query.tts)) || process.env.BOTIUM_SPEECH_PROVIDER_TTS
    const tts = ttsEngines[providerName]

    // An unknown provider name resolves to undefined; report that explicitly instead of
    // letting the property access below fail with an opaque TypeError.
    if (!tts) {
      return next(new Error(`TTS provider ${providerName} is not available`))
    }
    if (!tts.tts_OpenStream) {
      return next(new Error(`TTS provider ${providerName} does not support streaming`))
    }

    const streamId = uuidv1()
    const stream = await tts.tts_OpenStream(req, {
      language: req.params.language,
      voice: req.query.voice
    })
    // Drop the registry entry as soon as the stream reports that it has closed.
    stream.events.on('close', () => delete wssStreams[streamId])
    stream.dateTimeStart = new Date()
    // The websocket upgrade code checks this marker to pick text-in/binary-out handling.
    stream.type = 'tts'
    wssStreams[streamId] = stream

    const baseUrls = readBaseUrls(req)
    res.json({
      wsUri: `${baseUrls.wsUri}/${streamId}`,
      statusUri: `${baseUrls.baseUri}/api/ttsstatus/${streamId}`,
      endUri: `${baseUrls.baseUri}/api/ttsend/${streamId}`
    }).end()
  } catch (err) {
    return next(err)
  }
}))
558+
559+
/**
560+
* @swagger
561+
* /api/ttsstatus/{streamId}:
562+
* get:
563+
* description: Check a Websocket stream for converting text stream to audio
564+
* security:
565+
* - ApiKeyAuth: []
566+
* produces:
567+
* - application/json
568+
* parameters:
569+
* - name: streamId
570+
* description: Stream Id (as returned from ttsstream endpoint)
571+
* in: path
572+
* required: true
573+
* schema:
574+
* type: string
575+
* responses:
576+
* 200:
577+
* description: Websocket stream ok
578+
* 404:
579+
* description: Websocket stream not available
580+
*/
581+
// Handler for GET and POST /api/ttsstatus/:streamId.
// Answers 200 with the elapsed stream time (seconds, three decimals) when the id is
// present in wssStreams, otherwise 404 with status NOTFOUND.
;['get', 'post'].forEach(method => router[method]('/api/ttsstatus/:streamId', async (req, res, next) => {
  const { streamId } = req.params
  const activeStream = wssStreams[streamId]

  if (!activeStream) {
    res.status(404).json({ status: 'NOTFOUND', streamId })
    return
  }

  const elapsedMs = new Date() - activeStream.dateTimeStart
  const streamDuration = (elapsedMs / 1000).toFixed(3)
  res.status(200).json({ status: 'OK', streamId, streamDuration })
}))
590+
591+
/**
592+
* @swagger
593+
* /api/ttsend/{streamId}:
594+
* get:
595+
* description: Close a Websocket stream for converting text stream to audio
596+
* security:
597+
* - ApiKeyAuth: []
598+
* produces:
599+
* - application/json
600+
* parameters:
601+
* - name: streamId
602+
* description: Stream Id (as returned from ttsstream endpoint)
603+
* in: path
604+
* required: true
605+
* schema:
606+
* type: string
607+
* responses:
608+
* 200:
609+
* description: Websocket stream closed
610+
*/
611+
// Handler for GET and POST /api/ttsend/:streamId.
// Calls end() on the stream registered under the id, if there is one, and then
// finishes the response with an empty body. An error thrown by end() is passed to
// next(err); an unknown id is not treated as an error.
;['get', 'post'].forEach(method => router[method]('/api/ttsend/:streamId', async (req, res, next) => {
  const openStream = wssStreams[req.params.streamId]

  try {
    if (openStream) openStream.end()
  } catch (err) {
    return next(err)
  }

  res.end()
}))
622+
489623
/**
490624
* @swagger
491625
* /api/audio/info:
@@ -806,6 +940,7 @@ const wssStreams = {}
806940
const stream = await stt.stt_OpenStream(req, { language: req.params.language })
807941
stream.events.on('close', () => delete wssStreams[streamId])
808942
stream.dateTimeStart = new Date()
943+
stream.type = 'stt'
809944
wssStreams[streamId] = stream
810945

811946
const baseUrls = readBaseUrls(req)
@@ -893,15 +1028,28 @@ const wssUpgrade = (req, socket, head) => {
8931028
stream.events.on('data', async (data) => {
8941029
if (data.err) debug(data)
8951030
data.streamDuration = ((new Date() - stream.dateTimeStart) / 1000).toFixed(3)
896-
ws.send(JSON.stringify(data))
1031+
1032+
// For TTS streams, send audio data as binary, for STT send JSON
1033+
if (stream.type === 'tts' && data.buffer) {
1034+
ws.send(data.buffer)
1035+
} else {
1036+
ws.send(JSON.stringify(data))
1037+
}
8971038
})
8981039
stream.events.on('close', () => {
8991040
ws.close()
9001041
wss1.close()
9011042
})
9021043
ws.on('message', (data) => {
903-
if (Buffer.isBuffer(data)) {
904-
stream.write(data)
1044+
if (stream.type === 'tts') {
1045+
// TTS streams expect text messages
1046+
const textData = Buffer.isBuffer(data) ? data.toString('utf8') : data.toString()
1047+
stream.write(textData)
1048+
} else {
1049+
// STT streams expect audio buffers
1050+
if (Buffer.isBuffer(data)) {
1051+
stream.write(data)
1052+
}
9051053
}
9061054
})
9071055
ws.on('close', () => {
@@ -917,7 +1065,7 @@ const wssUpgrade = (req, socket, head) => {
9171065
}
9181066

9191067
// Exports of the routes module: the skipSecurityCheck predicate, the Express router
// and the websocket upgrade handler.
// skipSecurityCheck(req) returns true when req.url starts with one of the listed
// status/end endpoint prefixes (STT, and after this change also TTS).
// NOTE(review): presumably the caller uses a true result to bypass the API-key check
// for those URLs, although their swagger blocks declare ApiKeyAuth — confirm against
// the security middleware.
// In this diff view the line after "920-" is the removed version and the line after
// "1068+" is its replacement.
module.exports = {
920-
skipSecurityCheck: (req) => (req.url.startsWith('/api/sttstatus/') || req.url.startsWith('/api/sttend/')),
1068+
skipSecurityCheck: (req) => (req.url.startsWith('/api/sttstatus/') || req.url.startsWith('/api/sttend/') || req.url.startsWith('/api/ttsstatus/') || req.url.startsWith('/api/ttsend/')),
9211069
router,
9221070
wssUpgrade
9231071
}

0 commit comments

Comments
 (0)