@@ -486,6 +486,140 @@ router.post('/api/stt/:language', async (req, res, next) => {
486486 }
487487} ) )
488488
489+ /**
490+ * @swagger
491+ * /api/ttsstream/{language}:
492+ * post:
493+ * description: Open a Websocket stream for converting text stream to audio
494+ * security:
495+ * - ApiKeyAuth: []
496+ * produces:
497+ * - application/json
498+ * parameters:
499+ * - name: language
500+ * description: Language code (as returned from ttslanguages endpoint)
501+ * in: path
502+ * required: true
503+ * schema:
504+ * type: string
505+ * - name: tts
506+ * description: Text-to-speech backend
507+ * in: query
508+ * required: false
509+ * schema:
510+ * type: string
511+ * enum: [google, azure, polly, ibm, deepgram]
512+ * - name: voice
513+ * description: Voice name (as returned from ttsvoices endpoint)
514+ * in: query
515+ * required: false
516+ * schema:
517+ * type: string
518+ * responses:
519+ * 200:
520+ * description: Websocket Url to stream the text to, and the uri to check status and end the stream
521+ * schema:
522+ * properties:
523+ * wsUri:
524+ * type: string
525+ * statusUri:
526+ * type: string
527+ * endUri:
528+ * type: string
529+ */
;[router.get.bind(router), router.post.bind(router)].forEach(m => m('/api/ttsstream/:language', async (req, res, next) => {
  // Registered for both GET and POST. Opens a streaming session on the selected
  // TTS provider, stores it in wssStreams under a fresh id and answers with the
  // websocket / status / end URIs built from that id.
  try {
    // Resolve the provider name once so the engine lookup and the error
    // messages below always refer to the same value.
    const ttsName = (req.query.tts && sanitize(req.query.tts)) || process.env.BOTIUM_SPEECH_PROVIDER_TTS
    const tts = ttsEngines[ttsName]

    // An unknown provider name (or an unset BOTIUM_SPEECH_PROVIDER_TTS) leaves
    // tts undefined; report that explicitly instead of failing with a
    // TypeError on the property access below.
    if (!tts) {
      return next(new Error(`TTS provider ${ttsName} is not available`))
    }
    if (!tts.tts_OpenStream) {
      return next(new Error(`TTS provider ${ttsName} does not support streaming`))
    }

    const streamId = uuidv1()
    const stream = await tts.tts_OpenStream(req, {
      language: req.params.language,
      voice: req.query.voice
    })
    // Drop the registry entry as soon as the stream reports that it is closed.
    stream.events.on('close', () => delete wssStreams[streamId])
    stream.dateTimeStart = new Date()
    // The websocket upgrade handler reads this marker to tell TTS from STT streams.
    stream.type = 'tts'
    wssStreams[streamId] = stream

    const baseUrls = readBaseUrls(req)
    res.json({
      wsUri: `${baseUrls.wsUri}/${streamId}`,
      statusUri: `${baseUrls.baseUri}/api/ttsstatus/${streamId}`,
      endUri: `${baseUrls.baseUri}/api/ttsend/${streamId}`
    }).end()
  } catch (err) {
    return next(err)
  }
}))
558+
559+ /**
560+ * @swagger
561+ * /api/ttsstatus/{streamId}:
562+ * get:
563+ * description: Check a Websocket stream for converting text stream to audio
564+ * security:
565+ * - ApiKeyAuth: []
566+ * produces:
567+ * - application/json
568+ * parameters:
569+ * - name: streamId
570+ * description: Stream Id (as returned from ttsstream endpoint)
571+ * in: path
572+ * required: true
573+ * schema:
574+ * type: string
575+ * responses:
576+ * 200:
577+ * description: Websocket stream ok
578+ * 404:
579+ * description: Websocket stream not available
580+ */
;[router.get.bind(router), router.post.bind(router)].forEach(m => m('/api/ttsstatus/:streamId', (req, res, next) => {
  // Registered for both GET and POST. Reports whether a stream with the given
  // id is currently registered and, if so, how long it has been open (seconds,
  // three decimals, as a string).
  const { streamId } = req.params

  // wssStreams is a plain object indexed by a caller-supplied id. Only own
  // entries count, otherwise ids such as "constructor" or "toString" would
  // resolve to inherited members and be reported as live streams.
  const stream = Object.prototype.hasOwnProperty.call(wssStreams, streamId) ? wssStreams[streamId] : undefined

  if (stream) {
    const streamDuration = ((new Date() - stream.dateTimeStart) / 1000).toFixed(3)
    res.status(200).json({ status: 'OK', streamId, streamDuration })
  } else {
    res.status(404).json({ status: 'NOTFOUND', streamId })
  }
}))
590+
591+ /**
592+ * @swagger
593+ * /api/ttsend/{streamId}:
594+ * get:
595+ * description: Close a Websocket stream for converting text stream to audio
596+ * security:
597+ * - ApiKeyAuth: []
598+ * produces:
599+ * - application/json
600+ * parameters:
601+ * - name: streamId
602+ * description: Stream Id (as returned from ttsstream endpoint)
603+ * in: path
604+ * required: true
605+ * schema:
606+ * type: string
607+ * responses:
608+ * 200:
609+ * description: Websocket stream closed
610+ */
;[router.get.bind(router), router.post.bind(router)].forEach(m => m('/api/ttsend/:streamId', (req, res, next) => {
  // Registered for both GET and POST. Ends the stream registered under the
  // given id; an unknown id is not an error and still gets an empty response.
  const { streamId } = req.params

  // Only own entries of wssStreams are real streams. Without this check an id
  // such as "constructor" resolves to an inherited member that has no end()
  // method, turning a harmless request into an error response.
  const stream = Object.prototype.hasOwnProperty.call(wssStreams, streamId) ? wssStreams[streamId] : undefined

  if (stream) {
    try {
      stream.end()
    } catch (err) {
      return next(err)
    }
  }
  res.end()
}))
622+
489623/**
490624 * @swagger
491625 * /api/audio/info:
@@ -806,6 +940,7 @@ const wssStreams = {}
806940 const stream = await stt . stt_OpenStream ( req , { language : req . params . language } )
807941 stream . events . on ( 'close' , ( ) => delete wssStreams [ streamId ] )
808942 stream . dateTimeStart = new Date ( )
943+ stream . type = 'stt'
809944 wssStreams [ streamId ] = stream
810945
811946 const baseUrls = readBaseUrls ( req )
@@ -893,15 +1028,28 @@ const wssUpgrade = (req, socket, head) => {
8931028 stream . events . on ( 'data' , async ( data ) => {
8941029 if ( data . err ) debug ( data )
8951030 data . streamDuration = ( ( new Date ( ) - stream . dateTimeStart ) / 1000 ) . toFixed ( 3 )
896- ws . send ( JSON . stringify ( data ) )
1031+
1032+ // For TTS streams, send audio data as binary, for STT send JSON
1033+ if ( stream . type === 'tts' && data . buffer ) {
1034+ ws . send ( data . buffer )
1035+ } else {
1036+ ws . send ( JSON . stringify ( data ) )
1037+ }
8971038 } )
8981039 stream . events . on ( 'close' , ( ) => {
8991040 ws . close ( )
9001041 wss1 . close ( )
9011042 } )
9021043 ws . on ( 'message' , ( data ) => {
903- if ( Buffer . isBuffer ( data ) ) {
904- stream . write ( data )
1044+ if ( stream . type === 'tts' ) {
1045+ // TTS streams expect text messages
1046+ const textData = Buffer . isBuffer ( data ) ? data . toString ( 'utf8' ) : data . toString ( )
1047+ stream . write ( textData )
1048+ } else {
1049+ // STT streams expect audio buffers
1050+ if ( Buffer . isBuffer ( data ) ) {
1051+ stream . write ( data )
1052+ }
9051053 }
9061054 } )
9071055 ws . on ( 'close' , ( ) => {
@@ -917,7 +1065,7 @@ const wssUpgrade = (req, socket, head) => {
9171065}
9181066
9191067module . exports = {
920- skipSecurityCheck : ( req ) => ( req . url . startsWith ( '/api/sttstatus/' ) || req . url . startsWith ( '/api/sttend/' ) ) ,
1068+ skipSecurityCheck : ( req ) => ( req . url . startsWith ( '/api/sttstatus/' ) || req . url . startsWith ( '/api/sttend/' ) || req . url . startsWith ( '/api/ttsstatus/' ) || req . url . startsWith ( '/api/ttsend/' ) ) ,
9211069 router,
9221070 wssUpgrade
9231071}
0 commit comments