Skip to content

Commit ef470f0

Browse files
authored
Merge pull request #1068 from TakahiroSakuda/fix_stt_audio_metrics
Fix AudioMetrics bug (Speech To Text)
2 parents (0dfa433 + ea71adb), commit ef470f0

File tree

2 files changed

+20
-8
lines changed

2 files changed

+20
-8
lines changed

speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/websocket/SpeechToTextWebSocketListener.java

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -54,6 +54,7 @@ public final class SpeechToTextWebSocketListener extends WebSocketListener {
5454
private static final String ERROR = "error";
5555
private static final String RESULTS = "results";
5656
private static final String SPEAKER_LABELS = "speaker_labels";
57+
private static final String AUDIO_METRICS = "audio_metrics";
5758
private static final String CUSTOMIZATION_ID = "customization_id";
5859
private static final String LANGUAGE_CUSTOMIZATION_ID = "language_customization_id";
5960
private static final String ACOUSTIC_CUSTOMIZATION_ID = "acoustic_customization_id";
@@ -129,7 +130,7 @@ public void onMessage(WebSocket webSocket, String message) {
129130
// notify that the service timeouts because of inactivity
130131
callback.onInactivityTimeout(new RuntimeException(error));
131132
}
132-
} else if (json.has(RESULTS) || json.has(SPEAKER_LABELS)) {
133+
} else if (json.has(RESULTS) || json.has(SPEAKER_LABELS) || json.has(AUDIO_METRICS)) {
133134
callback.onTranscription(GSON.fromJson(message, SpeechRecognitionResults.class));
134135

135136
} else if (json.has(STATE)) {

speech-to-text/src/test/java/com/ibm/watson/speech_to_text/v1/SpeechToTextIT.java

Lines changed: 18 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -103,7 +103,8 @@ public class SpeechToTextIT extends WatsonServiceTest {
103103

104104
private CountDownLatch lock = new CountDownLatch(1);
105105
private SpeechToText service;
106-
private SpeechRecognitionResults asyncResults;
106+
private SpeechRecognitionResults asyncTranscriptionResults;
107+
private SpeechRecognitionResults asyncAudioMetricsResults;
107108
private Boolean inactivityTimeoutOccurred;
108109
private String customizationId;
109110
private String acousticCustomizationId;
@@ -221,11 +222,13 @@ public void testRecognizeFileStringRecognizeOptions() throws FileNotFoundExcepti
221222
.model(EN_BROADBAND16K)
222223
.contentType(contentType)
223224
.profanityFilter(false)
225+
.audioMetrics(true)
224226
.build();
225227
SpeechRecognitionResults results = service.recognize(options).execute().getResult();
226228
assertNotNull(results.getResults().get(0).getAlternatives().get(0).getTranscript());
227229
assertNotNull(results.getResults().get(0).getAlternatives().get(0).getTimestamps());
228230
assertNotNull(results.getResults().get(0).getAlternatives().get(0).getWordConfidence());
231+
assertNotNull(results.getAudioMetrics());
229232
}
230233

231234
/**
@@ -286,6 +289,7 @@ public void testRecognizeWebSocket() throws FileNotFoundException, InterruptedEx
286289
.interimResults(true)
287290
.processingMetrics(true)
288291
.processingMetricsInterval(0.2f)
292+
.audioMetrics(true)
289293
.build();
290294

291295
service.recognizeUsingWebSocket(options, new BaseRecognizeCallback() {
@@ -314,22 +318,29 @@ public void onError(Exception e) {
314318

315319
@Override
316320
public void onTranscription(SpeechRecognitionResults speechResults) {
317-
if (speechResults != null && speechResults.getResults().get(0).isFinalResults()) {
318-
asyncResults = speechResults;
321+
if (speechResults != null) {
322+
if (speechResults.getResults() != null && speechResults.getResults().get(0).isFinalResults()) {
323+
asyncTranscriptionResults = speechResults;
324+
}
325+
if (speechResults.getAudioMetrics() != null) {
326+
asyncAudioMetricsResults = speechResults;
327+
}
319328
System.out.println(speechResults);
320329
}
321330
}
322331

323332
});
324333

325334
lock.await(2, TimeUnit.MINUTES);
326-
assertNotNull(asyncResults);
335+
assertNotNull(asyncTranscriptionResults);
336+
assertNotNull(asyncAudioMetricsResults);
327337

328-
List<WordAlternativeResults> wordAlternatives = asyncResults.getResults().get(asyncResults.getResultIndex()
329-
.intValue()).getWordAlternatives();
338+
List<WordAlternativeResults> wordAlternatives = asyncTranscriptionResults.getResults()
339+
.get(asyncTranscriptionResults.getResultIndex().intValue()).getWordAlternatives();
330340
assertTrue(wordAlternatives != null && !wordAlternatives.isEmpty());
331341
assertNotNull(wordAlternatives.get(0).getAlternatives());
332-
assertNotNull(asyncResults.getProcessingMetrics());
342+
assertNotNull(asyncTranscriptionResults.getProcessingMetrics());
343+
assertNotNull(asyncAudioMetricsResults.getAudioMetrics());
333344
}
334345

335346
/**

0 commit comments

Comments
 (0)