Skip to content

Commit e2fd23e

Browse files
[speech-to-text] Detect the audio format automatically based on the
file extension. #135
1 parent a958256 commit e2fd23e

File tree

4 files changed

+97
-5
lines changed

4 files changed

+97
-5
lines changed

src/main/java/com/ibm/watson/developer_cloud/http/HttpMediaType.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,17 @@ public interface HttpMediaType {
8080
* Field AUDIO_WAV. (value is "audio/wav")
8181
*/
8282
public final static String AUDIO_WAV = "audio/wav";
83+
84+
/**
85+
* Field AUDIO_FLAC. (value is "audio/flac")
86+
*/
87+
public static final String AUDIO_FLAC = "audio/flac";
88+
89+
/**
90+
* Field AUDIO_RAW. (value is "audio/l16")
91+
*/
92+
public static final String AUDIO_RAW = "audio/l16";
93+
8394
/**
8495
* Field BINARY_FILE. (value is "application/octet-stream")
8596
*/

src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToText.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechModelSet;
2626
import com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechResults;
2727
import com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechSession;
28+
import com.ibm.watson.developer_cloud.speech_to_text.v1.util.MediaTypeUtils;
2829
import com.ibm.watson.developer_cloud.util.GsonSingleton;
2930
import com.ibm.watson.developer_cloud.util.ResponseUtil;
3031
import com.ibm.watson.developer_cloud.util.Validate;
@@ -206,6 +207,19 @@ public SessionStatus getRecognizeStatus(final SpeechSession session) {
206207
return GsonSingleton.getGson().fromJson(jsonObject.get(SESSION), SessionStatus.class);
207208
}
208209

210+
/**
211+
* Recognizes an audio file and returns {@link SpeechResults}. It will try to recognize the audio
212+
* format based on the file extension.
213+
*
214+
* @param audio the audio file
215+
* @return the {@link SpeechResults}
216+
*/
217+
public SpeechResults recognize(File audio) {
218+
String contentType = MediaTypeUtils.getMediaTypeFromFile(audio);
219+
Validate.notNull(contentType, "Audio format cannot be recognized");
220+
return recognize(audio, contentType, null);
221+
}
222+
209223
/**
210224
* Recognizes an audio file and returns {@link SpeechResults}.
211225
*
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/**
2+
* Copyright 2015 IBM Corp. All Rights Reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5+
* in compliance with the License. You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software distributed under the License
10+
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11+
* or implied. See the License for the specific language governing permissions and limitations under
12+
* the License.
13+
*/
14+
package com.ibm.watson.developer_cloud.speech_to_text.v1.util;
15+
16+
import java.io.File;
17+
18+
import com.ibm.watson.developer_cloud.http.HttpMediaType;
19+
import com.ibm.watson.developer_cloud.speech_to_text.v1.SpeechToText;
20+
21+
/**
22+
* The utilities required for processing audio files using the {@link SpeechToText} service.
23+
*
24+
* @see SpeechToText
25+
*/
26+
public class MediaTypeUtils {
27+
private static String[] SUPPORTED_EXTENSION = {".wav", ".ogg", ".oga", ".flac", ".raw"};
28+
29+
private static String[] SUPPORTED_MEDIA_TYPES = {HttpMediaType.AUDIO_WAV,
30+
HttpMediaType.AUDIO_OGG, HttpMediaType.AUDIO_OGG, HttpMediaType.AUDIO_FLAC,
31+
HttpMediaType.AUDIO_RAW};
32+
33+
/**
34+
* Returns the media type for a given file.
35+
*
36+
* @param file the file object for which media type needs to be provided
37+
* @return Internet media type for the file
38+
*/
39+
public static String getMediaTypeFromFile(final File file) {
40+
if (file != null) {
41+
final String fileName = file.getName().toLowerCase();
42+
for (int i = 0; i < SUPPORTED_MEDIA_TYPES.length; i++) {
43+
if (fileName.endsWith(SUPPORTED_EXTENSION[i])) {
44+
return SUPPORTED_MEDIA_TYPES[i];
45+
}
46+
}
47+
}
48+
return null;
49+
}
50+
51+
/**
52+
* Checks if the media type is supported by the service.
53+
*
54+
* @param mediaType Internet media type for the file
55+
* @return true if it is supported, false if not.
56+
*/
57+
public static Boolean isValidMediaType(final String mediaType) {
58+
if (mediaType != null) {
59+
for (final String supportedMediaType : SUPPORTED_MEDIA_TYPES) {
60+
if (mediaType.equalsIgnoreCase(supportedMediaType)) {
61+
return true;
62+
}
63+
}
64+
}
65+
return false;
66+
}
67+
68+
}

src/test/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToTextIT.java

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
/**
22
* Copyright 2015 IBM Corp. All Rights Reserved.
3-
*
3+
*
44
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
55
* in compliance with the License. You may obtain a copy of the License at
6-
*
6+
*
77
* http://www.apache.org/licenses/LICENSE-2.0
8-
*
8+
*
99
* Unless required by applicable law or agreed to in writing, software distributed under the License
1010
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
1111
* or implied. See the License for the specific language governing permissions and limitations under
@@ -108,8 +108,7 @@ public void testGetRecognizeStatus() {
108108
@Test
109109
public void testRecognizeFileString() {
110110
final File audio = new File("src/test/resources/speech_to_text/sample1.wav");
111-
final String contentType = HttpMediaType.AUDIO_WAV;
112-
final SpeechResults results = service.recognize(audio, contentType);
111+
final SpeechResults results = service.recognize(audio);
113112
assertNotNull(results.getResults().get(0).getAlternatives().get(0).getTranscript());
114113
}
115114

0 commit comments

Comments
 (0)