Skip to content

Commit 97e58c0

Browse files
authored
Merge pull request #223 from bug-is-zhanglinlin/master
极速 ASR SDK 更新
2 parents a539a49 + 717455a commit 97e58c0

File tree

2 files changed

+42
-11
lines changed

2 files changed

+42
-11
lines changed

demo/ci_speech_recognition.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,8 @@ def ci_create_asr_jobs():
7878
'EngineModelType': '16k_zh',
7979
'ChannelNum': '1',
8080
'ResTextFormat': '1',
81+
# 'FlashAsr': 'true',
82+
# 'Format': 'mp3'
8183
}
8284
response = client.ci_create_asr_job(
8385
Bucket=bucket_name,
@@ -122,6 +124,8 @@ def ci_create_asr_template():
122124
EngineModelType='16k_zh',
123125
ChannelNum=1,
124126
ResTextFormat=2,
127+
FlashAsr=True,
128+
Format='mp3',
125129
)
126130
print(response)
127131
return response

qcloud_cos/cos_client.py

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7846,13 +7846,14 @@ def ci_update_asr_queue(self, Bucket, QueueId, Request={}, **kwargs):
78467846
return self.ci_update_media_queue(Bucket=Bucket, QueueId=QueueId,
78477847
Request=Request, UrlPath="/asrqueue/", **kwargs)
78487848

7849-
def ci_create_asr_job(self, Bucket, QueueId, InputObject, OutputBucket, OutputRegion, OutputObject, TemplateId=None,
7849+
def ci_create_asr_job(self, Bucket, QueueId, OutputBucket, OutputRegion, OutputObject, InputObject=None, Url=None, TemplateId=None,
78507850
SpeechRecognition=None, CallBack=None, CallBackFormat=None, CallBackType=None, CallBackMqConfig=None, **kwargs):
78517851
""" 创建语音识别任务接口 https://cloud.tencent.com/document/product/460/78951
78527852
78537853
:param Bucket(string): 存储桶名称.
78547854
:param QueueId(string): 任务所在的队列 ID.
78557855
:param InputObject(string): 文件在 COS 上的文件路径,Bucket 由 Host 指定.
7856+
:param Url(string): 外网可下载的Url.
78567857
:param OutputBucket(string): 存储结果的存储桶.
78577858
:param OutputRegion(string): 存储结果的存储桶的地域.
78587859
:param OutputObject(string): 输出文件路径。
@@ -7901,7 +7902,6 @@ def ci_create_asr_job(self, Bucket, QueueId, InputObject, OutputBucket, OutputRe
79017902
params = format_values(params)
79027903
body = {
79037904
'Input': {
7904-
'Object': InputObject,
79057905
},
79067906
'QueueId': QueueId,
79077907
'Tag': 'SpeechRecognition',
@@ -7913,6 +7913,10 @@ def ci_create_asr_job(self, Bucket, QueueId, InputObject, OutputBucket, OutputRe
79137913
},
79147914
}
79157915
}
7916+
if InputObject:
7917+
body['Input']['Object'] = InputObject
7918+
if Url:
7919+
body['Input']['Url'] = Url
79167920
if TemplateId:
79177921
body['Operation']['TemplateId'] = TemplateId
79187922
if SpeechRecognition:
@@ -8069,9 +8073,9 @@ def ci_list_asr_jobs(self, Bucket, QueueId, StartCreationTime=None, EndCreationT
80698073
format_dict(data, ['JobsDetail'])
80708074
return data
80718075

8072-
def ci_create_asr_template(self, Bucket, Name, EngineModelType, ChannelNum,
8073-
ResTextFormat, FilterDirty=0, FilterModal=0, ConvertNumMode=0, SpeakerDiarization=0,
8074-
SpeakerNumber=0, FilterPunc=0, OutputFileType='txt', **kwargs):
8076+
def ci_create_asr_template(self, Bucket, Name, EngineModelType, ChannelNum=None,
8077+
ResTextFormat=None, FilterDirty=0, FilterModal=0, ConvertNumMode=0, SpeakerDiarization=0,
8078+
SpeakerNumber=0, FilterPunc=0, OutputFileType='txt', FlashAsr=False, Format=None, FirstChannelOnly=1, WordInfo=0, **kwargs):
80758079
""" 创建语音识别模板接口 https://cloud.tencent.com/document/product/460/78939
80768080
80778081
:param Bucket(string): 存储桶名称.
@@ -8099,8 +8103,12 @@ def ci_create_asr_template(self, Bucket, Name, EngineModelType, ChannelNum,
80998103
:param ConvertNumMode(int): 是否进行阿拉伯数字智能转换(目前支持中文普通话引擎):0 表示不转换,直接输出中文数字。1 表示根据场景智能转换为阿拉伯数字。3 表示打开数学相关数字转换。默认值为0。
81008104
:param SpeakerDiarization(int): 是否开启说话人分离:0 表示不开启。1 表示开启(仅支持8k_zh,16k_zh,16k_zh_video,单声道音频)。默认值为0。注意:8k电话场景建议使用双声道来区分通话双方,设置ChannelNum=2即可,不用开启说话人分离。
81018105
:param SpeakerNumber(int): 说话人分离人数(需配合开启说话人分离使用),取值范围:0-10。0 代表自动分离(目前仅支持≤6个人),1-10代表指定说话人数分离。默认值为 0。
8102-
:param FilterPunc(int): 是否过滤标点符号(目前支持中文普通话引擎):0 表示不过滤。1 表示过滤句末标点。2 表示过滤所有标点。默认值为 0。
8106+
:param FilterPunc(int): 是否过滤标点符号(目前支持中文普通话引擎):0 表示不过滤。1 表示过滤句末标点。2 表示过滤所有标点。默认值为 0
81038107
:param OutputFileType(string): 输出文件类型,可选 txt、srt。默认为 txt。
8108+
:param FlashAsr(bool): 是否开启极速ASR,可选true、false。默认为false.
8109+
:param Format(string): 极速ASR音频格式。支持 wav、pcm、ogg-opus、speex、silk、mp3、m4a、aac 。
8110+
:param FirstChannelOnly(int): 极速ASR参数。表示是否只识别首个声道,默认为1。0:识别所有声道;1:识别首个声道。
8111+
:param WordInfo(int): 极速ASR参数。表示是否显示词级别时间戳,默认为0。0:不显示;1:显示,不包含标点时间戳,2:显示,包含标点时间戳。
81048112
:return(dict): 创建成功返回的结果,dict类型.
81058113
81068114
.. code-block:: python
@@ -8135,17 +8143,25 @@ def ci_create_asr_template(self, Bucket, Name, EngineModelType, ChannelNum,
81358143
'Tag': 'SpeechRecognition',
81368144
'SpeechRecognition': {
81378145
'EngineModelType': EngineModelType,
8138-
'ChannelNum': ChannelNum,
8139-
'ResTextFormat': ResTextFormat
81408146
}
81418147
}
8148+
if ChannelNum:
8149+
body['SpeechRecognition']['ChannelNum'] = ChannelNum
8150+
if ResTextFormat:
8151+
body['SpeechRecognition']['ResTextFormat'] = ResTextFormat
81428152
body['SpeechRecognition']['FilterDirty'] = FilterDirty
81438153
body['SpeechRecognition']['FilterModal'] = FilterModal
81448154
body['SpeechRecognition']['ConvertNumMode'] = ConvertNumMode
81458155
body['SpeechRecognition']['SpeakerDiarization'] = SpeakerDiarization
81468156
body['SpeechRecognition']['SpeakerNumber'] = SpeakerNumber
81478157
body['SpeechRecognition']['FilterPunc'] = FilterPunc
81488158
body['SpeechRecognition']['OutputFileType'] = OutputFileType
8159+
body['SpeechRecognition']['FlashAsr'] = str(FlashAsr).lower()
8160+
if Format:
8161+
body['SpeechRecognition']['Format'] = Format
8162+
body['SpeechRecognition']['FirstChannelOnly'] = FirstChannelOnly
8163+
body['SpeechRecognition']['WordInfo'] = WordInfo
8164+
81498165
xml_config = format_xml(data=body, root='Request')
81508166
path = "/template"
81518167
url = self._conf.uri(bucket=Bucket, path=path, endpoint=self._conf._endpoint_ci)
@@ -8168,7 +8184,7 @@ def ci_create_asr_template(self, Bucket, Name, EngineModelType, ChannelNum,
81688184

81698185
def ci_update_asr_template(self, Bucket, TemplateId, Name, EngineModelType, ChannelNum,
81708186
ResTextFormat, FilterDirty=0, FilterModal=0, ConvertNumMode=0, SpeakerDiarization=0,
8171-
SpeakerNumber=0, FilterPunc=0, OutputFileType='txt', **kwargs):
8187+
SpeakerNumber=0, FilterPunc=0, OutputFileType='txt', FlashAsr=False, Format=None, FirstChannelOnly=1, WordInfo=0, **kwargs):
81728188
""" 更新语音识别模板接口 https://cloud.tencent.com/document/product/460/78942
81738189
81748190
:param Bucket(string): 存储桶名称.
@@ -8199,6 +8215,10 @@ def ci_update_asr_template(self, Bucket, TemplateId, Name, EngineModelType, Chan
81998215
:param SpeakerNumber(int): 说话人分离人数(需配合开启说话人分离使用),取值范围:0-10。0 代表自动分离(目前仅支持≤6个人),1-10代表指定说话人数分离。默认值为 0。
82008216
:param FilterPunc(int): 是否过滤标点符号(目前支持中文普通话引擎):0 表示不过滤。1 表示过滤句末标点。2 表示过滤所有标点。默认值为 0。
82018217
:param OutputFileType(string): 输出文件类型,可选 txt、srt。默认为 txt。
8218+
:param FlashAsr(bool): 是否开启极速ASR,可选true、false。默认为false.
8219+
:param Format(string): 极速ASR音频格式。支持 wav、pcm、ogg-opus、speex、silk、mp3、m4a、aac 。
8220+
:param FirstChannelOnly(int): 极速ASR参数。表示是否只识别首个声道,默认为1。0:识别所有声道;1:识别首个声道。
8221+
:param WordInfo(int): 极速ASR参数。表示是否显示词级别时间戳,默认为0。0:不显示;1:显示,不包含标点时间戳,2:显示,包含标点时间戳。
82028222
:return(dict): 更新成功返回的结果,dict类型.
82038223
82048224
.. code-block:: python
@@ -8234,17 +8254,24 @@ def ci_update_asr_template(self, Bucket, TemplateId, Name, EngineModelType, Chan
82348254
'Tag': 'SpeechRecognition',
82358255
'SpeechRecognition': {
82368256
'EngineModelType': EngineModelType,
8237-
'ChannelNum': ChannelNum,
8238-
'ResTextFormat': ResTextFormat
82398257
}
82408258
}
8259+
if ChannelNum:
8260+
body['SpeechRecognition']['ChannelNum'] = ChannelNum
8261+
if ResTextFormat:
8262+
body['SpeechRecognition']['ResTextFormat'] = ResTextFormat
82418263
body['SpeechRecognition']['FilterDirty'] = FilterDirty
82428264
body['SpeechRecognition']['FilterModal'] = FilterModal
82438265
body['SpeechRecognition']['ConvertNumMode'] = ConvertNumMode
82448266
body['SpeechRecognition']['SpeakerDiarization'] = SpeakerDiarization
82458267
body['SpeechRecognition']['SpeakerNumber'] = SpeakerNumber
82468268
body['SpeechRecognition']['FilterPunc'] = FilterPunc
82478269
body['SpeechRecognition']['OutputFileType'] = OutputFileType
8270+
body['SpeechRecognition']['FlashAsr'] = str(FlashAsr).lower()
8271+
if Format:
8272+
body['SpeechRecognition']['Format'] = Format
8273+
body['SpeechRecognition']['FirstChannelOnly'] = FirstChannelOnly
8274+
body['SpeechRecognition']['WordInfo'] = WordInfo
82488275
xml_config = format_xml(data=body, root='Request')
82498276
path = "/template/" + TemplateId
82508277
url = self._conf.uri(bucket=Bucket, path=path, endpoint=self._conf._endpoint_ci)

0 commit comments

Comments
 (0)