Merge pull request #189 from degangliu/master

zhy1985555 · web-flow · commit f42ad7b1b3ee · 2021-12-03T10:49:19.000+08:00
优化审核接口返回参数的array相关字段
diff --git a/qcloud_cos/cos_client.py b/qcloud_cos/cos_client.py
@@ -511,7 +511,25 @@ def get_object_sensitive_content_recognition(self, Bucket, Key, DetectType, Inte
             params=params,
             headers=headers)
 
+        logging.debug("get object sensitive content recognition rsp:%s", rt.content)
         data = xml_to_dict(rt.content)
+        # format res
+        if 'PornInfo' in data:
+            if 'OcrResults' in data['PornInfo']:
+                format_dict_or_list(data['PornInfo']['OcrResults'], ['Keywords'])
+            format_dict(data['PornInfo'], ['OcrResults', 'ObjectResults'])
+        if 'TerroristInfo' in data:
+            if 'OcrResults' in data['TerroristInfo']:
+                format_dict_or_list(data['TerroristInfo']['OcrResults'], ['Keywords'])
+            format_dict(data['TerroristInfo'], ['OcrResults', 'ObjectResults'])
+        if 'PoliticsInfo' in data:
+            if 'OcrResults' in data['PoliticsInfo']:
+                format_dict_or_list(data['PoliticsInfo']['OcrResults'], ['Keywords'])
+            format_dict(data['PoliticsInfo'], ['OcrResults', 'ObjectResults'])
+        if 'AdsInfo' in data:
+            if 'OcrResults' in data['AdsInfo']:
+                format_dict_or_list(data['AdsInfo']['OcrResults'], ['Keywords'])
+            format_dict(data['AdsInfo'], ['OcrResults', 'ObjectResults'])
 
         return data
 
@@ -4434,7 +4452,7 @@ def ci_auditing_submit_common(self, Bucket, Key, DetectType, Type, Url=None, Biz
 
         :param Bucket(string): 存储桶名称.
         :param Key(string): COS路径.
-        :param DetectType(int): 内容识别标志,位计算 1:porn, 2:terrorist, 4:politics, 8:ads
+        :param DetectType(int): 内容识别标志,位计算 1:porn, 2:terrorist, 4:politics, 8:ads, 16:illegal, 32:abuse
         :param Type(string): 审核类型，video:视频，text：文本，audio：音频，docment：文档。
         :param Url(string): Url, 支持非cos上的文件
         :param Conf(dic): 审核的个性化配置
@@ -4505,6 +4523,7 @@ def ci_auditing_submit_common(self, Bucket, Key, DetectType, Type, Url=None, Biz
             params=params,
             headers=headers)
 
+        logging.debug("ci auditing rsp:%s", rt.content)
         data = xml_to_dict(rt.content)
 
         return data
@@ -4557,6 +4576,7 @@ def ci_auditing_query_common(self, Bucket, Type, JobID, **kwargs):
             params=params,
             headers=headers)
 
+        logging.debug("query ci auditing:%s", rt.content)
         data = xml_to_dict(rt.content)
 
         return data
@@ -4641,13 +4661,50 @@ def ci_auditing_video_query(self, Bucket, JobID, **kwargs):
             print response
         """
 
-        return self.ci_auditing_query_common(
+        data = self.ci_auditing_query_common(
             Bucket=Bucket,
             JobID=JobID,
             Type='video',
             **kwargs
         )
 
+        if 'JobsDetail' in data:
+            format_dict(data['JobsDetail'], ['Snapshot', 'AudioSection'])
+            if 'Snapshot' in data['JobsDetail']:
+                for snapshot in data['JobsDetail']['Snapshot']:
+                    if 'PornInfo' in snapshot:
+                        format_dict(snapshot['PornInfo'], ['OcrResults', 'ObjectResults'])
+                        if 'OcrResults' in snapshot['PornInfo']:
+                            for ocrResult in snapshot['PornInfo']['OcrResults']:
+                                format_dict(ocrResult, ['Keywords'])
+                    if 'TerrorismInfo' in snapshot:
+                        format_dict(snapshot['TerrorismInfo'], ['OcrResults', 'ObjectResults'])
+                        if 'OcrResults' in snapshot['TerrorismInfo']:
+                            for ocrResult in snapshot['TerrorismInfo']['OcrResults']:
+                                format_dict(ocrResult, ['Keywords'])
+                    if 'PoliticsInfo' in snapshot:
+                        format_dict(snapshot['PoliticsInfo'], ['OcrResults', 'ObjectResults'])
+                        if 'OcrResults' in snapshot['PoliticsInfo']:
+                            for ocrResult in snapshot['PoliticsInfo']['OcrResults']:
+                                format_dict(ocrResult, ['Keywords'])
+                    if 'AdsInfo' in snapshot:
+                        format_dict(snapshot['AdsInfo'], ['OcrResults', 'ObjectResults'])
+                        if 'OcrResults' in snapshot['AdsInfo']:
+                            for ocrResult in snapshot['AdsInfo']['OcrResults']:
+                                format_dict(ocrResult, ['Keywords'])
+            if 'AudioSection' in data['JobsDetail']:
+                for audioSection in data['JobsDetail']['AudioSection']:
+                    if 'PornInfo' in audioSection:
+                        format_dict(audioSection['PornInfo'], ['Keywords'])
+                    if 'TerrorismInfo' in audioSection:
+                        format_dict(audioSection['TerrorismInfo'], ['Keywords'])
+                    if 'PoliticsInfo' in audioSection:
+                        format_dict(audioSection['PoliticsInfo'], ['Keywords'])
+                    if 'AdsInfo' in audioSection:
+                        format_dict(audioSection['AdsInfo'], ['Keywords'])
+
+        return data
+
     def ci_auditing_audio_submit(self, Bucket, Key, DetectType, Url=None, Callback=None, CallbackVersion='Simple', BizType=None, **kwargs):
         """提交音频审核任务接口 https://cloud.tencent.com/document/product/460/53395
 
@@ -4714,12 +4771,26 @@ def ci_auditing_audio_query(self, Bucket, JobID, **kwargs):
             print response
         """
 
-        return self.ci_auditing_query_common(
+        data = self.ci_auditing_query_common(
             Bucket=Bucket,
             JobID=JobID,
             Type='audio',
             **kwargs
         )
+        if 'JobsDetail' in data:
+            format_dict(data['JobsDetail'], ['Section'])
+            if 'Section' in data['JobsDetail']:
+                for section in data['JobsDetail']['Section']:
+                    if 'PornInfo' in section:
+                        format_dict(section['PornInfo'], ['Keywords'])
+                    if 'TerrorismInfo' in section:
+                        format_dict(section['TerrorismInfo'], ['Keywords'])
+                    if 'PoliticsInfo' in section:
+                        format_dict(section['PoliticsInfo'], ['Keywords'])
+                    if 'AdsInfo' in section:
+                        format_dict(section['AdsInfo'], ['Keywords'])
+
+        return data
 
     def ci_auditing_text_submit(self, Bucket, Key, DetectType, Content=None, Callback=None,  BizType=None, **kwargs):
         """提交文本审核任务接口 https://cloud.tencent.com/document/product/460/56285
@@ -4758,7 +4829,7 @@ def ci_auditing_text_submit(self, Bucket, Key, DetectType, Content=None, Callbac
         if Callback:
             conf['Callback'] = Callback
 
-        return self.ci_auditing_submit_common(
+        data = self.ci_auditing_submit_common(
             Bucket=Bucket,
             Key=Key,
             Type='text',
@@ -4769,6 +4840,11 @@ def ci_auditing_text_submit(self, Bucket, Key, DetectType, Content=None, Callbac
             **kwargs
         )
 
+        if 'JobsDetail' in data:
+            format_dict(data['JobsDetail'], ['Section'])
+
+        return data
+
     def ci_auditing_text_query(self, Bucket, JobID, **kwargs):
         """查询文本审核任务接口 https://cloud.tencent.com/document/product/460/56284
 
@@ -4789,19 +4865,23 @@ def ci_auditing_text_query(self, Bucket, JobID, **kwargs):
             print response
         """
 
-        return self.ci_auditing_query_common(
+        data = self.ci_auditing_query_common(
             Bucket=Bucket,
             JobID=JobID,
             Type='text',
             **kwargs
         )
+        if 'JobsDetail' in data:
+            format_dict(data['JobsDetail'], ['Section'])
+        return data
 
-    def ci_auditing_document_submit(self, Bucket, Url, DetectType, Type=None, Callback=None,  BizType=None, **kwargs):
+    def ci_auditing_document_submit(self, Bucket, Url, DetectType, Key=None, Type=None, Callback=None,  BizType=None, **kwargs):
         """提交文档审核任务接口 https://cloud.tencent.com/document/product/460/59380
 
         :param Bucket(string): 存储桶名称.
         :param Url(string): 文档文件的链接地址，例如 http://www.example.com/doctest.doc
         :param DetectType(int): 内容识别标志,位计算 1:porn, 2:terrorist, 4:politics, 8:ads
+        :param Key(string): 存储在 COS 存储桶中的文件名称，例如在目录 test 中的文件 test.doc，则文件名称为 test/test.doc。Key 和 Url 只能选择其中一种。
         :param Type(string): 指定文档文件的类型，如未指定则默认以文件的后缀为类型。
                              如果文件没有后缀，该字段必须指定，否则会审核失败。例如：doc、docx、ppt、pptx 等
         :param Callback(string): 回调地址，以http://或者https://开头的地址。
@@ -4822,7 +4902,11 @@ def ci_auditing_document_submit(self, Bucket, Url, DetectType, Type=None, Callba
             print response
         """
 
-        Input = {'Url': Url}
+        Input = {}
+        if Url is not None:
+            Input['Url'] = Url
+        if Key is not None:
+            Input['Object'] = Key
         if Type:
             Input['Type'] = Type
 
@@ -4863,13 +4947,35 @@ def ci_auditing_document_query(self, Bucket, JobID, **kwargs):
             print response
         """
 
-        return self.ci_auditing_query_common(
+        data = self.ci_auditing_query_common(
             Bucket=Bucket,
             JobID=JobID,
             Type='document',
             **kwargs
         )
 
+        if 'JobsDetail' in data and 'PageSegment' in data['JobsDetail'] and 'Results' in data['JobsDetail']['PageSegment']:
+            format_dict(data['JobsDetail']['PageSegment'], ['Results'])
+            for resultsItem in data['JobsDetail']['PageSegment']['Results']:
+                if 'PornInfo' in resultsItem:
+                    format_dict(resultsItem['PornInfo'], ['OcrResults', 'ObjectResults'])
+                    if 'OcrResults' in resultsItem['PornInfo']:
+                        format_dict_or_list(resultsItem['PornInfo']['OcrResults'], ['Keywords'])
+                if 'TerrorismInfo' in resultsItem:
+                    format_dict(resultsItem['TerrorismInfo'], ['OcrResults', 'ObjectResults'])
+                    if 'OcrResults' in resultsItem['TerrorismInfo']:
+                        format_dict_or_list(resultsItem['TerrorismInfo']['OcrResults'], ['Keywords'])
+                if 'PoliticsInfo' in resultsItem:
+                    format_dict(resultsItem['PoliticsInfo'], ['OcrResults', 'ObjectResults'])
+                    if 'OcrResults' in resultsItem['PoliticsInfo']:
+                        format_dict_or_list(resultsItem['PoliticsInfo']['OcrResults'], ['Keywords'])
+                if 'AdsInfo' in resultsItem:
+                    format_dict(resultsItem['AdsInfo'], ['OcrResults', 'ObjectResults'])
+                    if 'OcrResults' in resultsItem['AdsInfo']:
+                        format_dict_or_list(resultsItem['AdsInfo']['OcrResults'], ['Keywords'])
+
+        return data
+
     def ci_get_media_queue(self, Bucket, **kwargs):
         """查询媒体处理队列接口 https://cloud.tencent.com/document/product/436/54045
 
diff --git a/qcloud_cos/cos_comm.py b/qcloud_cos/cos_comm.py
@@ -396,13 +396,26 @@ def format_dict(data, key_lst):
         return data
     for key in key_lst:
         # 将dict转为list，保持一致
-        if key in data and (isinstance(data[key], dict) or isinstance(data[key], str)):
+        if key in data and (isinstance(data[key], dict) or isinstance(data[key], string_types)):
             lst = []
             lst.append(data[key])
             data[key] = lst
     return data
 
 
+def format_dict_or_list(data, key_lst):
+    """转换返回dict或list中的可重复字段为list"""
+    if not ((isinstance(data, list) or isinstance(data, dict)) and isinstance(key_lst, list)):
+        return data
+    if isinstance(data, dict):
+        return format_dict(data, key_lst)
+
+    for data_item in data:
+        format_dict(data_item, key_lst)
+
+    return data
+
+
 def decode_result(data, key_lst, multi_key_list):
     """decode结果中的字段"""
     for key in key_lst:
@@ -471,6 +484,8 @@ class CiDetectType():
     TERRORIST = 2
     POLITICS = 4
     ADS = 8
+    ILLEGAL = 16
+    ABUSE = 32
 
     @staticmethod
     def get_detect_type_str(DetectType):
@@ -490,6 +505,14 @@ def get_detect_type_str(DetectType):
             if len(detect_type) > 0:
                 detect_type += ','
             detect_type += 'Ads'
+        if DetectType & CiDetectType.ILLEGAL > 0:
+            if len(detect_type) > 0:
+                detect_type += ','
+            detect_type += 'Illegal'
+        if DetectType & CiDetectType.ABUSE > 0:
+            if len(detect_type) > 0:
+                detect_type += ','
+            detect_type += 'Abuse'
 
         return detect_type