|
| 1 | +#Perform transcription on given audio file using the authentication keys in the given JSON file. THIS FUNCTION IS FOR USE WITH PYTHON ONLY. MATLAB FUNCTION IS BELOW. |
| 2 | +#Required variables: |
| 3 | +#bucket_path : String. Path to the file in the bucket that you want transcribed. |
| 4 | +#GOOGLE_AUTH_JSON_PATH : String. Path to the local JSON file containing the authentication credentials for Google Cloud |
def transcribe_gcs(bucket_path, GOOGLE_AUTH_JSON_PATH, timeout=90):
    """Transcribe an audio file stored in a Google Cloud bucket.

    Prints the transcript and confidence of the top alternative of every
    result, then prints and returns the word entries of the last result.

    Args:
        bucket_path: String. URI of the audio file in the bucket; it is
            passed as the ``uri`` of the recognition audio.
        GOOGLE_AUTH_JSON_PATH: String. Path to the local JSON file with the
            Google Cloud credentials. It is exported through the
            GOOGLE_APPLICATION_CREDENTIALS environment variable.
        timeout: Seconds to wait for the long-running operation to finish.
            Defaults to 90, the value that used to be hard-coded.

    Returns:
        Tuple ``(response, words_str_list)``. ``words_str_list`` holds
        ``str()`` of every word entry of the last result with newlines
        removed; it is empty when nothing was recognised.
    """
    import os

    from google.cloud import speech

    # Set before the client is created so the credentials are in place.
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = GOOGLE_AUTH_JSON_PATH
    client = speech.SpeechClient()
    audio = speech.RecognitionAudio(uri=bucket_path)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        language_code="en-US",
        enable_word_time_offsets=True,
        enable_automatic_punctuation=True,
    )

    operation = client.long_running_recognize(config=config, audio=audio)
    print("Waiting for operation to complete...")
    response = operation.result(timeout=timeout)

    for result in response.results:
        if not result.alternatives:
            # Nothing to report for this result; indexing [0] would raise.
            continue
        print(u"Transcript: {}".format(result.alternatives[0].transcript))
        print("Confidence: {}".format(result.alternatives[0].confidence))

    # Silence or unrecognisable audio yields no results; return an empty word
    # list instead of failing on results[-1].
    if not response.results or not response.results[-1].alternatives:
        return response, []

    # NOTE(review): only the words of the LAST result are returned, as before.
    # Without diarization each result presumably carries only its own words -
    # confirm whether the words of all results should be collected instead.
    words_info = response.results[-1].alternatives[0].words
    words_str_list = [str(word).replace("\n", "") for word in words_info]

    # One word entry per line, each followed by a newline.
    print("".join(entry + "\n" for entry in words_str_list))

    return response, words_str_list
| 43 | + |
| 44 | +#Perform transcription on given audio file using the authentication keys in the given JSON file. Uses diarization to attempt to differentiate multiple speakers. |
| 45 | +#Required variables: |
| 46 | +#bucket_path : String. Path to the file in the bucket that you want transcribed. |
| 47 | +#GOOGLE_AUTH_JSON_PATH : String. Path to the local JSON file containing the authentication credentials for Google Cloud. |
| 48 | +#num_speakers : String or Int. The number of speakers that diarization should expect. Should be passed as a whole number integer in either string or int format. |
| 49 | +#save_cloud: Boolean. Indicates whether to save the transcription output to Google Cloud. |
def transcribe_gcs_multi(bucket_path, GOOGLE_AUTH_JSON_PATH, num_speakers, save_cloud, timeout=1800):
    """Transcribe a bucket audio file with speaker diarization enabled.

    Prints the transcript and confidence of the top alternative of every
    result, converts the word entries of the last result with
    ``translate_output`` and, when requested, uploads that table to the
    bucket as a text file.

    Args:
        bucket_path: String. URI of the audio file in the bucket
            (``gs://bucket-name/path/to/file.ext``).
        GOOGLE_AUTH_JSON_PATH: String. Path to the local JSON file with the
            Google Cloud credentials. It is exported through the
            GOOGLE_APPLICATION_CREDENTIALS environment variable.
        num_speakers: String or int. Number of speakers diarization should
            expect; converted with ``int()``.
        save_cloud: Boolean. When true, the converted output is written to a
            temporary text file, uploaded to
            ``TranscriptionOutput/<audio name>_OUTPUT`` in the same bucket,
            and the temporary file is removed.
        timeout: Seconds to wait for the long-running operation to finish.
            Defaults to 1800, the value that used to be hard-coded.

    Returns:
        Tuple ``(response, words_str_list, lofl_less)``: the raw response,
        the word entries of the last result as newline-free strings, and the
        list of lists produced by ``translate_output``. Both lists are empty
        when nothing was recognised.
    """
    import os

    from google.cloud import speech_v1p1beta1 as speech

    # Set before the client is created so the credentials are in place.
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = GOOGLE_AUTH_JSON_PATH
    client = speech.SpeechClient()
    audio = speech.RecognitionAudio(uri=bucket_path)
    num_speakers = int(num_speakers)
    dirname = os.path.dirname(os.path.abspath(__file__))
    FILE_PATH_SAVE_TXT = os.path.join(dirname, 'data/temp.txt')

    # Speech recognition configuration for the diarized transcription.
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        language_code="en-US",
        enable_word_time_offsets=True,
        enable_speaker_diarization=True,
        diarization_speaker_count=num_speakers,
        enable_automatic_punctuation=True,
        model='video'
    )

    # Path the transcript would be exported to inside the bucket. The export
    # itself is NOT implemented; the path is only printed. splitext removes
    # the extension whatever its length (the old [:-4] slice assumed exactly
    # three characters).
    output_path = os.path.splitext(bucket_path)[0] + "_TRANSCRIPT.txt"
    print(output_path)

    # Perform the transcription.
    operation = client.long_running_recognize(config=config, audio=audio)
    print("Waiting for operation to complete...")
    response = operation.result(timeout=timeout)

    # Print the results.
    for result in response.results:
        if not result.alternatives:
            # Nothing to report for this result; indexing [0] would raise.
            continue
        print(u"Transcript: {}".format(result.alternatives[0].transcript))
        print("Confidence: {}".format(result.alternatives[0].confidence))

    # The word entries of the last result are what gets handed to
    # translate_output. Guard against empty results so silence does not crash.
    if response.results and response.results[-1].alternatives:
        words_info = response.results[-1].alternatives[0].words
    else:
        words_info = []
    words_str_list = [str(word).replace("\n", "") for word in words_info]
    lofl_less = translate_output(words_str_list)

    if save_cloud:
        # gs://<bucket name>/<...>/<file name>: element 2 is the bucket name
        # and the last element is the audio file name.
        bucket_list = bucket_path.split("/")
        bucket_name = bucket_list[2]
        audio_stem = os.path.splitext(bucket_list[-1])[0]
        destination_blob_name = "TranscriptionOutput/" + audio_stem + "_OUTPUT"

        # The temporary file lives in <module dir>/data; create the directory
        # when it is missing so open() cannot fail on it.
        os.makedirs(os.path.dirname(FILE_PATH_SAVE_TXT), exist_ok=True)
        try:
            with open(FILE_PATH_SAVE_TXT, 'w', encoding="utf-8") as writer:
                for fields in lofl_less:
                    # One word per line, every field followed by a space.
                    writer.write("".join(field + " " for field in fields) + "\n")
            upload_blob(bucket_name, FILE_PATH_SAVE_TXT, destination_blob_name, GOOGLE_AUTH_JSON_PATH)
        finally:
            # Remove the temporary file even when writing or uploading fails.
            if os.path.exists(FILE_PATH_SAVE_TXT):
                os.remove(FILE_PATH_SAVE_TXT)

    # Return the relevant values.
    return response, words_str_list, lofl_less
| 121 | + |
| 122 | +#Upload a given file to the specified Google Cloud bucket so it can be accessed by longrunningrecognizer. |
| 123 | +#Required variables: |
| 124 | +#bucket_name : String. Name of the bucket to upload to. Google Cloud bucket names are unique identifiers. Name should be JUST the name. Not gs://whatever-the-name-is. Just whatever-the-name-is. |
| 125 | +#source_file_name : String. Path to the file that you want uploaded to the bucket. Should be full path including drive letter just to be safe. (e.g. "C:/Users/Your-Username/Your-path-to-file/your-file.filextension") |
| 126 | +#destination_blob_name : String. Name of the file (or blob as Google likes to call it) once uploaded to the bucket. This can be different from the original file name or the same as the original file name. Including file extensions in the name is optional. |
| 127 | +#GOOGLE_AUTH_JSON_PATH : String. Path to the local JSON file containing the authentication credentials for Google Cloud. |
def upload_blob(bucket_name, source_file_name, destination_blob_name, GOOGLE_AUTH_JSON_PATH):
    """Upload a local file to a Google Cloud Storage bucket.

    Args:
        bucket_name: String. Bare bucket name, without the ``gs://`` prefix.
        source_file_name: String. Path of the local file to upload.
        destination_blob_name: String. Name the file gets inside the bucket.
        GOOGLE_AUTH_JSON_PATH: String. Path to the local JSON file with the
            Google Cloud credentials. It is exported through the
            GOOGLE_APPLICATION_CREDENTIALS environment variable.

    Returns:
        None. The local path, the blob name and the resulting ``gs://`` path
        are printed.
    """
    import os

    from google.cloud import storage

    # Set before the client is created so the credentials are in place.
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = GOOGLE_AUTH_JSON_PATH

    target_blob = storage.Client().bucket(bucket_name).blob(destination_blob_name)
    target_blob.upload_from_filename(source_file_name)

    gs_uri = "gs://" + bucket_name + "/" + destination_blob_name

    print("File {} uploaded as {}".format(source_file_name, destination_blob_name))
    print("Path to new bucket file: {}".format(gs_uri))
| 142 | + |
| 143 | +#Delete a given file from the specified Google Cloud bucket. |
| 144 | +#Required variables: |
| 145 | +#bucket_name : String. Name of the bucket to delete from. Google Cloud bucket names are unique identifiers. Name should be JUST the name. Not gs://whatever-the-name-is. Just whatever-the-name-is. |
| 146 | +#blob_name : String. Name of the file (or blob as Google likes to call it) that you wish to delete from the bucket. Assuming you uploaded directly to bucket and not to a subfolder, you can pass just the name of the blob without any path. |
| 147 | +#GOOGLE_AUTH_JSON_PATH : String. Path to the local JSON file containing the authentication credentials for Google Cloud. |
def delete_blob(bucket_name, blob_name, GOOGLE_AUTH_JSON_PATH):
    """Delete a file from a Google Cloud Storage bucket.

    Args:
        bucket_name: String. Bare bucket name, without the ``gs://`` prefix.
        blob_name: String. Name of the blob to delete from the bucket.
        GOOGLE_AUTH_JSON_PATH: String. Path to the local JSON file with the
            Google Cloud credentials. It is exported through the
            GOOGLE_APPLICATION_CREDENTIALS environment variable.

    Returns:
        None. A confirmation line is printed after the deletion.
    """
    import os

    from google.cloud import storage

    # Set before the client is created so the credentials are in place.
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = GOOGLE_AUTH_JSON_PATH

    gcs_client = storage.Client()
    doomed_blob = gcs_client.get_bucket(bucket_name).blob(blob_name)
    doomed_blob.delete()

    print("File {} deleted from bucket {}.".format(blob_name, bucket_name))
| 157 | + |
| 158 | +#Translates the mess of values in Google's output to a list of lists for matlab to make sense of. |
| 159 | +#This solution is messy as hell and likely to break at some point in the future. Good luck. |
| 160 | +#If some wonderful person who understands regex wants to make this loads better and less complicated, feel free. |
| 161 | +#Required variables: |
| 162 | +#words_str_list : List. List of words as strings created in the transcribe_gcs_multi function. |
def translate_output(words_str_list):
    """Turn stringified word entries into a list of lists for MATLAB.

    Every entry is expected to look like the newline-free text of one word,
    for example::

        start_time {  seconds: 1  nanos: 500000000}end_time {  seconds: 2}word: "hello"speaker_tag: 1

    and becomes one row of four strings::

        ["StartTime: 1.500000000", "EndTime: 2", "Word:hello", "Speaker:1"]

    Formatting rules, kept from the previous implementation:
      * ``seconds:`` is dropped and ``nanos:`` becomes ``.``, so a time with
        only nanos renders as ``.500000000`` and an empty time as nothing.
      * Spaces are removed from the word and speaker fields.
      * The speaker field is everything that follows ``speaker_tag:``.

    Each entry is parsed on its own, so duplicate entries and entries in any
    order are handled. An entry without ``speaker_tag`` keeps its word and
    gets an empty ``Speaker:`` field.

    Args:
        words_str_list: List of word strings as built in
            transcribe_gcs_multi.

    Returns:
        List with one ``[start, end, word, speaker]`` row per entry; empty
        for empty input.

    Raises:
        ValueError: If an entry lacks the two ``{...}`` time groups, the
            ``end_time`` group or the ``word:`` field.
    """
    rows = []
    for position, entry in enumerate(words_str_list):
        # Locate the two brace groups (start time, then end time).
        open_at = entry.find("{")
        first_close = entry.find("}", open_at) if open_at != -1 else -1
        second_close = entry.find("}", first_close + 1) if first_close != -1 else -1
        word_at = entry.find("word:")
        if second_close == -1 or word_at == -1:
            raise ValueError(
                "Entry {} is not a recognised word entry: {!r}".format(position, entry)
            )

        # "{<start>}end_time {<end>}" -> ["<start>", "end_time<end>"]
        start_raw, end_raw = _squash_field(entry[open_at:second_close + 1]).split("|")[:2]
        if not end_raw.startswith("end_time"):
            raise ValueError(
                "Entry {} has no end_time group: {!r}".format(position, entry)
            )
        end_raw = end_raw[len("end_time"):]

        # The word text sits between 'word: "' and the quote that directly
        # precedes speaker_tag.
        text_from = word_at + len('word: "')
        tag_at = entry.find("speaker_tag", word_at)
        if tag_at != -1:
            word_text = entry[text_from:tag_at - 1]
            speaker_text = entry[tag_at + len("speaker_tag:"):]
        else:
            # No speaker information: keep the word and drop its closing quote.
            word_text = entry[text_from:]
            if word_text.endswith('"'):
                word_text = word_text[:-1]
            speaker_text = ""

        rows.append([
            "StartTime: " + _format_duration(start_raw),
            "EndTime: " + _format_duration(end_raw),
            _squash_field("Word: " + word_text).split("|")[0],
            _squash_field("Speaker: " + speaker_text).split("|")[0],
        ])

    return rows


def _squash_field(text):
    # Normalise a raw slice: drop "{" and spaces, turn "}" into the "|"
    # separator, and remove a single trailing separator.
    squashed = text.strip().replace("{", "").replace("}", "|").replace(" ", "")
    return squashed[:-1] if squashed.endswith("|") else squashed


def _format_duration(raw):
    # "seconds:1nanos:500000000" -> "1.500000000"; "nanos:5" -> ".5".
    return raw.replace("seconds:", "").replace("nanos:", ".")
| 231 | + |
| 232 | +#Use to test functions and changes. Be careful when enabling this. If you leave it enabled and try to run the MATLAB script, it will call this line before running MATLAB's commands. This can greatly increase the amount of time it takes to run and can muck up the output. |
| 233 | +#transcribe_gcs_multi('gs://acnlab-audio-files/RhysTest1_mono.wav', 'C:/Users/Research/Dropbox/StudentAndStaffFolders/Current/RhysSwitzer/Python Scripts/AudacityWordSplit/data/transcriptionprogram-a7df078889c7.json', '2', True) |
0 commit comments