From 7256d6de927303af975468f102cc12c6877f9ea4 Mon Sep 17 00:00:00 2001
From: Jacek Czaja
Date: Sat, 2 Jan 2021 18:08:44 +0100
Subject: [PATCH 1/2] Added experimental DeepSpeech support

---
 README.md                       | 18 ++++++++++++++++++
 autoload/vim_speech.vim         |  4 ++--
 plugin/speech_to_text_client.py | 23 ++++++++++++++++++++---
 3 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 74cbb1a..0f71f9e 100644
--- a/README.md
+++ b/README.md
@@ -63,6 +63,24 @@ register a project with access to the "Cloud Speech API." See Google's
 speech-to-text demo site for more information:
 https://cloud.google.com/speech-to-text/
 
+### DeepSpeech support (experimental)
+vim-speech can use Mozilla's [DeepSpeech](https://github.com/mozilla/DeepSpeech) engine as an offline alternative to the Google Cloud Speech API.
+
+You need to install the deepspeech Python package:
+```
+pip install deepspeech
+```
+
+Then download or train a DeepSpeech model (and optionally the language model scorer), and
+let this plugin know about their location, e.g.
+```
+export DEEPSPEECH_MODEL=
+export DEEPSPEECH_SCORER=
+```
+
+It may be helpful to fine-tune the pre-trained model with your own voice samples.
+See the [DeepSpeech documentation](https://deepspeech.readthedocs.io/) for more information.
+
 ## Usage
 
 Once you have figured out how to get everything installed, you can use the
diff --git a/autoload/vim_speech.vim b/autoload/vim_speech.vim
index d103116..6bd42ac 100644
--- a/autoload/vim_speech.vim
+++ b/autoload/vim_speech.vim
@@ -80,8 +80,8 @@ function! s:StartJobIfNeeded(buffer) abort
         return
     endif
 
-    if empty($GOOGLE_APPLICATION_CREDENTIALS)
-        throw 'GOOGLE_APPLICATION_CREDENTIALS is not set'
+    if empty($GOOGLE_APPLICATION_CREDENTIALS) && empty($DEEPSPEECH_MODEL)
+        throw 'Neither GOOGLE_APPLICATION_CREDENTIALS nor DEEPSPEECH_MODEL is set'
     endif
 
     let l:command = ale#Escape(g:vim_speech_dir . '/venv/bin/python')
diff --git a/plugin/speech_to_text_client.py b/plugin/speech_to_text_client.py
index 13a4bf7..4098af1 100755
--- a/plugin/speech_to_text_client.py
+++ b/plugin/speech_to_text_client.py
@@ -92,6 +92,21 @@ def stop_recording(self):
 
         return output_file.getvalue()
 
+def transcribe_file_with_deepspeech(content):
+    from deepspeech import Model
+    import numpy as np
+
+    if not content:
+        return ''
+
+    ds = Model(os.environ.get('DEEPSPEECH_MODEL'))
+    scorer = os.environ.get('DEEPSPEECH_SCORER')
+    if scorer:
+        ds.enableExternalScorer(scorer)
+    numpy_content = np.frombuffer(content, dtype=np.int16)
+    transcript = ds.stt(numpy_content)
+    return transcript
+
 def transcribe_file(content):
     from google.cloud import speech
 
@@ -127,11 +142,13 @@ def stdin_has_data():
 
 
 def main():
-    if not os.environ.get('GOOGLE_APPLICATION_CREDENTIALS'):
+    # Stop early if neither environment variable is set.
+    if not os.environ.get('GOOGLE_APPLICATION_CREDENTIALS') and not os.environ.get('DEEPSPEECH_MODEL'):
         sys.exit(
             'You must set GOOGLE_APPLICATION_CREDENTIALS'
-            ' to your JSON credentials filename.'
+            ' to your JSON credentials filename or DEEPSPEECH_MODEL'
+            ' to a trained DeepSpeech model.'
         )
 
     client = RecordingClient()
 
@@ -156,7 +173,7 @@ def main():
         elif message == 'stop':
             print_and_flush('record end')
             audio_content = client.stop_recording()
-            print_and_flush('speech', transcribe_file(audio_content))
+            print_and_flush('speech', transcribe_file(audio_content) if os.environ.get('GOOGLE_APPLICATION_CREDENTIALS') else transcribe_file_with_deepspeech(audio_content))
         elif message == 'quit':
             break
 

From 0ebe2f99f9720b0a5840a1414f20ddc4c181aafc Mon Sep 17 00:00:00 2001
From: Jacek Czaja
Date: Sat, 2 Jan 2021 18:13:59 +0100
Subject: [PATCH 2/2] cosmetic fix

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 0f71f9e..2202a50 100644
--- a/README.md
+++ b/README.md
@@ -75,7 +75,7 @@ Then download or train a DeepSpeech model (and optionally the language model scor
 let this plugin know about their location, e.g.
 ```
 export DEEPSPEECH_MODEL=
-export DEEPSPEECH_SCORER=
+export DEEPSPEECH_SCORER=
 ```
 
 It may be helpful to fine-tune the pre-trained model with your own voice samples.
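
For anyone who wants to try the new DeepSpeech path outside of Vim, here is a minimal standalone sketch of the same flow the patch adds to plugin/speech_to_text_client.py. It assumes the deepspeech 0.x Python API (`Model`, `enableExternalScorer`, `stt`) and a 16 kHz, mono, 16-bit WAV recording; the `sample.wav` file name is only a placeholder.

```python
# Smoke test for the DeepSpeech transcription path, independent of Vim.
# Assumes the deepspeech 0.x Python bindings and a 16 kHz mono 16-bit WAV;
# "sample.wav" is a placeholder file name.
import os
import sys
import wave

import numpy as np
from deepspeech import Model


def main():
    model_path = os.environ.get('DEEPSPEECH_MODEL')
    if not model_path:
        sys.exit('Set DEEPSPEECH_MODEL to a trained model file first.')

    model = Model(model_path)

    # The scorer is optional, exactly as in the plugin.
    scorer_path = os.environ.get('DEEPSPEECH_SCORER')
    if scorer_path:
        model.enableExternalScorer(scorer_path)

    # Read raw 16-bit PCM samples, matching what the plugin records.
    with wave.open('sample.wav', 'rb') as wav:
        if wav.getframerate() != model.sampleRate():
            sys.exit('WAV sample rate does not match the model.')
        audio = np.frombuffer(wav.readframes(wav.getnframes()), dtype=np.int16)

    print(model.stt(audio))


if __name__ == '__main__':
    main()
```

Running it with the same DEEPSPEECH_MODEL and DEEPSPEECH_SCORER environment variables the plugin reads should print a transcript, which makes it easy to check a downloaded or fine-tuned model before wiring it into Vim.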