From 7256d6de927303af975468f102cc12c6877f9ea4 Mon Sep 17 00:00:00 2001
From: Jacek Czaja
Date: Sat, 2 Jan 2021 18:08:44 +0100
Subject: [PATCH 1/2] Added experimental DeepSpeech support

---
 README.md                       | 18 ++++++++++++++++++
 autoload/vim_speech.vim         |  4 ++--
 plugin/speech_to_text_client.py | 23 ++++++++++++++++++++---
 3 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 74cbb1a..0f71f9e 100644
--- a/README.md
+++ b/README.md
@@ -63,6 +63,24 @@ register a project with access to the "Cloud Speech API." See Google's
 speech-to-text demo site for more information:
 https://cloud.google.com/speech-to-text/
 
+### DeepSpeech support (experimental)
+vim-speech can use Mozilla's [DeepSpeech](https://github.com/mozilla/DeepSpeech) engine as an offline alternative to the Google Cloud Speech API.
+
+You need to install the deepspeech Python package:
+```
+pip install deepspeech
+```
+
+Then download or train a DeepSpeech model (and optionally the language model scorer), and
+let this plugin know about their location, e.g.
+```
+export DEEPSPEECH_MODEL=
+export DEEPSPEECH_SCORER=
+```
+
+It may be helpful to fine-tune the pre-trained model with your own voice samples.
+See the [DeepSpeech documentation](https://deepspeech.readthedocs.io/) for more information.
+
 ## Usage
 
 Once you have figured out how to get everything installed, you can use the
diff --git a/autoload/vim_speech.vim b/autoload/vim_speech.vim
index d103116..6bd42ac 100644
--- a/autoload/vim_speech.vim
+++ b/autoload/vim_speech.vim
@@ -80,8 +80,8 @@ function! s:StartJobIfNeeded(buffer) abort
         return
     endif
 
-    if empty($GOOGLE_APPLICATION_CREDENTIALS)
-        throw 'GOOGLE_APPLICATION_CREDENTIALS is not set'
+    if empty($GOOGLE_APPLICATION_CREDENTIALS) && empty($DEEPSPEECH_MODEL)
+        throw 'Neither GOOGLE_APPLICATION_CREDENTIALS nor DEEPSPEECH_MODEL is set'
     endif
 
     let l:command = ale#Escape(g:vim_speech_dir . '/venv/bin/python')
diff --git a/plugin/speech_to_text_client.py b/plugin/speech_to_text_client.py
index 13a4bf7..4098af1 100755
--- a/plugin/speech_to_text_client.py
+++ b/plugin/speech_to_text_client.py
@@ -92,6 +92,21 @@ def stop_recording(self):
 
         return output_file.getvalue()
 
+def transcribe_file_with_deepspeech(content):
+    from deepspeech import Model
+    import numpy as np
+
+    if not content:
+        return ''
+
+    ds = Model(os.environ.get('DEEPSPEECH_MODEL'))
+    scorer = os.environ.get('DEEPSPEECH_SCORER')
+    if scorer:
+        ds.enableExternalScorer(scorer)
+    numpy_content = np.frombuffer(content, dtype=np.int16)
+    transcript = ds.stt(numpy_content)
+    return transcript
+
 def transcribe_file(content):
     from google.cloud import speech
 
@@ -127,11 +142,13 @@ def stdin_has_data():
 
 
 def main():
-    if not os.environ.get('GOOGLE_APPLICATION_CREDENTIALS'):
+    # Stop early if neither environment variable is set.
+    if not os.environ.get('GOOGLE_APPLICATION_CREDENTIALS') and not os.environ.get('DEEPSPEECH_MODEL'):
         sys.exit(
             'You must set GOOGLE_APPLICATION_CREDENTIALS'
-            ' to your JSON credentials filename.'
+            ' to your JSON credentials filename or DEEPSPEECH_MODEL'
+            ' to a trained DeepSpeech model.'
         )
 
     client = RecordingClient()
 
@@ -156,7 +173,7 @@ def main():
         elif message == 'stop':
             print_and_flush('record end')
             audio_content = client.stop_recording()
-            print_and_flush('speech', transcribe_file(audio_content))
+            print_and_flush('speech', transcribe_file(audio_content) if os.environ.get('GOOGLE_APPLICATION_CREDENTIALS') else transcribe_file_with_deepspeech(audio_content))
         elif message == 'quit':
             break
 

From 0ebe2f99f9720b0a5840a1414f20ddc4c181aafc Mon Sep 17 00:00:00 2001
From: Jacek Czaja
Date: Sat, 2 Jan 2021 18:13:59 +0100
Subject: [PATCH 2/2] cosmetic fix

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 0f71f9e..2202a50 100644
--- a/README.md
+++ b/README.md
@@ -75,7 +75,7 @@ Then download or train a DeepSpeech model (and optionally the language model scor
 let this plugin know about their location, e.g.
 ```
 export DEEPSPEECH_MODEL=
-export DEEPSPEECH_SCORER=
+export DEEPSPEECH_SCORER=
 ```
 
 It may be helpful to fine-tune the pre-trained model with your own voice samples.
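
For anyone who wants to try the new DeepSpeech path outside of Vim, here is a minimal standalone sketch of the same flow the patch adds to plugin/speech_to_text_client.py. It assumes the deepspeech 0.x Python API (`Model`, `enableExternalScorer`, `stt`) and a 16 kHz, mono, 16-bit WAV recording; the `sample.wav` file name is only a placeholder.

```python
# Smoke test for the DeepSpeech transcription path, independent of Vim.
# Assumes the deepspeech 0.x Python bindings and a 16 kHz mono 16-bit WAV;
# "sample.wav" is a placeholder file name.
import os
import sys
import wave

import numpy as np
from deepspeech import Model


def main():
    model_path = os.environ.get('DEEPSPEECH_MODEL')
    if not model_path:
        sys.exit('Set DEEPSPEECH_MODEL to a trained model file first.')

    model = Model(model_path)

    # The scorer is optional, exactly as in the plugin.
    scorer_path = os.environ.get('DEEPSPEECH_SCORER')
    if scorer_path:
        model.enableExternalScorer(scorer_path)

    # Read raw 16-bit PCM samples, matching what the plugin records.
    with wave.open('sample.wav', 'rb') as wav:
        if wav.getframerate() != model.sampleRate():
            sys.exit('WAV sample rate does not match the model.')
        audio = np.frombuffer(wav.readframes(wav.getnframes()), dtype=np.int16)

    print(model.stt(audio))


if __name__ == '__main__':
    main()
```

Running it with the same DEEPSPEECH_MODEL and DEEPSPEECH_SCORER environment variables the plugin reads should print a transcript, which makes it easy to check a downloaded or fine-tuned model before wiring it into Vim.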