Merge pull request #36 from jsingh811/audio-read

jsingh811 · web-flow · commit b3568bae947d · 2021-06-10T16:01:19.000-07:00
Audio read
diff --git a/README.md b/README.md
@@ -185,6 +185,25 @@ python pyAudioProcessing/extract_features.py -f "data_samples/testing"  -feats "
 ```  
 Features extracted get saved in `audio_features.json`.  
 
+## Audio format conversion
+
+You can convert your audio files in `.mp4`, `.mp3`, `.m4a` and `.aac` format to `.wav`. This will allow you to use the audio feature generation and classification functionalities.
+
+In order to convert your audio files, the following code sample can be used.  
+
+```
+from pyAudioProcessing.convert_audio import convert_files_to_wav
+
+# dir_path is the path to the directory/folder on your machine containing audio files
+dir_path = "data/mp4_files"
+
+# simply change audio_format to "mp3", "m4a" or "aac" depending on the format
+# of audio that you are trying to convert to wav
+convert_files_to_wav(dir_path, audio_format="mp4")
+
+# the converted wav files will be saved in the same dir_path location.
+
+```
 
 ## Author  
 
diff --git a/pyAudioProcessing/convert_audio.py b/pyAudioProcessing/convert_audio.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Jun 10 15:23:55 2021
+
+@author: jsingh
+"""
+# Imports
+
+import os
+import glob
+
+from pydub import AudioSegment
+
+
+# Functions
+
+def convert_from_m4a(file_path):
+    """
+    Converts m4a audio into wav format.
+
+    The wav file is written next to the input, using the input's name
+    with its extension swapped for .wav. Errors are printed rather than
+    raised, and nothing is returned.
+
+    Parameters
+    ----------
+    file_path : str
+        Path to the m4a file to convert.
+    """
+    try:
+        # Swap only the trailing extension. str.replace would also rewrite
+        # ".m4a" appearing earlier in the path (e.g. in a directory name),
+        # and would leave the path unchanged for a suffix such as ".M4A",
+        # making the export overwrite the source file.
+        wav_path = os.path.splitext(file_path)[0] + ".wav"
+        track = AudioSegment.from_file(file_path, "m4a")
+        # export hands back the open output file; close it explicitly.
+        track.export(wav_path, format='wav').close()
+    except FileNotFoundError:
+        print(
+            "{} does not appear to be valid. Please check.".format(file_path)
+        )
+    except Exception as e:
+        print(e)
+
+
+def convert_from_mp3(file_path):
+    """
+    Converts mp3 audio into wav format.
+
+    The wav file is written next to the input, using the input's name
+    with its extension swapped for .wav. Errors are printed rather than
+    raised, and nothing is returned.
+
+    Parameters
+    ----------
+    file_path : str
+        Path to the mp3 file to convert.
+    """
+    try:
+        # Swap only the trailing extension. str.replace would also rewrite
+        # ".mp3" appearing earlier in the path (e.g. in a directory name),
+        # and would leave the path unchanged for a suffix such as ".MP3",
+        # making the export overwrite the source file.
+        wav_path = os.path.splitext(file_path)[0] + ".wav"
+        track = AudioSegment.from_file(file_path, "mp3")
+        # export hands back the open output file; close it explicitly.
+        track.export(wav_path, format='wav').close()
+    except FileNotFoundError:
+        print(
+            "{} does not appear to be valid. Please check.".format(file_path)
+        )
+    except Exception as e:
+        print(e)
+
+
+def convert_from_mp4(file_path):
+    """
+    Converts mp4 audio into wav format.
+
+    The wav file is written next to the input, using the input's name
+    with its extension swapped for .wav. Errors are printed rather than
+    raised, and nothing is returned.
+
+    Parameters
+    ----------
+    file_path : str
+        Path to the mp4 file to convert.
+    """
+    try:
+        # Swap only the trailing extension. str.replace would also rewrite
+        # ".mp4" appearing earlier in the path (e.g. in a directory name),
+        # and would leave the path unchanged for a suffix such as ".MP4",
+        # making the export overwrite the source file.
+        wav_path = os.path.splitext(file_path)[0] + ".wav"
+        track = AudioSegment.from_file(file_path, "mp4")
+        # export hands back the open output file; close it explicitly.
+        track.export(wav_path, format='wav').close()
+    except FileNotFoundError:
+        print(
+            "{} does not appear to be valid. Please check.".format(file_path)
+        )
+    except Exception as e:
+        print(e)
+
+
+def convert_from_aac(file_path):
+    """
+    Converts aac audio into wav format.
+
+    The wav file is written next to the input, using the input's name
+    with its extension swapped for .wav. Errors are printed rather than
+    raised, and nothing is returned.
+
+    Parameters
+    ----------
+    file_path : str
+        Path to the aac file to convert.
+    """
+    try:
+        # Swap only the trailing extension. str.replace would also rewrite
+        # ".aac" appearing earlier in the path (e.g. in a directory name),
+        # and would leave the path unchanged for a suffix such as ".AAC",
+        # making the export overwrite the source file.
+        wav_path = os.path.splitext(file_path)[0] + ".wav"
+        track = AudioSegment.from_file(file_path, "aac")
+        # export hands back the open output file; close it explicitly.
+        track.export(wav_path, format='wav').close()
+    except FileNotFoundError:
+        print(
+            "{} does not appear to be valid. Please check.".format(file_path)
+        )
+    except Exception as e:
+        print(e)
+
+
+def convert_files_to_wav(dir_path, audio_format="m4a"):
+    """
+    Batch-converts the audio files of one format found directly inside
+    dir_path into .wav files saved in that same directory.
+
+    Parameters
+    ----------
+    dir_path : str
+        Directory searched (non-recursively) for "*.<audio_format>" files.
+    audio_format : str
+        Extension without the leading dot: "mp3", "mp4", "m4a" or "aac".
+        Any other value prints a message and converts nothing.
+
+    Prints a summary whose count is the number of matching files that
+    were handed to the converter. Returns nothing.
+    """
+    # Gather the candidates up front: only files carrying the extension.
+    matches = glob.glob(os.path.join(dir_path, "*." + audio_format))
+
+    # Map each supported extension to the function that converts it.
+    converters = {
+        "m4a": convert_from_m4a,
+        "mp3": convert_from_mp3,
+        "aac": convert_from_aac,
+        "mp4": convert_from_mp4,
+    }
+
+    if audio_format not in converters:
+        print(
+            "File format {} is not in supported types (mp3, mp4, m4a, aac)".format(
+                audio_format
+            )
+        )
+        return
+
+    # Each output .wav lands in dir_path, alongside its source file.
+    to_wav = converters[audio_format]
+    handled = 0
+    for path in matches:
+        to_wav(path)
+        handled += 1
+
+    summary = "{} {} files converted to .wav and saved in {}".format(
+        handled, audio_format, dir_path
+    )
+    print(summary)
+    
+    
diff --git a/pyAudioProcessing/run_classification.py b/pyAudioProcessing/run_classification.py
@@ -132,7 +132,7 @@ def train_and_classify(
     Train on the data under folder_path or classify the data in folder path
     using features specified by feature_names and the specified classifier.
     """
-    # Get all direcotiers under folder_path
+    # Get all directories under folder_path
     data_dirs = [x[0] for x in os.walk(folder_path)][1:]
     print(
         "\n There are {} classes in the specified data folder\n".format(