Sound-Sample-Classifier-AI/data_processing.py at master · davidliii/Sound-Sample-Classifier-AI · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
'''
Script that performs signal preprocessing on audio files so that they are ready to be used by the model.
Processed data for each data set (train, test and validation) is written to its corresponding .csv file.
'''

import os

import numpy as np
import pandas as pd
from tqdm import tqdm

import read_wave as rw
import spectrogram as sp

# Directories holding the wave files of each data set
TRAIN_DIR = 'train'
TEST_DIR = 'test'
VALIDATION_DIR = 'validation'

# CSV files receiving the extracted features of each data set
TRAIN_OUT = 'train_data.csv'
TEST_OUT = 'test_data.csv'
VALIDATION_OUT = 'validation_data.csv'

# Sample classes; each one is read from a sub-directory of the same name
CLASSES = ['Claps', 'Crashes', 'HiHats', 'Kicks', 'Snares']

# Parallel lists: the samples found in directories[i] are written to out_files[i]
directories = [TRAIN_DIR, TEST_DIR, VALIDATION_DIR]
out_files = [TRAIN_OUT, TEST_OUT, VALIDATION_OUT]

# Spectrogram parameters, forwarded as keyword arguments to sp.get_spectrogram
frame_length = 0.025
frame_offset = 0.01
lowFreq = 300
hiFreq = 10000
numFilters = 26
numFrames = 75

# Number of feature columns written per sample (numFilters * numFrames)
numDataPoints = numFilters * numFrames

# Build one feature CSV per data set: every sample of every class becomes one
# row holding the class label followed by its flattened spectrogram values.
# `data_dir` is used instead of `dir` so the builtin of that name is not shadowed.
for data_dir, out in zip(directories, out_files):
    print("Writing " + data_dir + " data to: " + out)

    # Collect the sample file names of every class up front, so that a missing
    # class directory fails before any audio is processed. os.listdir returns
    # entries in arbitrary order; sorting keeps the CSV row order reproducible.
    class_files = {
        c: sorted(os.listdir(os.path.join(data_dir, c)))
        for c in CLASSES
    }

    # Read, process, and collect the data of each sample
    data = []
    for c, files in class_files.items():
        print("Class being processed: " + c)
        paths = [os.path.join(data_dir, c, name) for name in files]

        for path in tqdm(paths):
            signal, sr = rw.read_wave(path, normalize=True, length=1, threshold=0.001)
            spec = sp.get_spectrogram(signal, sr, frame_length=frame_length, frame_offset=frame_offset, lowFreq=lowFreq, hiFreq=hiFreq, numFilters=numFilters, numFrames=numFrames)
            # One row: the class label followed by the flattened spectrogram
            data.append(np.append(np.array([c]), spec.flatten()))

    # Column layout: 'Class', then 'D0' .. 'D<numDataPoints - 1>'.
    # NOTE(review): assumes each flattened spectrogram has exactly
    # numDataPoints values; otherwise the DataFrame constructor will reject
    # the rows - confirm against sp.get_spectrogram.
    header = ['Class'] + ['D' + str(i) for i in range(numDataPoints)]
    df = pd.DataFrame(columns=header, data=data)
    df.to_csv(out)