-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathdata_processing.py
More file actions
67 lines (56 loc) · 1.92 KB
/
data_processing.py
File metadata and controls
67 lines (56 loc) · 1.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
'''
Script that performs signal preprocessing on audio files so that they are ready to be used by the model
Processed data for each dataset split (train, test, validation) is written to its own .csv file
'''
import os

import numpy as np
import pandas as pd
from tqdm import tqdm

import read_wave as rw
import spectrogram as sp
# Directories holding the input wave files, one per dataset split
TRAIN_DIR = 'train'
TEST_DIR = 'test'
VALIDATION_DIR = 'validation'

# CSV files that receive the extracted features, one per dataset split
TRAIN_OUT = 'train_data.csv'
TEST_OUT = 'test_data.csv'
VALIDATION_OUT = 'validation_data.csv'

# Class labels; each one is also the name of a sub-folder inside every
# split directory
CLASSES = ['Claps', 'Crashes', 'HiHats', 'Kicks', 'Snares']

# Split directories and their output files, paired by position
directories = [TRAIN_DIR, TEST_DIR, VALIDATION_DIR]
out_files = [TRAIN_OUT, TEST_OUT, VALIDATION_OUT]

# Spectrogram parameters, forwarded as keyword arguments to
# sp.get_spectrogram
frame_length = 0.025
frame_offset = 0.01
lowFreq = 300
hiFreq = 10000
numFilters = 26
numFrames = 75

# Number of feature columns written per sample (one per filter per frame)
numDataPoints = numFrames * numFilters
# Column layout shared by every output file: the class label followed by one
# column per flattened spectrogram value. It does not depend on the split, so
# it is built once instead of on every loop iteration.
header = ['Class'] + ['D' + str(i) for i in range(numDataPoints)]

# Produce one CSV per dataset split (input directory -> output file).
for data_dir, out_path in zip(directories, out_files):
    print("Writing " + data_dir + " data to: " + out_path)

    # List every class folder up front so a missing folder fails before any
    # audio is processed. os.listdir returns entries in arbitrary order, so
    # the names are sorted to make the row order of the CSV reproducible.
    class_files = {
        label: sorted(os.listdir(os.path.join(data_dir, label)))
        for label in CLASSES
    }

    # Read and process each sample into one row: [label, feature values...]
    data = []
    for label, file_names in class_files.items():
        print("Class being processed: " + label)
        paths = [os.path.join(data_dir, label, name) for name in file_names]
        for path in tqdm(paths):
            signal, sr = rw.read_wave(
                path, normalize=True, length=1, threshold=0.001)
            spec = sp.get_spectrogram(
                signal, sr,
                frame_length=frame_length, frame_offset=frame_offset,
                lowFreq=lowFreq, hiFreq=hiFreq,
                numFilters=numFilters, numFrames=numFrames)
            # Prepend the label so the first data column holds the class.
            data.append(np.append(np.array([label]), spec.flatten()))

    # NOTE(review): assumes every flattened spectrogram has exactly
    # numDataPoints values so each row matches the header width -- confirm
    # against sp.get_spectrogram.
    df = pd.DataFrame(columns=header, data=data)
    df.to_csv(out_path)