
Commit 655376b

Remove model from dataset

1 parent e21bc35 · commit 655376b

File tree: 1 file changed, +8 −185 lines

mplc/dataset.py

Lines changed: 8 additions & 185 deletions
@@ -13,25 +13,11 @@
 
 import numpy as np
 import pandas as pd
-<<<<<<< HEAD
-=======
-from joblib import dump, load
-from keras.datasets import cifar10, cifar100, mnist, imdb
-from keras.layers import Activation
-from keras.layers import Conv2D, GlobalAveragePooling2D, MaxPooling2D
-from keras.layers import Dense, Dropout
-from keras.layers import Embedding, Conv1D, MaxPooling1D, Flatten
-from keras.losses import categorical_crossentropy
-from keras.models import Sequential
-from keras.optimizers import RMSprop
-from keras.preprocessing import sequence
-from keras.utils import to_categorical
->>>>>>> 0bb0faf (Add pytorch model for cifar100 [WIP])
 from librosa import load as wav_load
 from librosa.feature import mfcc
 from loguru import logger
 from sklearn.model_selection import train_test_split
-from tensorflow.keras.datasets import cifar10, mnist, imdb
+from tensorflow.keras.datasets import cifar10, cifar100, mnist, imdb
 from tensorflow.keras.layers import Activation
 from tensorflow.keras.layers import Conv2D, GlobalAveragePooling2D, MaxPooling2D
 from tensorflow.keras.layers import Dense, Dropout
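This hunk drops the leftover merge-conflict block (the duplicate keras.* imports introduced by 0bb0faf) and adds cifar100 to the consolidated tensorflow.keras imports. A minimal sketch of how the newly imported loader behaves (shapes are the standard CIFAR-100 ones):

from tensorflow.keras.datasets import cifar100

# Standard CIFAR-100 split: 50,000 train / 10,000 test, 32x32 RGB, integer labels 0..99
(x_train, y_train), (x_test, y_test) = cifar100.load_data()
print(x_train.shape, y_train.shape)  # (50000, 32, 32, 3) (50000, 1)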
@@ -43,7 +29,8 @@
 from tensorflow.keras.utils import to_categorical
 
 from . import constants
-from .models import LogisticRegression
+from .models import LogisticRegression, ModelPytorch
+from torchvision import models
 
 
 class Dataset(ABC):
@@ -210,11 +197,11 @@ def generate_new_model(self):
 
 class Cifar100(Dataset):
     def __init__(self):
-        self.input_shape = (32, 32, 3)
+        self.input_shape = (3, 32, 32)
         self.num_classes = 100
         x_test, x_train, y_test, y_train = self.load_data()
 
-        super(Cifar10, self).__init__(dataset_name='cifar100',
+        super(Cifar100, self).__init__(dataset_name='cifar100',
                                       num_classes=self.num_classes,
                                       input_shape=self.input_shape,
                                       x_train=x_train,
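This hunk switches input_shape from channels-last (32, 32, 3), the Keras/TensorFlow convention, to channels-first (3, 32, 32), which PyTorch expects, and fixes the copy-pasted super(Cifar10, ...) call. A minimal sketch of the layout conversion the new shape implies, assuming the raw arrays still arrive channels-last from cifar100.load_data():

import numpy as np
from tensorflow.keras.datasets import cifar100

(x_train, _), _ = cifar100.load_data()         # (50000, 32, 32, 3), NHWC
x_train = np.transpose(x_train, (0, 3, 1, 2))  # (50000, 3, 32, 32), NCHW for PyTorch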
@@ -246,8 +233,8 @@ def load_data(self):
         # Pre-process inputs
         x_train = self.preprocess_dataset_inputs(x_train)
         x_test = self.preprocess_dataset_inputs(x_test)
-        y_train = self.preprocess_dataset_labels(y_train)
-        y_test = self.preprocess_dataset_labels(y_test)
+        # y_train = self.preprocess_dataset_labels(y_train)
+        # y_test = self.preprocess_dataset_labels(y_test)
         return x_test, x_train, y_test, y_train
 
     # Data samples pre-processing method for inputs
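Commenting out the label preprocessing is consistent with the move to PyTorch: torch.nn.CrossEntropyLoss consumes integer class indices, whereas Keras' categorical_crossentropy consumes the one-hot vectors that preprocess_dataset_labels (presumably via to_categorical) produces. A small illustration of the difference:

import torch
import torch.nn.functional as F

logits = torch.randn(4, 100)            # raw scores for a batch of 4 over 100 classes
labels = torch.tensor([3, 17, 42, 99])  # plain integer indices -- no one-hot encoding
loss = F.cross_entropy(logits, labels)  # applies log_softmax + NLL internally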
@@ -266,41 +253,7 @@ def preprocess_dataset_labels(self, y):
 
     # Model structure and generation
     def generate_new_model(self):
-        """Return a CNN model from scratch based on given batch_size"""
-
-        model = models.vgg16()
-
-        # TODO: Add new model
-        # model = Sequential()
-        # model.add(Conv2D(32, (3, 3), padding='same', input_shape=self.input_shape))
-        # model.add(Activation('relu'))
-        # model.add(Conv2D(32, (3, 3)))
-        # model.add(Activation('relu'))
-        # model.add(MaxPooling2D(pool_size=(2, 2)))
-        # model.add(Dropout(0.25))
-
-        # model.add(Conv2D(64, (3, 3), padding='same'))
-        # model.add(Activation('relu'))
-        # model.add(Conv2D(64, (3, 3)))
-        # model.add(Activation('relu'))
-        # model.add(MaxPooling2D(pool_size=(2, 2)))
-        # model.add(Dropout(0.25))
-
-        # model.add(Flatten())
-        # model.add(Dense(512))
-        # model.add(Activation('relu'))
-        # model.add(Dropout(0.5))
-        # model.add(Dense(self.num_classes))
-        # model.add(Activation('softmax'))
-
-        # # initiate RMSprop optimizer
-        # opt = RMSprop(learning_rate=0.0001, decay=1e-6)
-
-        # # Let's train the model using RMSprop
-        # model.compile(loss='categorical_crossentropy',
-        #               optimizer=opt,
-        #               metrics=['accuracy'])
-
+        model = ModelPytorch()
         return model
 
     # train, test, val splits
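After this hunk, generate_new_model delegates to the ModelPytorch class now imported from .models, instantiated with no arguments (the removed in-file version required an optimizer and a criterion). That class is not shown in this diff; a hypothetical sketch of what the relocated wrapper could look like, with every detail beyond the name ModelPytorch assumed:

import torch
from torchvision import models

class ModelPytorch(torch.nn.Module):
    """Hypothetical sketch -- the real mplc/models.py implementation is not in this diff."""

    def __init__(self, num_classes=100):
        super().__init__()
        self.net = models.vgg16(num_classes=num_classes)  # torchvision VGG-16, random init
        self.criterion = torch.nn.CrossEntropyLoss()
        self.optimizer = torch.optim.RMSprop(self.net.parameters(), lr=1e-4)

    def forward(self, x):
        return self.net(x)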
@@ -312,136 +265,6 @@ def train_test_split_local(x, y):
     def train_val_split_local(x, y):
         return train_test_split(x, y, test_size=0.1, random_state=42)
 
-
-    class cifar100_dataset(torch.utils.data.Dataset):
-
-        def __init__(self, x, y, transform=[]):
-            self.x = x
-            self.y = y
-            self.transform = transform
-
-        def __len__(self):
-            return len(self.x)
-
-        def __getitem__(self, index):
-
-            x = self.x[index]
-            y = torch.tensor(int(self.y[index]))
-
-            if self.transform:
-                x = self.transform(x)
-
-            return x, y
-
-
-    class ModelPytorch(torchvision.model.vgg16):
-        def __init__(self, optimizer, criterion):
-            super(Cifar100.ModelPytorch, self).__init__()
-            self.optimizer = optimizer
-            self.criterion = criterion
-
-        def fit(self, x_train, y_train, batch_size, validation_data, epochs=1, verbose=False):
-            train_data = cifar100_dataset(x_train, y_train)
-            train_loader = data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
-
-            history = super(Cifar100.ModelPytorch, self).train()
-
-            for batch_idx, (image, label) in enumerate(trainloader):
-                images, labels = torch.autograd.Variable(image), torch.autograd.Variable(label)
-
-                outputs = model(images)
-                loss = self.criterion(outputs, labels)
-
-                self.optimizer.zero_grad()
-                loss.backward()
-                self.optimizer.step()
-
-            [loss, acc] = self.evaluate(x_train, y_train)
-            [val_loss, val_acc] = self.evaluate(*validation_data)
-            # Mimic Keras' history
-            history.history = {
-                'loss': [loss],
-                'accuracy': [acc],
-                'val_loss': [val_loss],
-                'val_accuracy': [val_acc]
-            }
-
-            return history
-
-        def evaluate(self, x_eval, y_eval, **kwargs):
-            test_data = cifar100_dataset(x_eval, y_eval)
-            test_loader = data.DataLoader(test_data, batch_size=batch_size, shuffle=True)
-
-            self.eval()
-
-            with torch.no_grad():
-
-                y_true_np = []
-                y_pred_np = []
-                count=0
-                for i, (images, labels) in enumerate(validation_loader):
-                    count+= 1
-                    N = images.size(0)
-
-                    images = torch.autograd.Variable(images)
-                    labels = torch.autograd.Variable(labels)
-
-                    outputs = model_ft(images)
-
-                    predictions = outputs.max(1, keepdim=True)[1]
-
-                    val_loss =+ criterion(outputs, labels).item()
-                    val_acc =+ (predictions.eq(labels.view_as(predictions)).sum().item() / N)
-
-            model_evaluation = [val_loss/count, val_acc/count]
-
-            return model_evaluation
-
-        # TODO
-        # def save_weights(self, path):
-        #     if self.coef_ is None:
-        #         raise ValueError(
-        #             'Coef and intercept are set to None, it seems the model has not been fit properly.')
-        #     if '.h5' in path:
-        #         logger.debug('Automatically switch file format from .h5 to .npy')
-        #         path.replace('.h5', '.npy')
-        #     np.save(path, self.get_weights())
-
-        # def load_weights(self, path):
-        #     if '.h5' in path:
-        #         logger.debug('Automatically switch file format from .h5 to .npy')
-        #         path.replace('.h5', '.npy')
-        #     weights = load(path)
-        #     self.set_weights(weights)
-
-        # def get_weights(self):
-        #     if self.coef_ is None:
-        #         return None
-        #     else:
-        #         return np.concatenate((self.coef_, self.intercept_.reshape(1, 1)), axis=1)
-
-        # def set_weights(self, weights):
-        #     if weights is None:
-        #         self.coef_ = None
-        #         self.intercept_ = None
-        #     else:
-        #         self.coef_ = np.array(weights[0][:-1]).reshape(1, -1)
-        #         self.intercept_ = np.array(weights[0][-1]).reshape(1)
-
-        # def save_model(self, path):
-        #     if '.h5' in path:
-        #         logger.debug('Automatically switch file format from .h5 to .joblib')
-        #         path.replace('.h5', '.joblib')
-        #     dump(self, path)
-
-        # @staticmethod
-        # def load_model(path):
-        #     if '.h5' in path:
-        #         logger.debug('Automatically switch file format from .h5 to .joblib')
-        #         path.replace('.h5', '.joblib')
-        #     return load(path)
-
-
 
 class Titanic(Dataset):
     def __init__(self, proportion=1,
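For reference, the fit/evaluate pair deleted above was still WIP and would not run as written: trainloader, model, batch_size, validation_loader, model_ft, and criterion are all undefined in their scopes; "=+" assigns a unary plus instead of accumulating; torch.autograd.Variable is long deprecated (plain tensors work); and torchvision.model.vgg16 is not a subclassable base (the module is torchvision.models, and vgg16 is a factory function, not a class). A corrected sketch of the evaluation loop, reusing the cifar100_dataset wrapper shown in the removed block and assuming a model/criterion pair is passed in:

import torch
from torch.utils import data

def evaluate(model, criterion, x_eval, y_eval, batch_size=64):
    # Average loss and accuracy over the evaluation set (no shuffling needed).
    loader = data.DataLoader(cifar100_dataset(x_eval, y_eval), batch_size=batch_size)
    model.eval()
    total_loss, total_acc, count = 0.0, 0.0, 0
    with torch.no_grad():
        for images, labels in loader:
            outputs = model(images)
            predictions = outputs.argmax(dim=1)
            total_loss += criterion(outputs, labels).item()             # '+=', not '=+'
            total_acc += (predictions == labels).float().mean().item()  # per-batch accuracy
            count += 1
    return [total_loss / count, total_acc / count]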

0 commit comments
