Commit e21bc35

Fix conflict
1 parent d6b43a2 commit e21bc35

1 file changed: mplc/dataset.py (+249 -0)

@@ -13,6 +13,20 @@

import numpy as np
import pandas as pd
from joblib import dump, load
from keras.datasets import cifar10, cifar100, mnist, imdb
from keras.layers import Activation
from keras.layers import Conv2D, GlobalAveragePooling2D, MaxPooling2D
from keras.layers import Dense, Dropout
from keras.layers import Embedding, Conv1D, MaxPooling1D, Flatten
from keras.losses import categorical_crossentropy
from keras.models import Sequential
from keras.optimizers import RMSprop
from keras.preprocessing import sequence
from keras.utils import to_categorical
import torch
from torch.utils import data
from torchvision import models
from librosa import load as wav_load
from librosa.feature import mfcc
from loguru import logger
@@ -194,6 +208,241 @@ def generate_new_model(self):
        return model


class Cifar100(Dataset):
    def __init__(self):
        self.input_shape = (32, 32, 3)
        self.num_classes = 100
        x_test, x_train, y_test, y_train = self.load_data()

        super(Cifar100, self).__init__(dataset_name='cifar100',
                                       num_classes=self.num_classes,
                                       input_shape=self.input_shape,
                                       x_train=x_train,
                                       y_train=y_train,
                                       x_test=x_test,
                                       y_test=y_test)

    def load_data(self):
        attempts = 0
        while True:
            try:
                (x_train, y_train), (x_test, y_test) = cifar100.load_data()
                break
            except (HTTPError, URLError) as e:
                if hasattr(e, 'code'):
                    temp = e.code
                else:
                    temp = e.errno
                logger.debug(
                    f'URL fetch failure on '
                    f'https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz : '
                    f'{temp} -- {e.reason}')
                if attempts < constants.NUMBER_OF_DOWNLOAD_ATTEMPTS:
                    sleep(2)
                    attempts += 1
                else:
                    raise

        # Pre-process inputs
        x_train = self.preprocess_dataset_inputs(x_train)
        x_test = self.preprocess_dataset_inputs(x_test)
        y_train = self.preprocess_dataset_labels(y_train)
        y_test = self.preprocess_dataset_labels(y_test)
        return x_test, x_train, y_test, y_train

    # Data samples pre-processing method for inputs
    @staticmethod
    def preprocess_dataset_inputs(x):
        x = x.astype("float32")
        x /= 255

        return x

    # Data samples pre-processing method for labels
    def preprocess_dataset_labels(self, y):
        y = to_categorical(y, self.num_classes)

        return y

    # Model structure and generation
    def generate_new_model(self):
        """Return a CNN model for CIFAR-100"""

        # Placeholder: torchvision's VGG-16, sized for 100 classes
        model = models.vgg16(num_classes=self.num_classes)

        # TODO: Add new model
        # model = Sequential()
        # model.add(Conv2D(32, (3, 3), padding='same', input_shape=self.input_shape))
        # model.add(Activation('relu'))
        # model.add(Conv2D(32, (3, 3)))
        # model.add(Activation('relu'))
        # model.add(MaxPooling2D(pool_size=(2, 2)))
        # model.add(Dropout(0.25))

        # model.add(Conv2D(64, (3, 3), padding='same'))
        # model.add(Activation('relu'))
        # model.add(Conv2D(64, (3, 3)))
        # model.add(Activation('relu'))
        # model.add(MaxPooling2D(pool_size=(2, 2)))
        # model.add(Dropout(0.25))

        # model.add(Flatten())
        # model.add(Dense(512))
        # model.add(Activation('relu'))
        # model.add(Dropout(0.5))
        # model.add(Dense(self.num_classes))
        # model.add(Activation('softmax'))

        # # initiate RMSprop optimizer
        # opt = RMSprop(learning_rate=0.0001, decay=1e-6)

        # # Let's train the model using RMSprop
        # model.compile(loss='categorical_crossentropy',
        #               optimizer=opt,
        #               metrics=['accuracy'])

        return model

    # train, test, val splits
    @staticmethod
    def train_test_split_local(x, y):
        return train_test_split(x, y, test_size=0.1, random_state=42)

    @staticmethod
    def train_val_split_local(x, y):
        return train_test_split(x, y, test_size=0.1, random_state=42)


class cifar100_dataset(torch.utils.data.Dataset):

    def __init__(self, x, y, transform=None):
        self.x = x
        self.y = y
        self.transform = transform

    def __len__(self):
        return len(self.x)

    def __getitem__(self, index):
        x = self.x[index]
        # Assumes labels are integer class indices, not one-hot vectors
        y = torch.tensor(int(self.y[index]))

        if self.transform:
            x = self.transform(x)

        return x, y


class ModelPytorch(torch.nn.Module):
    # torchvision.models.vgg16 is a factory function, not a class, so it
    # cannot be subclassed directly; wrap the model it builds instead
    def __init__(self, optimizer, criterion):
        super(ModelPytorch, self).__init__()
        self.vgg = models.vgg16(num_classes=100)
        # The optimizer is expected to be built over this model's parameters
        self.optimizer = optimizer
        self.criterion = criterion

    def forward(self, x):
        return self.vgg(x)

    def fit(self, x_train, y_train, batch_size, validation_data, epochs=1, verbose=False):
        train_data = cifar100_dataset(x_train, y_train)
        train_loader = data.DataLoader(train_data, batch_size=batch_size, shuffle=True)

        # train() switches the module to training mode and returns it;
        # reuse the returned module as a Keras-like history carrier
        history = self.train()

        for epoch in range(epochs):
            for batch_idx, (images, labels) in enumerate(train_loader):
                outputs = self(images)
                loss = self.criterion(outputs, labels)

                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

        [loss, acc] = self.evaluate(x_train, y_train, batch_size=batch_size)
        [val_loss, val_acc] = self.evaluate(*validation_data, batch_size=batch_size)
        # Mimic Keras' history
        history.history = {
            'loss': [loss],
            'accuracy': [acc],
            'val_loss': [val_loss],
            'val_accuracy': [val_acc]
        }

        return history

    def evaluate(self, x_eval, y_eval, batch_size=32, **kwargs):
        test_data = cifar100_dataset(x_eval, y_eval)
        test_loader = data.DataLoader(test_data, batch_size=batch_size, shuffle=False)

        self.eval()

        with torch.no_grad():
            val_loss = 0
            val_acc = 0
            count = 0
            for images, labels in test_loader:
                count += 1
                N = images.size(0)

                outputs = self(images)

                predictions = outputs.max(1, keepdim=True)[1]

                val_loss += self.criterion(outputs, labels).item()
                val_acc += predictions.eq(labels.view_as(predictions)).sum().item() / N

        # Average the per-batch loss and accuracy
        model_evaluation = [val_loss / count, val_acc / count]

        return model_evaluation

    # TODO
    # def save_weights(self, path):
    #     if self.coef_ is None:
    #         raise ValueError(
    #             'Coef and intercept are set to None, it seems the model has not been fit properly.')
    #     if '.h5' in path:
    #         logger.debug('Automatically switch file format from .h5 to .npy')
    #         path.replace('.h5', '.npy')
    #     np.save(path, self.get_weights())

    # def load_weights(self, path):
    #     if '.h5' in path:
    #         logger.debug('Automatically switch file format from .h5 to .npy')
    #         path.replace('.h5', '.npy')
    #     weights = load(path)
    #     self.set_weights(weights)

    # def get_weights(self):
    #     if self.coef_ is None:
    #         return None
    #     else:
    #         return np.concatenate((self.coef_, self.intercept_.reshape(1, 1)), axis=1)

    # def set_weights(self, weights):
    #     if weights is None:
    #         self.coef_ = None
    #         self.intercept_ = None
    #     else:
    #         self.coef_ = np.array(weights[0][:-1]).reshape(1, -1)
    #         self.intercept_ = np.array(weights[0][-1]).reshape(1)

    # def save_model(self, path):
    #     if '.h5' in path:
    #         logger.debug('Automatically switch file format from .h5 to .joblib')
    #         path.replace('.h5', '.joblib')
    #     dump(self, path)

    # @staticmethod
    # def load_model(path):
    #     if '.h5' in path:
    #         logger.debug('Automatically switch file format from .h5 to .joblib')
    #         path.replace('.h5', '.joblib')
    #     return load(path)


class Titanic(Dataset):
    def __init__(self, proportion=1,
                 val_proportion=0.1):
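
For context, a minimal sketch of how the new ModelPytorch class could be exercised once this lands. This is a hypothetical usage example: the dummy arrays, learning rate, and import path mplc.dataset are assumptions for illustration, not part of this commit.

# Hypothetical usage sketch of ModelPytorch; assumes integer class labels
# (not the one-hot labels produced by preprocess_dataset_labels)
import numpy as np
import torch

from mplc.dataset import ModelPytorch

x_train = np.random.rand(64, 3, 32, 32).astype('float32')  # dummy CHW images
y_train = np.random.randint(0, 100, size=64)               # dummy labels
x_val = np.random.rand(16, 3, 32, 32).astype('float32')
y_val = np.random.randint(0, 100, size=16)

model = ModelPytorch(optimizer=None, criterion=torch.nn.CrossEntropyLoss())
# The optimizer needs the model's parameters, so attach it after construction
model.optimizer = torch.optim.RMSprop(model.parameters(), lr=1e-4)

history = model.fit(x_train, y_train,
                    batch_size=32,
                    validation_data=(x_val, y_val),
                    epochs=1)
print(history.history['val_accuracy'])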
