From 2f61a8065a31e66c7d4fc885a1e61484fb961337 Mon Sep 17 00:00:00 2001 From: erick7451 Date: Thu, 9 Jan 2020 20:16:55 -0600 Subject: [PATCH] Added two new parameters to summary: verbose and csv. Verbose shows more information about convolutional layers, and csv saves all outputs to two CSV files. The added functionality gives users more control over their model and a user-friendly way to extract the data --- README.md | 55 +++++++++++- torchsummary/torchsummary.py | 169 +++++++++++++++++++++++++++-------- 2 files changed, 185 insertions(+), 39 deletions(-) diff --git a/README.md b/README.md index a1d6059..732ecd8 100644 --- a/README.md +++ b/README.md @@ -191,10 +191,63 @@ Estimated Total Size (MB): 0.78 ---------------------------------------------------------------- ``` +#### verbose and CSV +```python +import torch +import torch.nn as nn +import torch.nn.functional as F +from torchsummary import summary + +class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.conv1 = nn.Conv2d(1, 10, kernel_size=5) + self.conv2 = nn.Conv2d(10, 20, kernel_size=5) + self.conv2_drop = nn.Dropout2d() + self.fc1 = nn.Linear(320, 50) + self.fc2 = nn.Linear(50, 10) + + def forward(self, x): + x = F.relu(F.max_pool2d(self.conv1(x), 2)) + x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) + x = x.view(-1, 320) + x = F.relu(self.fc1(x)) + x = F.dropout(x, training=self.training) + x = self.fc2(x) + return F.log_softmax(x, dim=1) + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # PyTorch v0.4.0 +model = Net().to(device) + +summary(model, (1, 28, 28), verbose = True, csv = True) +``` + +``` +----------------------------------------------------------------------------------------- +Layer (type) Input Shape Output Shape kernel_size stride padding Param # +========================================================================================= +Conv2d-1 [-1, 1, 28, 28] [-1, 10, 24, 24] (5, 5) (1, 1) (0, 0) 260 +Conv2d-2 [-1, 
10, 12, 12] [-1, 20, 8, 8] (5, 5) (1, 1) (0, 0) 5,020 +Dropout2d-3 [-1, 20, 8, 8] [-1, 20, 8, 8] 0 0 0 0 +Linear-4 [-1, 320] [-1, 50] 0 0 0 16,050 +Linear-5 [-1, 50] [-1, 10] 0 0 0 510 +========================================================================================= +Total params: 21,840 +Trainable params: 21,840 +Non-trainable params: 0 +----------------------------------------------------------------------------------------- +Input size (MB): 0.00 +Forward/backward pass size (MB): 0.06 +Params size (MB): 0.08 +Estimated Total Size (MB): 0.15 +----------------------------------------------------------------------------------------- +``` + +When ```csv = True```, all parameters shown above are saved to two files: ```model_layers.csv``` and ```model_params.csv``` ### References - The idea for this package sparked from [this PyTorch issue](https://github.com/pytorch/pytorch/issues/2001). -- Thanks to @ncullen93 and @HTLife. +- Thanks to @ncullen93, @HTLife, and @Erick7451. - For Model Size Estimation @jacobkimmel ([details here](https://github.com/sksq96/pytorch-summary/pull/21)) diff --git a/torchsummary/torchsummary.py b/torchsummary/torchsummary.py index e8c3207..ed7017f 100644 --- a/torchsummary/torchsummary.py +++ b/torchsummary/torchsummary.py @@ -1,4 +1,5 @@ import torch +import pandas as pd import torch.nn as nn from torch.autograd import Variable @@ -6,18 +7,19 @@ import numpy as np -def summary(model, input_size, batch_size=-1, device=torch.device('cuda:0'), dtypes=None): - if dtypes == None: - dtypes = [torch.FloatTensor]*len(input_size) +def summary(model, input_size, batch_size=-1, device="cuda", verbose = False, csv = False): def register_hook(module): def hook(module, input, output): + # name of operation (ex: Conv2) class_name = str(module.__class__).split(".")[-1].split("'")[0] module_idx = len(summary) - + # Conv2-index m_key = "%s-%i" % (class_name, module_idx + 1) + # element of m_key is another OrderedDict() summary[m_key] = OrderedDict() + # 
Pass Key and Elements to inside OrderedDict() summary[m_key]["input_shape"] = list(input[0].size()) summary[m_key]["input_shape"][0] = batch_size if isinstance(output, (list, tuple)): @@ -35,20 +37,39 @@ def hook(module, input, output): if hasattr(module, "bias") and hasattr(module.bias, "size"): params += torch.prod(torch.LongTensor(list(module.bias.size()))) summary[m_key]["nb_params"] = params + # if class if Conv, attain: kernel_size, stride, padding + if class_name.__contains__("Conv"): + summary[m_key]["kernel_size"] = module.kernel_size + summary[m_key]["stride"] = module.stride + summary[m_key]["padding"] = module.padding if ( not isinstance(module, nn.Sequential) and not isinstance(module, nn.ModuleList) + and not (module == model) ): hooks.append(module.register_forward_hook(hook)) + + + device = device.lower() + assert device in [ + "cuda", + "cpu", + ], "Input device is not valid, please specify 'cuda' or 'cpu'" + + if device == "cuda" and torch.cuda.is_available(): + dtype = torch.cuda.FloatTensor + else: + dtype = torch.FloatTensor + # multiple inputs to the network if isinstance(input_size, tuple): input_size = [input_size] - # batch_size of 2 for batchnorm - x = [ torch.rand(2, *in_size).type(dtype).to(device=device) for in_size, dtype in zip(input_size, dtypes)] + x = [torch.rand(2, *in_size).type(dtype) for in_size in input_size] + # print(type(x[0])) # create properties summary = OrderedDict() @@ -65,43 +86,115 @@ def hook(module, input, output): for h in hooks: h.remove() - print("----------------------------------------------------------------") - line_new = "{:>20} {:>25} {:>15}".format("Layer (type)", "Output Shape", "Param #") - print(line_new) - print("================================================================") + if verbose: + print("-" * 112) + line_new = "{:>20} {:>20} {:>20} {:>10} {:>10} {:>10} {:>10}".format("Layer (type)", "Input Shape","Output Shape","kernel_size","stride","padding", "Param #") + print(line_new) + print("=" * 
112) + else: + print("----------------------------------------------------------------") + line_new = "{:>20} {:>25} {:>15}".format("Layer (type)", "Output Shape", "Param #") + print(line_new) + print("================================================================") total_params = 0 total_output = 0 trainable_params = 0 - for layer in summary: - # input_shape, output_shape, trainable, nb_params - line_new = "{:>20} {:>25} {:>15}".format( - layer, - str(summary[layer]["output_shape"]), - "{0:,}".format(summary[layer]["nb_params"]), - ) - total_params += summary[layer]["nb_params"] - - total_output += np.prod(summary[layer]["output_shape"]) - if "trainable" in summary[layer]: - if summary[layer]["trainable"] == True: - trainable_params += summary[layer]["nb_params"] - print(line_new) + if verbose: + for layer in summary: + # if false, add parameters + if not layer.__contains__("Conv"): + summary[layer]['kernel_size'] = 0 + summary[layer]['stride'] = 0 + summary[layer]['padding'] = 0 + + line_new = "{:>20} {:>20} {:>20} {:>10} {:>10} {:>10} {:>10}".format( + layer, + str(summary[layer]["input_shape"]), + str(summary[layer]["output_shape"]), + "{0}".format(summary[layer]['kernel_size']), + "{0}".format(summary[layer]['stride']), + "{0}".format(summary[layer]['padding']), + "{0:,}".format(summary[layer]["nb_params"]), + ) + total_params += summary[layer]["nb_params"] + total_output += np.prod(summary[layer]["output_shape"]) + if "trainable" in summary[layer]: + if summary[layer]["trainable"] == True: + trainable_params += summary[layer]["nb_params"] + print(line_new) + + + else: + for layer in summary: + # input_shape, output_shape, trainable, nb_params + line_new = "{:>20} {:>25} {:>15}".format( + layer, + str(summary[layer]["output_shape"]), + "{0:,}".format(summary[layer]["nb_params"]), + ) + total_params += summary[layer]["nb_params"] + total_output += np.prod(summary[layer]["output_shape"]) + if "trainable" in summary[layer]: + if summary[layer]["trainable"] == 
True: + trainable_params += summary[layer]["nb_params"] + print(line_new) # assume 4 bytes/number (float on cuda). - total_input_size = abs(np.prod(sum(input_size, ())) * batch_size * 4. / (1024 ** 2.)) + total_input_size = abs(np.prod(input_size) * batch_size * 4. / (1024 ** 2.)) total_output_size = abs(2. * total_output * 4. / (1024 ** 2.)) # x2 for gradients - total_params_size = abs(total_params * 4. / (1024 ** 2.)) + total_params_size = abs(total_params.numpy() * 4. / (1024 ** 2.)) total_size = total_params_size + total_output_size + total_input_size - - print("================================================================") - print("Total params: {0:,}".format(total_params)) - print("Trainable params: {0:,}".format(trainable_params)) - print("Non-trainable params: {0:,}".format(total_params - trainable_params)) - print("----------------------------------------------------------------") - print("Input size (MB): %0.2f" % total_input_size) - print("Forward/backward pass size (MB): %0.2f" % total_output_size) - print("Params size (MB): %0.2f" % total_params_size) - print("Estimated Total Size (MB): %0.2f" % total_size) - print("----------------------------------------------------------------") + if verbose: + print("="*112) + print("Total params: {0:,}".format(total_params)) + print("Trainable params: {0:,}".format(trainable_params)) + print("Non-trainable params: {0:,}".format(total_params - trainable_params)) + print("-"*112) + print("Input size (MB): %0.2f" % total_input_size) + print("Forward/backward pass size (MB): %0.2f" % total_output_size) + print("Params size (MB): %0.2f" % total_params_size) + print("Estimated Total Size (MB): %0.2f" % total_size) + print("-"*112) + else: + print("================================================================") + print("Total params: {0:,}".format(total_params)) + print("Trainable params: {0:,}".format(trainable_params)) + print("Non-trainable params: {0:,}".format(total_params - trainable_params)) + 
print("----------------------------------------------------------------") + print("Input size (MB): %0.2f" % total_input_size) + print("Forward/backward pass size (MB): %0.2f" % total_output_size) + print("Params size (MB): %0.2f" % total_params_size) + print("Estimated Total Size (MB): %0.2f" % total_size) + print("----------------------------------------------------------------") # return summary - return total_params, trainable_params + + # if csv == True, print a df + if csv: + if verbose: + cols = ["input_shape","output_shape","kernel_size","stride","padding", "nb_params"] + else: + cols = ["output_shape", "nb_params"] + idx = summary.keys() + vals = [] + for layer in summary: + dict_vals = [summary[layer][key] if type(summary[layer][key]) != torch.Tensor else summary[layer][key].item() for key in cols] + vals.append(dict_vals) + + + + df = pd.DataFrame(vals, index = idx, columns = cols) + df.index.name = 'Layer' + df.to_csv('model_layers.csv') + + # Create a second df with: total params, trainable params, non-trainable params, input size, forward/backward pass, params size, estimated total size + cols = ['Total params', 'Trainable params', 'Non-trainable params', 'Input size (MB)', "Forward/backward pass size (MB):","Params size (MB)", "Estimated Total Size (MB)"] + vals = [total_params.item(), trainable_params.item(), (total_params - trainable_params).item(), total_input_size, total_output_size, total_params_size, total_size] + df2 = pd.DataFrame(vals, index = cols, columns = ['model params']).transpose() + df2.to_csv('model_params.csv') + + + + + + +