diff --git a/README.md b/README.md
index a1d6059..732ecd8 100644
--- a/README.md
+++ b/README.md
@@ -191,10 +191,63 @@
 Estimated Total Size (MB): 0.78
 ----------------------------------------------------------------
 ```
+
+#### verbose and CSV
+```python
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torchsummary import summary
+
+class Net(nn.Module):
+    def __init__(self):
+        super(Net, self).__init__()
+        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
+        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
+        self.conv2_drop = nn.Dropout2d()
+        self.fc1 = nn.Linear(320, 50)
+        self.fc2 = nn.Linear(50, 10)
+
+    def forward(self, x):
+        x = F.relu(F.max_pool2d(self.conv1(x), 2))
+        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
+        x = x.view(-1, 320)
+        x = F.relu(self.fc1(x))
+        x = F.dropout(x, training=self.training)
+        x = self.fc2(x)
+        return F.log_softmax(x, dim=1)
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # PyTorch v0.4.0
+model = Net().to(device)
+
+summary(model, (1, 28, 28), verbose=True, csv=True)
+```
+
+```
+----------------------------------------------------------------------------------------------------------------
+        Layer (type)          Input Shape         Output Shape kernel_size     stride    padding    Param #
+================================================================================================================
+            Conv2d-1      [-1, 1, 28, 28]     [-1, 10, 24, 24]     (5, 5)     (1, 1)     (0, 0)        260
+            Conv2d-2     [-1, 10, 12, 12]       [-1, 20, 8, 8]     (5, 5)     (1, 1)     (0, 0)      5,020
+         Dropout2d-3       [-1, 20, 8, 8]       [-1, 20, 8, 8]          0          0          0          0
+            Linear-4            [-1, 320]             [-1, 50]          0          0          0     16,050
+            Linear-5             [-1, 50]             [-1, 10]          0          0          0        510
+================================================================================================================
+Total params: 21,840
+Trainable params: 21,840
+Non-trainable params: 0
+----------------------------------------------------------------------------------------------------------------
+Input size (MB): 0.00
+Forward/backward pass size (MB): 0.06
+Params size (MB): 0.08
+Estimated Total Size (MB): 0.15
+----------------------------------------------------------------------------------------------------------------
+```
+
+When `csv=True`, everything shown above is also saved to two files: `model_layers.csv` (the per-layer table) and `model_params.csv` (the aggregate statistics).
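+
+For further analysis, both files load straight back into pandas. A minimal sketch (it assumes `summary` was just called with `csv=True`, so the files sit in the current working directory):
+
+```python
+import pandas as pd
+
+# per-layer table: shapes, kernel/stride/padding, parameter counts
+layers = pd.read_csv('model_layers.csv', index_col='Layer')
+print(layers['nb_params'].sum())        # 21840, matching "Total params" above
+
+# single-row table with the aggregate statistics
+totals = pd.read_csv('model_params.csv', index_col=0)
+print(totals['Total params'].iloc[0])   # 21840.0 for the example above
+```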
 
 ### References
 
 - The idea for this package sparked from [this PyTorch issue](https://github.com/pytorch/pytorch/issues/2001).
-- Thanks to @ncullen93 and @HTLife.
+- Thanks to @ncullen93, @HTLife, and @Erick7451.
 - For Model Size Estimation @jacobkimmel ([details here](https://github.com/sksq96/pytorch-summary/pull/21))
diff --git a/torchsummary/torchsummary.py b/torchsummary/torchsummary.py
index e8c3207..ed7017f 100644
--- a/torchsummary/torchsummary.py
+++ b/torchsummary/torchsummary.py
@@ -1,4 +1,5 @@
 import torch
+import pandas as pd
 import torch.nn as nn
 from torch.autograd import Variable
@@ -6,18 +7,19 @@
 import numpy as np
 
-def summary(model, input_size, batch_size=-1, device=torch.device('cuda:0'), dtypes=None):
-    if dtypes == None:
-        dtypes = [torch.FloatTensor]*len(input_size)
+def summary(model, input_size, batch_size=-1, device="cuda", verbose=False, csv=False):
 
     def register_hook(module):
         def hook(module, input, output):
+            # name of the operation (e.g. Conv2d)
             class_name = str(module.__class__).split(".")[-1].split("'")[0]
             module_idx = len(summary)
-
+            # key of the form "<class name>-<index>", e.g. Conv2d-1
             m_key = "%s-%i" % (class_name, module_idx + 1)
+            # each layer's entry is itself an OrderedDict
             summary[m_key] = OrderedDict()
+            # record the input/output shapes for this layer
             summary[m_key]["input_shape"] = list(input[0].size())
             summary[m_key]["input_shape"][0] = batch_size
             if isinstance(output, (list, tuple)):
@@ -35,20 +37,39 @@ def hook(module, input, output):
             if hasattr(module, "bias") and hasattr(module.bias, "size"):
                 params += torch.prod(torch.LongTensor(list(module.bias.size())))
             summary[m_key]["nb_params"] = params
+            # for Conv layers, also record kernel_size, stride, and padding
+            if "Conv" in class_name:
+                summary[m_key]["kernel_size"] = module.kernel_size
+                summary[m_key]["stride"] = module.stride
+                summary[m_key]["padding"] = module.padding
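+                # e.g. nn.Conv2d(1, 10, kernel_size=5) from the README example
+                # records kernel_size=(5, 5), stride=(1, 1), padding=(0, 0)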
 
         if (
             not isinstance(module, nn.Sequential)
             and not isinstance(module, nn.ModuleList)
+            and not (module == model)
         ):
             hooks.append(module.register_forward_hook(hook))
+
+    device = device.lower()
+    assert device in [
+        "cuda",
+        "cpu",
+    ], "Input device is not valid, please specify 'cuda' or 'cpu'"
+
+    # pick the input tensor type to match the requested device; the model
+    # itself must already live on that device (see the README example)
+    if device == "cuda" and torch.cuda.is_available():
+        dtype = torch.cuda.FloatTensor
+    else:
+        dtype = torch.FloatTensor
 
     # multiple inputs to the network
     if isinstance(input_size, tuple):
         input_size = [input_size]
 
     # batch_size of 2 for batchnorm
-    x = [torch.rand(2, *in_size).type(dtype).to(device=device) for in_size, dtype in zip(input_size, dtypes)]
+    x = [torch.rand(2, *in_size).type(dtype) for in_size in input_size]
 
     # create properties
     summary = OrderedDict()
@@ -65,43 +86,115 @@ def hook(module, input, output):
     for h in hooks:
         h.remove()
 
-    print("----------------------------------------------------------------")
-    line_new = "{:>20} {:>25} {:>15}".format("Layer (type)", "Output Shape", "Param #")
-    print(line_new)
-    print("================================================================")
+    if verbose:
+        print("-" * 112)
+        line_new = "{:>20} {:>20} {:>20} {:>10} {:>10} {:>10} {:>10}".format("Layer (type)", "Input Shape", "Output Shape", "kernel_size", "stride", "padding", "Param #")
+        print(line_new)
+        print("=" * 112)
+    else:
+        print("----------------------------------------------------------------")
+        line_new = "{:>20} {:>25} {:>15}".format("Layer (type)", "Output Shape", "Param #")
+        print(line_new)
+        print("================================================================")
 
     total_params = 0
     total_output = 0
     trainable_params = 0
-    for layer in summary:
-        # input_shape, output_shape, trainable, nb_params
-        line_new = "{:>20} {:>25} {:>15}".format(
-            layer,
-            str(summary[layer]["output_shape"]),
-            "{0:,}".format(summary[layer]["nb_params"]),
-        )
-        total_params += summary[layer]["nb_params"]
-        total_output += np.prod(summary[layer]["output_shape"])
-        if "trainable" in summary[layer]:
-            if summary[layer]["trainable"] == True:
-                trainable_params += summary[layer]["nb_params"]
-        print(line_new)
+    if verbose:
+        for layer in summary:
+            # non-Conv layers carry no kernel/stride/padding; show 0 instead
+            if "Conv" not in layer:
+                summary[layer]['kernel_size'] = 0
+                summary[layer]['stride'] = 0
+                summary[layer]['padding'] = 0
+
+            line_new = "{:>20} {:>20} {:>20} {:>10} {:>10} {:>10} {:>10}".format(
+                layer,
+                str(summary[layer]["input_shape"]),
+                str(summary[layer]["output_shape"]),
+                "{0}".format(summary[layer]['kernel_size']),
+                "{0}".format(summary[layer]['stride']),
+                "{0}".format(summary[layer]['padding']),
+                "{0:,}".format(summary[layer]["nb_params"]),
+            )
+            total_params += summary[layer]["nb_params"]
+            total_output += np.prod(summary[layer]["output_shape"])
+            if "trainable" in summary[layer]:
+                if summary[layer]["trainable"] == True:
+                    trainable_params += summary[layer]["nb_params"]
+            print(line_new)
+    else:
+        for layer in summary:
+            # input_shape, output_shape, trainable, nb_params
+            line_new = "{:>20} {:>25} {:>15}".format(
+                layer,
+                str(summary[layer]["output_shape"]),
+                "{0:,}".format(summary[layer]["nb_params"]),
+            )
+            total_params += summary[layer]["nb_params"]
+            total_output += np.prod(summary[layer]["output_shape"])
+            if "trainable" in summary[layer]:
+                if summary[layer]["trainable"] == True:
+                    trainable_params += summary[layer]["nb_params"]
+            print(line_new)
 
     # assume 4 bytes/number (float on cuda).
     total_input_size = abs(np.prod(sum(input_size, ())) * batch_size * 4. / (1024 ** 2.))
     total_output_size = abs(2. * total_output * 4. / (1024 ** 2.))  # x2 for gradients
-    total_params_size = abs(total_params * 4. / (1024 ** 2.))
+    total_params_size = abs(total_params.numpy() * 4. / (1024 ** 2.))
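+    # e.g. for the MNIST example in the README: 21,840 params * 4 bytes
+    # = 87,360 bytes / 1024**2 ~ 0.08 MB, the "Params size (MB)" line above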
     total_size = total_params_size + total_output_size + total_input_size
 
-    print("================================================================")
-    print("Total params: {0:,}".format(total_params))
-    print("Trainable params: {0:,}".format(trainable_params))
-    print("Non-trainable params: {0:,}".format(total_params - trainable_params))
-    print("----------------------------------------------------------------")
-    print("Input size (MB): %0.2f" % total_input_size)
-    print("Forward/backward pass size (MB): %0.2f" % total_output_size)
-    print("Params size (MB): %0.2f" % total_params_size)
-    print("Estimated Total Size (MB): %0.2f" % total_size)
-    print("----------------------------------------------------------------")
+    if verbose:
+        print("=" * 112)
+        print("Total params: {0:,}".format(total_params))
+        print("Trainable params: {0:,}".format(trainable_params))
+        print("Non-trainable params: {0:,}".format(total_params - trainable_params))
+        print("-" * 112)
+        print("Input size (MB): %0.2f" % total_input_size)
+        print("Forward/backward pass size (MB): %0.2f" % total_output_size)
+        print("Params size (MB): %0.2f" % total_params_size)
+        print("Estimated Total Size (MB): %0.2f" % total_size)
+        print("-" * 112)
+    else:
+        print("================================================================")
+        print("Total params: {0:,}".format(total_params))
+        print("Trainable params: {0:,}".format(trainable_params))
+        print("Non-trainable params: {0:,}".format(total_params - trainable_params))
+        print("----------------------------------------------------------------")
+        print("Input size (MB): %0.2f" % total_input_size)
+        print("Forward/backward pass size (MB): %0.2f" % total_output_size)
+        print("Params size (MB): %0.2f" % total_params_size)
+        print("Estimated Total Size (MB): %0.2f" % total_size)
+        print("----------------------------------------------------------------")
 
+    # if csv is True, export the summary to two CSV files
+    if csv:
+        if verbose:
+            cols = ["input_shape", "output_shape", "kernel_size", "stride", "padding", "nb_params"]
+        else:
+            cols = ["output_shape", "nb_params"]
+        idx = summary.keys()
+        vals = []
+        for layer in summary:
+            # tensors (the parameter counts) are converted to plain ints
+            dict_vals = [summary[layer][key].item() if isinstance(summary[layer][key], torch.Tensor) else summary[layer][key] for key in cols]
+            vals.append(dict_vals)
+
+        # per-layer table
+        df = pd.DataFrame(vals, index=idx, columns=cols)
+        df.index.name = 'Layer'
+        df.to_csv('model_layers.csv')
+
+        # single-row table with the aggregate statistics
+        cols = ['Total params', 'Trainable params', 'Non-trainable params', 'Input size (MB)', 'Forward/backward pass size (MB)', 'Params size (MB)', 'Estimated Total Size (MB)']
+        vals = [total_params.item(), trainable_params.item(), (total_params - trainable_params).item(), total_input_size, total_output_size, total_params_size, total_size]
+        df2 = pd.DataFrame(vals, index=cols, columns=['model params']).transpose()
+        df2.to_csv('model_params.csv')
+
+    # return summary
     return total_params, trainable_params