From 2f61a8065a31e66c7d4fc885a1e61484fb961337 Mon Sep 17 00:00:00 2001
From: erick7451 <erickplatero97@hotmail.com>
Date: Thu, 9 Jan 2020 20:16:55 -0600
Subject: [PATCH] Added two new parameters to summary: verbose and csv. Verbose
 shows more information about convolutional layers, and csv saves all outputs
 to two CSV files. The added functionality gives users more control over
 their model and a user-friendly way to extract the data

---
 README.md                    |  55 +++++++++++-
 torchsummary/torchsummary.py | 169 +++++++++++++++++++++++++++--------
 2 files changed, 185 insertions(+), 39 deletions(-)

diff --git a/README.md b/README.md
index a1d6059..732ecd8 100644
--- a/README.md
+++ b/README.md
@@ -191,10 +191,63 @@ Estimated Total Size (MB): 0.78
 ----------------------------------------------------------------
 ```
 
+#### Verbose output and CSV export
 
+```python
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torchsummary import summary
+
+class Net(nn.Module):
+    def __init__(self):
+        super(Net, self).__init__()
+        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
+        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
+        self.conv2_drop = nn.Dropout2d()
+        self.fc1 = nn.Linear(320, 50)
+        self.fc2 = nn.Linear(50, 10)
+
+    def forward(self, x):
+        x = F.relu(F.max_pool2d(self.conv1(x), 2))
+        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
+        x = x.view(-1, 320)
+        x = F.relu(self.fc1(x))
+        x = F.dropout(x, training=self.training)
+        x = self.fc2(x)
+        return F.log_softmax(x, dim=1)
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # PyTorch v0.4.0
+model = Net().to(device)
+
+summary(model, (1, 28, 28), verbose = True, csv = True)
+```
+
+```
+-----------------------------------------------------------------------------------------
+Layer (type)    Input Shape    Output Shape      kernel_size   stride   padding   Param #
+=========================================================================================
+Conv2d-1      [-1, 1, 28, 28]  [-1, 10, 24, 24]   (5, 5)      (1, 1)   (0, 0)         260
+Conv2d-2     [-1, 10, 12, 12]    [-1, 20, 8, 8]   (5, 5)      (1, 1)   (0, 0)       5,020
+Dropout2d-3    [-1, 20, 8, 8]    [-1, 20, 8, 8]      0           0        0           0
+Linear-4            [-1, 320]          [-1, 50]      0           0        0        16,050
+Linear-5             [-1, 50]          [-1, 10]      0           0        0         510
+=========================================================================================
+Total params: 21,840
+Trainable params: 21,840
+Non-trainable params: 0
+-----------------------------------------------------------------------------------------
+Input size (MB): 0.00
+Forward/backward pass size (MB): 0.06
+Params size (MB): 0.08
+Estimated Total Size (MB): 0.15
+-----------------------------------------------------------------------------------------
+```
+
+When ```csv = True```, all displayed values are saved to two files in the current working directory: ```model_layers.csv``` (one row per layer) and ```model_params.csv``` (parameter totals and size estimates).
 
 ### References
 
 - The idea for this package sparked from [this PyTorch issue](https://github.com/pytorch/pytorch/issues/2001).
-- Thanks to @ncullen93 and @HTLife. 
+- Thanks to @ncullen93, @HTLife, and @Erick7451. 
 - For Model Size Estimation @jacobkimmel ([details here](https://github.com/sksq96/pytorch-summary/pull/21))
diff --git a/torchsummary/torchsummary.py b/torchsummary/torchsummary.py
index e8c3207..ed7017f 100644
--- a/torchsummary/torchsummary.py
+++ b/torchsummary/torchsummary.py
@@ -1,4 +1,5 @@
 import torch
+import pandas as pd
 import torch.nn as nn
 from torch.autograd import Variable
 
@@ -6,18 +7,19 @@
 import numpy as np
 
 
-def summary(model, input_size, batch_size=-1, device=torch.device('cuda:0'), dtypes=None):
-    if dtypes == None:
-        dtypes = [torch.FloatTensor]*len(input_size)
+def summary(model, input_size, batch_size=-1, device="cuda", verbose = False, csv = False):
 
     def register_hook(module):
 
         def hook(module, input, output):
+            # name of operation (ex: Conv2)
             class_name = str(module.__class__).split(".")[-1].split("'")[0]
             module_idx = len(summary)
-
+            # Conv2-index
             m_key = "%s-%i" % (class_name, module_idx + 1)
+            # element of m_key is another OrderedDict()
             summary[m_key] = OrderedDict()
+            # Pass Key and Elements to inside OrderedDict()
             summary[m_key]["input_shape"] = list(input[0].size())
             summary[m_key]["input_shape"][0] = batch_size
             if isinstance(output, (list, tuple)):
@@ -35,20 +37,39 @@ def hook(module, input, output):
             if hasattr(module, "bias") and hasattr(module.bias, "size"):
                 params += torch.prod(torch.LongTensor(list(module.bias.size())))
             summary[m_key]["nb_params"] = params
+            # if the class is a Conv layer, record: kernel_size, stride, padding
+            if class_name.__contains__("Conv"):
+                summary[m_key]["kernel_size"] = module.kernel_size
+                summary[m_key]["stride"] = module.stride
+                summary[m_key]["padding"] = module.padding
 
         if (
             not isinstance(module, nn.Sequential)
             and not isinstance(module, nn.ModuleList)
+            and not (module == model)
         ):
             hooks.append(module.register_forward_hook(hook))
 
+
+
+    device = device.lower()
+    assert device in [
+        "cuda",
+        "cpu",
+    ], "Input device is not valid, please specify 'cuda' or 'cpu'"
+
+    if device == "cuda" and torch.cuda.is_available():
+        dtype = torch.cuda.FloatTensor
+    else:
+        dtype = torch.FloatTensor
+
     # multiple inputs to the network
     if isinstance(input_size, tuple):
         input_size = [input_size]
 
-
     # batch_size of 2 for batchnorm
-    x = [ torch.rand(2, *in_size).type(dtype).to(device=device) for in_size, dtype in zip(input_size, dtypes)]
+    x = [torch.rand(2, *in_size).type(dtype) for in_size in input_size]
+    # print(type(x[0]))
 
     # create properties
     summary = OrderedDict()
@@ -65,43 +86,115 @@ def hook(module, input, output):
     for h in hooks:
         h.remove()
 
-    print("----------------------------------------------------------------")
-    line_new = "{:>20}  {:>25} {:>15}".format("Layer (type)", "Output Shape", "Param #")
-    print(line_new)
-    print("================================================================")
+    if verbose:
+        print("-" * 112)
+        line_new = "{:>20} {:>20}  {:>20}  {:>10}  {:>10}  {:>10}  {:>10}".format("Layer (type)", "Input Shape","Output Shape","kernel_size","stride","padding", "Param #")
+        print(line_new)
+        print("=" * 112)
+    else:
+        print("----------------------------------------------------------------")
+        line_new = "{:>20}  {:>25} {:>15}".format("Layer (type)", "Output Shape", "Param #")
+        print(line_new)
+        print("================================================================")
     total_params = 0
     total_output = 0
     trainable_params = 0
-    for layer in summary:
-        # input_shape, output_shape, trainable, nb_params
-        line_new = "{:>20}  {:>25} {:>15}".format(
-            layer,
-            str(summary[layer]["output_shape"]),
-            "{0:,}".format(summary[layer]["nb_params"]),
-        )
-        total_params += summary[layer]["nb_params"]
-
-        total_output += np.prod(summary[layer]["output_shape"])
-        if "trainable" in summary[layer]:
-            if summary[layer]["trainable"] == True:
-                trainable_params += summary[layer]["nb_params"]
-        print(line_new)
+    if verbose:
+        for layer in summary:
+            # non-Conv layers have no kernel_size/stride/padding; fill them with 0 so every row formats uniformly
+            if not layer.__contains__("Conv"):
+                summary[layer]['kernel_size'] = 0
+                summary[layer]['stride'] = 0
+                summary[layer]['padding'] = 0
+
+            line_new = "{:>20} {:>20}  {:>20}  {:>10}  {:>10}  {:>10}  {:>10}".format(
+                layer,
+                str(summary[layer]["input_shape"]),
+                str(summary[layer]["output_shape"]),
+                "{0}".format(summary[layer]['kernel_size']),
+                "{0}".format(summary[layer]['stride']),
+                "{0}".format(summary[layer]['padding']),
+                "{0:,}".format(summary[layer]["nb_params"]),
+            )
+            total_params += summary[layer]["nb_params"]
+            total_output += np.prod(summary[layer]["output_shape"])
+            if "trainable" in summary[layer]:
+                if summary[layer]["trainable"] == True:
+                    trainable_params += summary[layer]["nb_params"]
+            print(line_new)
+
+
+    else:
+        for layer in summary:
+            # input_shape, output_shape, trainable, nb_params
+            line_new = "{:>20}  {:>25} {:>15}".format(
+                layer,
+                str(summary[layer]["output_shape"]),
+                "{0:,}".format(summary[layer]["nb_params"]),
+            )
+            total_params += summary[layer]["nb_params"]
+            total_output += np.prod(summary[layer]["output_shape"])
+            if "trainable" in summary[layer]:
+                if summary[layer]["trainable"] == True:
+                    trainable_params += summary[layer]["nb_params"]
+            print(line_new)
 
     # assume 4 bytes/number (float on cuda).
-    total_input_size = abs(np.prod(sum(input_size, ())) * batch_size * 4. / (1024 ** 2.))
+    total_input_size = abs(np.prod(input_size) * batch_size * 4. / (1024 ** 2.))
     total_output_size = abs(2. * total_output * 4. / (1024 ** 2.))  # x2 for gradients
-    total_params_size = abs(total_params * 4. / (1024 ** 2.))
+    total_params_size = abs(total_params.numpy() * 4. / (1024 ** 2.))
     total_size = total_params_size + total_output_size + total_input_size
-
-    print("================================================================")
-    print("Total params: {0:,}".format(total_params))
-    print("Trainable params: {0:,}".format(trainable_params))
-    print("Non-trainable params: {0:,}".format(total_params - trainable_params))
-    print("----------------------------------------------------------------")
-    print("Input size (MB): %0.2f" % total_input_size)
-    print("Forward/backward pass size (MB): %0.2f" % total_output_size)
-    print("Params size (MB): %0.2f" % total_params_size)
-    print("Estimated Total Size (MB): %0.2f" % total_size)
-    print("----------------------------------------------------------------")
+    if verbose:
+        print("="*112)
+        print("Total params: {0:,}".format(total_params))
+        print("Trainable params: {0:,}".format(trainable_params))
+        print("Non-trainable params: {0:,}".format(total_params - trainable_params))
+        print("-"*112)
+        print("Input size (MB): %0.2f" % total_input_size)
+        print("Forward/backward pass size (MB): %0.2f" % total_output_size)
+        print("Params size (MB): %0.2f" % total_params_size)
+        print("Estimated Total Size (MB): %0.2f" % total_size)
+        print("-"*112)
+    else:
+        print("================================================================")
+        print("Total params: {0:,}".format(total_params))
+        print("Trainable params: {0:,}".format(trainable_params))
+        print("Non-trainable params: {0:,}".format(total_params - trainable_params))
+        print("----------------------------------------------------------------")
+        print("Input size (MB): %0.2f" % total_input_size)
+        print("Forward/backward pass size (MB): %0.2f" % total_output_size)
+        print("Params size (MB): %0.2f" % total_params_size)
+        print("Estimated Total Size (MB): %0.2f" % total_size)
+        print("----------------------------------------------------------------")
     # return summary
-    return total_params, trainable_params
+
+    # if csv is set, write the summary to model_layers.csv and model_params.csv
+    if csv:
+        if verbose:
+            cols = ["input_shape","output_shape","kernel_size","stride","padding", "nb_params"]
+        else:
+            cols = ["output_shape", "nb_params"]
+        idx = summary.keys()
+        vals = []
+        for layer in summary:
+            dict_vals = [summary[layer][key] if type(summary[layer][key]) != torch.Tensor else summary[layer][key].item() for key in cols]
+            vals.append(dict_vals)
+
+
+
+        df = pd.DataFrame(vals, index = idx, columns = cols)
+        df.index.name = 'Layer'
+        df.to_csv('model_layers.csv')
+
+        # Create a second df with: total params, trainable params, non-trainable params, input size, forward/backward pass, params size, estimated total size
+        cols = ['Total params', 'Trainable params', 'Non-trainable params', 'Input size (MB)', "Forward/backward pass size (MB):","Params size (MB)", "Estimated Total Size (MB)"]
+        vals = [total_params.item(), trainable_params.item(), (total_params - trainable_params).item(), total_input_size, total_output_size, total_params_size, total_size]
+        df2 = pd.DataFrame(vals, index = cols, columns = ['model params']).transpose()
+        df2.to_csv('model_params.csv')
+
+
+
+
+
+
+