# -*- coding: utf-8 -*-
"""
Implementation of DAST for noise-robust learning, according to the following paper:
Shuojue Yang, Guotai Wang, Hui Sun, Xiangde Luo, Peng Sun, Kang Li, Qijun Wang,
Shaoting Zhang: Learning COVID-19 Pneumonia Lesion Segmentation from Imperfect
Annotations via Divergence-Aware Selective Training.
JBHI 2022. https://ieeexplore.ieee.org/document/9770406
"""

from __future__ import print_function, division
import random
import torch
import numpy as np
import torch.nn as nn
import torchvision.transforms as transforms
from torch.optim import lr_scheduler
from pymic.io.nifty_dataset import NiftyDataset
from pymic.loss.seg.util import get_soft_label
from pymic.loss.seg.util import reshape_prediction_and_ground_truth
from pymic.loss.seg.util import get_classwise_dice
from pymic.net_run.agent_seg import SegmentationAgent
from pymic.util.parse_config import *
from pymic.util.ramps import get_rampup_ratio

class Rank(object):
    """
    Dynamically rank the current training sample against a queue of recent
    values of a given metric.
    """
    def __init__(self, queue_length = 100):
        self.vals = []
        self.queue_length = queue_length

    def add_val(self, val):
        """
        Update the queue and calculate the rank of the input value.

        Return
        ---------
        rank: rank of the input value within the queue, in the range
            [0, self.queue_length - 1]; -1 is returned while the queue
            is still being filled.
        """
        if len(self.vals) < self.queue_length:
            self.vals.append(val)
            rank = -1
        else:
            self.vals.pop(0)
            self.vals.append(val)
            assert len(self.vals) == self.queue_length
            idxes = np.argsort(self.vals)
            rank = np.where(idxes == self.queue_length - 1)[0][0]
        return rank
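
# A small usage sketch with hypothetical values: with queue_length = 3 the
# first three calls return -1 while the queue fills, after which the rank of
# the newest value among the stored ones is returned.
#
#   r = Rank(queue_length = 3)
#   r.add_val(0.5)    # -> -1
#   r.add_val(0.2)    # -> -1
#   r.add_val(0.9)    # -> -1
#   r.add_val(0.4)    # queue is now [0.2, 0.9, 0.4] -> rank 1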

class ConsistLoss(nn.Module):
    """
    Symmetrized KL divergence between two probability predictions, used to
    measure the divergence between the clean and noisy branches.
    """
    def __init__(self):
        super(ConsistLoss, self).__init__()

    def kl_div_map(self, input, label):
        kl_map = torch.sum(label * (torch.log(label + 1e-16) - torch.log(input + 1e-16)), dim = 1)
        return kl_map

    def kl_loss(self, input, target, size_average = True):
        kl_div = self.kl_div_map(input, target)
        if size_average:
            return torch.mean(kl_div)
        else:
            return kl_div

    def forward(self, input1, input2, size_average = True):
        kl1 = self.kl_loss(input1, input2.detach(), size_average = size_average)
        kl2 = self.kl_loss(input2, input1.detach(), size_average = size_average)
        return (kl1 + kl2) / 2
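
# A minimal usage sketch (shapes are illustrative). Both inputs are expected
# to be softmax probabilities of shape [N, C, ...]; with size_average = False
# the returned divergence map has shape [N, ...].
#
#   consist = ConsistLoss()
#   p1 = torch.softmax(torch.randn(2, 4, 8, 8), dim = 1)
#   p2 = torch.softmax(torch.randn(2, 4, 8, 8), dim = 1)
#   d  = consist(p1, p2)    # scalar; symmetric in value w.r.t. p1 and p2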

def get_ce(prob, soft_y, size_avg = True):
    # clamp the probability into [5e-4, 0.9995] so that log() stays finite
    prob = prob * 0.999 + 5e-4
    ce = - soft_y * torch.log(prob)
    ce = torch.sum(ce, dim = 1)   # sum over the class dimension
    if(size_avg):
        ce = torch.mean(ce)
    return ce
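
# With the clamp above, a maximally confident wrong prediction (probability
# about 5e-4 for the labeled class) contributes -log(5e-4), roughly 7.6,
# instead of an infinite loss.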

@torch.no_grad()
def select_criterion(no_noisy_sample, cl_noisy_sample, label):
    """
    no_noisy_sample: noisy branch's output probability for the noisy sample
    cl_noisy_sample: clean branch's output probability for the noisy sample
    label: noisy label
    """
    l_n = get_ce(no_noisy_sample, label, size_avg = False)
    l_c = get_ce(cl_noisy_sample, label, size_avg = False)
    js_distance = ConsistLoss()
    variance = js_distance(no_noisy_sample, cl_noisy_sample, size_average = False)
    # exp(-16 * variance) is close to 1 where the two branches agree and
    # decays towards 0 where they diverge, down-weighting divergent pixels
    exp_variance = torch.exp(-16 * variance)
    loss_n = torch.mean(l_c * exp_variance).item()
    loss_c = torch.mean(l_n * exp_variance).item()
    return loss_n, loss_c
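
# The two scalars returned above are fed into the Rank queues in
# NLLDAST.training(): a comparatively high loss_c triggers divergence-based
# consistency (DBC) for severely noisy batches, while a comparatively low
# loss_n lets a nearly clean batch be used for supplementary training (ST).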

class NLLDAST(SegmentationAgent):
    """
    Segmentation agent for Divergence-Aware Selective Training (DAST).
    It assumes a dual-branch network whose forward pass returns a pair of
    predictions, one from the clean branch and one from the noisy branch.
    """
    def __init__(self, config, stage = 'train'):
        super(NLLDAST, self).__init__(config, stage)
        self.train_set_noise    = None
        self.train_loader_noise = None
        self.trainIter_noise    = None
        self.noisy_rank = None
        self.clean_rank = None

    def get_noisy_dataset_from_config(self):
        """
        Create a dataset for the images with noisy labels based on the config.
        """
        root_dir  = self.config['dataset']['root_dir']
        modal_num = self.config['dataset'].get('modal_num', 1)
        transform_names = self.config['dataset']['train_transform']

        self.transform_list = []
        if(transform_names is None or len(transform_names) == 0):
            data_transform = None
        else:
            transform_param = self.config['dataset']
            transform_param['task'] = 'segmentation'
            for name in transform_names:
                if(name not in self.transform_dict):
                    raise ValueError("Undefined transform {0:}".format(name))
                one_transform = self.transform_dict[name](transform_param)
                self.transform_list.append(one_transform)
            data_transform = transforms.Compose(self.transform_list)

        csv_file = self.config['dataset'].get('train_csv_noise', None)
        dataset  = NiftyDataset(root_dir = root_dir,
                                csv_file = csv_file,
                                modal_num = modal_num,
                                with_label = True,
                                transform = data_transform)
        return dataset

    def create_dataset(self):
        super(NLLDAST, self).create_dataset()
        if(self.stage == 'train'):
            if(self.train_set_noise is None):
                self.train_set_noise = self.get_noisy_dataset_from_config()
            if(self.deterministic):
                def worker_init_fn(worker_id):
                    random.seed(self.random_seed + worker_id)
                worker_init = worker_init_fn
            else:
                worker_init = None

            bn_train_noise = self.config['dataset']['train_batch_size_noise']
            num_worker = self.config['dataset'].get('num_worker', 16)
            self.train_loader_noise = torch.utils.data.DataLoader(self.train_set_noise,
                batch_size = bn_train_noise, shuffle = True, num_workers = num_worker,
                worker_init_fn = worker_init)

    def training(self):
        class_num  = self.config['network']['class_num']
        iter_valid = self.config['training']['iter_valid']
        nll_cfg    = self.config['noisy_label_learning']
        iter_max   = self.config['training']['iter_max']
        rampup_start = nll_cfg.get('rampup_start', 0)
        rampup_end   = nll_cfg.get('rampup_end', iter_max)
        train_loss = 0
        train_loss_sup = 0
        train_loss_reg = 0
        train_dice_list = []
        self.net.train()

        rank_length  = nll_cfg.get("dast_rank_length", 20)
        consist_loss = ConsistLoss()
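        # each iteration draws one clean batch and one noisy batch, runs both
        # through the dual-branch network in a single forward pass, and then
        # applies DAST's selective losses on the noisy batch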
        for it in range(iter_valid):
            try:
                data_cl = next(self.trainIter)
            except StopIteration:
                self.trainIter = iter(self.train_loader)
                data_cl = next(self.trainIter)
            try:
                data_no = next(self.trainIter_noise)
            except StopIteration:
                self.trainIter_noise = iter(self.train_loader_noise)
                data_no = next(self.trainIter_noise)

            # get the inputs
            x0 = self.convert_tensor_type(data_cl['image'])       # clean samples
            y0 = self.convert_tensor_type(data_cl['label_prob'])
            x1 = self.convert_tensor_type(data_no['image'])       # noisy samples
            y1 = self.convert_tensor_type(data_no['label_prob'])
            inputs = torch.cat([x0, x1], dim = 0).to(self.device)
            y0, y1 = y0.to(self.device), y1.to(self.device)

            # zero the parameter gradients
            self.optimizer.zero_grad()

            # forward + backward + optimize
            b0_pred, b1_pred = self.net(inputs)
            n0 = list(x0.shape)[0]      # number of clean samples
            b0_x0_pred = b0_pred[:n0]   # prediction of clean samples from the clean branch
            b0_x1_pred = b0_pred[n0:]   # prediction of noisy samples from the clean branch
            b1_x1_pred = b1_pred[n0:]   # prediction of noisy samples from the noisy branch

            # supervised loss for the clean and noisy branches, respectively
            loss_sup_cl = self.get_loss_value(data_cl, b0_x0_pred, y0)
            loss_sup_no = self.get_loss_value(data_no, b1_x1_pred, y1)
            loss_sup = (loss_sup_cl + loss_sup_no) / 2
            loss = loss_sup

            # severe noise suppression (DBC) & supplementary training (ST)
            rampup_ratio = get_rampup_ratio(self.glob_it, rampup_start, rampup_end, "sigmoid")
            w_dbc = nll_cfg.get('dast_dbc_w', 0.1) * rampup_ratio
            w_st  = nll_cfg.get('dast_st_w',  0.1) * rampup_ratio
            b1_x1_prob = nn.Softmax(dim = 1)(b1_x1_pred)
            b0_x1_prob = nn.Softmax(dim = 1)(b0_x1_pred)
            loss_n, loss_c = select_criterion(b1_x1_prob, b0_x1_prob, y1)
            rank_n = self.noisy_rank.add_val(loss_n)
            rank_c = self.clean_rank.add_val(loss_c)
            if loss_n < loss_c:
                # severely noisy batch: regularize with divergence-based
                # consistency (DBC) instead of trusting the noisy labels
                if rank_c >= rank_length * 0.8:
                    loss_dbc = consist_loss(b1_x1_prob, b0_x1_prob)
                    loss = loss + loss_dbc * w_dbc
                # nearly clean batch: use it for supplementary training (ST)
                # with a sharpened pseudo label
                if rank_n <= 0.2 * rank_length:
                    b0_x1_argmax = torch.argmax(b0_x1_pred, dim = 1, keepdim = True)
                    b0_x1_lab = get_soft_label(b0_x1_argmax, class_num, self.tensor_type)
                    b1_x1_argmax = torch.argmax(b1_x1_pred, dim = 1, keepdim = True)
                    b1_x1_lab = get_soft_label(b1_x1_argmax, class_num, self.tensor_type)
                    # pseudo label: average of both branch predictions and the noisy label,
                    # then sharpened with temperature T = 0.5 to push values towards 0 or 1
                    pseudo_label = (b0_x1_lab + b1_x1_lab + y1) / 3
                    sharpen = lambda p, T: p**(1.0 / T) / (p**(1.0 / T) + (1 - p)**(1.0 / T))
                    loss_st = torch.mean(torch.abs(b0_x1_prob - sharpen(pseudo_label, 0.5)))
                    loss = loss + loss_st * w_st

            loss.backward()
            self.optimizer.step()
            if(self.scheduler is not None and \
                not isinstance(self.scheduler, lr_scheduler.ReduceLROnPlateau)):
                self.scheduler.step()

            train_loss = train_loss + loss.item()
            train_loss_sup = train_loss_sup + loss_sup.item()
            # train_loss_reg = train_loss_reg + loss_reg.item()
            # get dice evaluation for each class in annotated images
            if(isinstance(b0_x0_pred, tuple) or isinstance(b0_x0_pred, list)):
                p0 = b0_x0_pred[0]
            else:
                p0 = b0_x0_pred
            p0_argmax = torch.argmax(p0, dim = 1, keepdim = True)
            p0_soft   = get_soft_label(p0_argmax, class_num, self.tensor_type)
            p0_soft, y0 = reshape_prediction_and_ground_truth(p0_soft, y0)
            dice_list = get_classwise_dice(p0_soft, y0)
            train_dice_list.append(dice_list.cpu().numpy())
        train_avg_loss = train_loss / iter_valid
        train_avg_loss_sup = train_loss_sup / iter_valid
        train_avg_loss_reg = train_loss_reg / iter_valid
        train_cls_dice = np.asarray(train_dice_list).mean(axis = 0)
        train_avg_dice = train_cls_dice.mean()

        train_scalers = {'loss': train_avg_loss, 'loss_sup': train_avg_loss_sup,
            'loss_reg': train_avg_loss_reg, 'regular_w': w_dbc,
            'avg_dice': train_avg_dice, 'class_dice': train_cls_dice}
        return train_scalers

    def train_valid(self):
        self.trainIter_noise = iter(self.train_loader_noise)
        nll_cfg = self.config['noisy_label_learning']
        rank_length = nll_cfg.get("dast_rank_length", 20)
        self.noisy_rank = Rank(rank_length)
        self.clean_rank = Rank(rank_length)
        super(NLLDAST, self).train_valid()
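
# A minimal, hypothetical sketch of a dual-branch network compatible with the
# forward pass above (self.net(inputs) must return two predictions). The
# backbone/head layout below is an assumption for illustration, not the
# network used in the paper.
#
#   class DualBranchNet(nn.Module):
#       def __init__(self, backbone, class_num, feature_chns):
#           super(DualBranchNet, self).__init__()
#           self.backbone = backbone                            # shared feature extractor
#           self.head0 = nn.Conv2d(feature_chns, class_num, 1)  # clean branch
#           self.head1 = nn.Conv2d(feature_chns, class_num, 1)  # noisy branch
#
#       def forward(self, x):
#           f = self.backbone(x)
#           return self.head0(f), self.head1(f)
#
# Driving the agent then follows the usual PyMIC pattern, e.g.
# agent = NLLDAST(config, 'train'); agent.run() -- assuming the parent
# SegmentationAgent exposes run().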