Commit 107e9e5 (parent: 22e70c9)

implement re-weighted loss; add as cmd line arguments

4 files changed: +70 -26 lines

network/ethec_experiments.py (+16 -5)

@@ -121,8 +121,9 @@ def ETHEC_train_model(arguments):
     trainloader = torch.utils.data.DataLoader(train_set,
                                               batch_size=batch_size,
                                               num_workers=n_workers,
-                                              shuffle=True)
-                                              # sampler=WeightedResampler(train_set))
+                                              shuffle=True if arguments.class_weights else False,
+                                              sampler=None if arguments.class_weights else WeightedResampler(
+                                                  train_set))

     valloader = torch.utils.data.DataLoader(val_set,
                                             batch_size=batch_size,
@@ -138,7 +139,9 @@ def ETHEC_train_model(arguments):
     trainloader = torch.utils.data.DataLoader(train_set,
                                               batch_size=batch_size,
                                               num_workers=n_workers,
-                                              sampler=WeightedResampler(train_set))
+                                              shuffle=True if arguments.class_weights else False,
+                                              sampler=None if arguments.class_weights else WeightedResampler(
+                                                  train_set))
     valloader = torch.utils.data.DataLoader(val_set,
                                             batch_size=batch_size,
                                             shuffle=False, num_workers=n_workers)
@@ -148,16 +151,23 @@ def ETHEC_train_model(arguments):

     data_loaders = {'train': trainloader, 'val': valloader, 'test': testloader}

+    weight = None
+    if arguments.class_weights:
+        n_train = torch.zeros(labelmap.n_classes)
+        for data_item in data_loaders['train']:
+            n_train += torch.sum(data_item['labels'], 0)
+        weight = 1.0/n_train
+
     eval_type = MultiLabelEvaluation(os.path.join(arguments.experiment_dir, arguments.experiment_name), labelmap)
     if arguments.evaluator == 'MLST':
         eval_type = MultiLabelEvaluationSingleThresh(os.path.join(arguments.experiment_dir, arguments.experiment_name),
                                                      labelmap)

     use_criterion = None
     if arguments.loss == 'multi_label':
-        use_criterion = MultiLabelSMLoss()
+        use_criterion = MultiLabelSMLoss(weight=weight)
     elif arguments.loss == 'multi_level':
-        use_criterion = MultiLevelCELoss(labelmap=labelmap)
+        use_criterion = MultiLevelCELoss(labelmap=labelmap, weight=weight)
         eval_type = MultiLevelEvaluation(os.path.join(arguments.experiment_dir, arguments.experiment_name), labelmap)

     ETHEC_trainer = ETHECExperiment(data_loaders=data_loaders, labelmap=labelmap,
@@ -196,6 +206,7 @@ def ETHEC_train_model(arguments):
     parser.add_argument("--model", help='NN model to use. Use one of [`multi_label`, `multi_level`]',
                         type=str, required=True)
     parser.add_argument("--loss", help='Loss function to use.', type=str, required=True)
+    parser.add_argument("--class_weights", help='Re-weigh the loss function based on inverse class freq.', action='store_true')
     parser.add_argument("--freeze_weights", help='This flag fine tunes only the last layer.', action='store_true')
     parser.add_argument("--set_mode", help='If use training or testing mode (loads best model).', type=str,
                         required=True)
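Note on the DataLoader change above: PyTorch's DataLoader does not allow shuffle=True together with a custom sampler, so the commit toggles the two as a pair. With --class_weights the imbalance is handled by re-weighting the loss and the loader falls back to plain shuffling; without it, WeightedResampler handles imbalance at sampling time. The script is then invoked with the new flag, e.g. python network/ethec_experiments.py --loss multi_level --class_weights ... (remaining required arguments omitted). A minimal self-contained sketch of the pattern, where make_loader and sampler_factory are illustrative names, not part of this repository:

import torch
from torch.utils.data import DataLoader, RandomSampler, TensorDataset

def make_loader(dataset, use_class_weights, sampler_factory, batch_size=32):
    # shuffle and sampler are mutually exclusive in DataLoader, so exactly
    # one of the two imbalance strategies is active at a time
    return DataLoader(dataset,
                      batch_size=batch_size,
                      shuffle=use_class_weights,
                      sampler=None if use_class_weights else sampler_factory(dataset))

# dummy dataset; RandomSampler stands in for the repository's WeightedResampler
data = TensorDataset(torch.randn(100, 3), torch.randint(0, 2, (100,)))
loader = make_loader(data, use_class_weights=False, sampler_factory=RandomSampler)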

network/fashion_mnist.py (+19 -11)

@@ -78,13 +78,13 @@ def train_FMNIST(arguments):
         # transforms.Normalize(0.5, 0.5)
     ])

-    lmap = labelmap_FMNIST()
+    labelmap = labelmap_FMNIST()
     batch_size = arguments.batch_size
     n_workers = arguments.n_workers

     if arguments.debug:
         print("== Running in DEBUG mode!")
-        trainset = FMNISTHierarchical(root='../database', labelmap=lmap, train=False,
+        trainset = FMNISTHierarchical(root='../database', labelmap=labelmap, train=False,
                                       download=True, transform=data_transforms)
         trainloader = torch.utils.data.DataLoader(torch.utils.data.Subset(trainset, list(range(100))), batch_size=batch_size,
                                                   shuffle=True, num_workers=n_workers)
@@ -100,14 +100,14 @@ def train_FMNIST(arguments):
         data_loaders = {'train': trainloader, 'val': valloader, 'test': testloader}

     else:
-        trainset = FMNISTHierarchical(root='../database', labelmap=lmap, train=True,
+        trainset = FMNISTHierarchical(root='../database', labelmap=labelmap, train=True,
                                       download=True, transform=data_transforms)
-        testset = FMNISTHierarchical(root='../database', labelmap=lmap, train=False,
+        testset = FMNISTHierarchical(root='../database', labelmap=labelmap, train=False,
                                      download=True, transform=data_transforms)

         # split the dataset into 80:10:10
         train_indices_from_train, val_indices_from_train, val_indices_from_test, test_indices_from_test = \
-            FMNIST_set_indices(trainset, testset, lmap)
+            FMNIST_set_indices(trainset, testset, labelmap)

         trainloader = torch.utils.data.DataLoader(torch.utils.data.Subset(trainset, train_indices_from_train),
                                                   batch_size=batch_size,
@@ -125,18 +125,25 @@ def train_FMNIST(arguments):

     data_loaders = {'train': trainloader, 'val': valloader, 'test': testloader}

-    eval_type = MultiLabelEvaluation(os.path.join(arguments.experiment_dir, arguments.experiment_name), lmap)
+    weight = None
+    if arguments.class_weights:
+        n_train = torch.zeros(labelmap.n_classes)
+        for data_item in data_loaders['train']:
+            n_train += torch.sum(data_item['labels'], 0)
+        weight = 1.0 / n_train
+
+    eval_type = MultiLabelEvaluation(os.path.join(arguments.experiment_dir, arguments.experiment_name), labelmap)
     if arguments.evaluator == 'MLST':
-        eval_type = MultiLabelEvaluationSingleThresh(os.path.join(arguments.experiment_dir, arguments.experiment_name), lmap)
+        eval_type = MultiLabelEvaluationSingleThresh(os.path.join(arguments.experiment_dir, arguments.experiment_name), labelmap)

     use_criterion = None
     if arguments.loss == 'multi_label':
-        use_criterion = MultiLabelSMLoss()
+        use_criterion = MultiLabelSMLoss(weight=weight)
     elif arguments.loss == 'multi_level':
-        use_criterion = MultiLevelCELoss(labelmap=lmap)
-        eval_type = MultiLevelEvaluation(os.path.join(arguments.experiment_dir, arguments.experiment_name), lmap)
+        use_criterion = MultiLevelCELoss(labelmap=labelmap, weight=weight)
+        eval_type = MultiLevelEvaluation(os.path.join(arguments.experiment_dir, arguments.experiment_name), labelmap)

-    FMNIST_trainer = FMNIST(data_loaders=data_loaders, labelmap=lmap,
+    FMNIST_trainer = FMNIST(data_loaders=data_loaders, labelmap=labelmap,
                             criterion=use_criterion,
                             lr=arguments.lr,
                             batch_size=batch_size, evaluator=eval_type,
@@ -257,6 +264,7 @@ def FMNIST_set_indices(trainset, testset, labelmap=labelmap_FMNIST()):
     parser.add_argument("--resume", help='Continue training from last checkpoint.', action='store_true')
     parser.add_argument("--model", help='NN model to use.', type=str, required=True)
     parser.add_argument("--freeze_weights", help='This flag fine tunes only the last layer.', action='store_true')
+    parser.add_argument("--class_weights", help='Re-weigh the loss function based on inverse class freq.', action='store_true')
     parser.add_argument("--set_mode", help='If use training or testing mode (loads best model).', type=str, required=True)
     parser.add_argument("--loss", help='Loss function to use.', type=str, required=True)
     args = parser.parse_args()
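The weight computation added in each script makes one pass over the training loader, summing the multi-hot label vectors to get per-class counts and taking the reciprocal. A toy illustration of what it produces, assuming each batch carries labels of shape (batch, n_classes) as the loop implies:

import torch

n_classes = 4
# two dummy batches of multi-hot labels, shape (batch, n_classes)
batches = [torch.tensor([[1., 0., 1., 0.],
                         [1., 0., 0., 1.]]),
           torch.tensor([[1., 1., 0., 0.]])]

n_train = torch.zeros(n_classes)
for labels in batches:
    n_train += torch.sum(labels, 0)   # accumulate per-class counts

weight = 1.0 / n_train                # inverse class frequency
print(n_train)   # tensor([3., 1., 1., 1.])
print(weight)    # tensor([0.3333, 1.0000, 1.0000, 1.0000])

One observable caveat: a class that never occurs in the training split would get an inf weight here, since 1.0/0 is inf in torch; the commit does not special-case that.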

network/finetuner.py (+10 -2)

@@ -409,16 +409,23 @@ def train_cifar10(arguments):

     data_loaders = {'train': trainloader, 'val': valloader, 'test': testloader}

+    weight = None
+    if arguments.class_weights:
+        n_train = torch.zeros(labelmap.n_classes)
+        for data_item in data_loaders['train']:
+            n_train += torch.sum(data_item['labels'], 0)
+        weight = 1.0 / n_train
+
     eval_type = MultiLabelEvaluation(os.path.join(arguments.experiment_dir, arguments.experiment_name), labelmap)
     if arguments.evaluator == 'MLST':
         eval_type = MultiLabelEvaluationSingleThresh(os.path.join(arguments.experiment_dir, arguments.experiment_name),
                                                      labelmap)

     use_criterion = None
     if arguments.loss == 'multi_label':
-        use_criterion = MultiLabelSMLoss()
+        use_criterion = MultiLabelSMLoss(weight=weight)
     elif arguments.loss == 'multi_level':
-        use_criterion = MultiLevelCELoss(labelmap=labelmap)
+        use_criterion = MultiLevelCELoss(labelmap=labelmap, weight=weight)
         eval_type = MultiLevelEvaluation(os.path.join(arguments.experiment_dir, arguments.experiment_name), labelmap)

     cifar_trainer = CIFAR10(data_loaders=data_loaders, labelmap=labelmap,
@@ -586,6 +593,7 @@ def train_alexnet_binary():
     parser.add_argument("--resume", help='Continue training from last checkpoint.', action='store_true')
     parser.add_argument("--model", help='NN model to use.', type=str, required=True)
     parser.add_argument("--loss", help='Loss function to use.', type=str, required=True)
+    parser.add_argument("--class_weights", help='Re-weigh the loss function based on inverse class freq.', action='store_true')
     parser.add_argument("--freeze_weights", help='This flag fine tunes only the last layer.', action='store_true')
     parser.add_argument("--set_mode", help='If use training or testing mode (loads best model).', type=str,
                         required=True)
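In the multi_label branch the vector is handed straight to torch.nn.MultiLabelSoftMarginLoss, whose weight argument rescales each class's term of the loss, so frequent classes (small 1/count) contribute proportionally less. A minimal sketch of that inherited behaviour, with made-up numbers:

import torch

n_classes = 3
weight = torch.tensor([0.1, 1.0, 1.0])   # e.g. class 0 is ten times as frequent
criterion = torch.nn.MultiLabelSoftMarginLoss(weight=weight)

logits = torch.randn(4, n_classes)                     # raw scores, batch of 4
targets = torch.randint(0, 2, (4, n_classes)).float()  # multi-hot labels
print(criterion(logits, targets))                      # weighted mean loss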

network/loss.py (+25 -8)

@@ -4,12 +4,28 @@


 class MultiLevelCELoss(torch.nn.Module):
-    def __init__(self, labelmap, weights=None):
+    def __init__(self, labelmap, level_weights=None, weight=None):
         torch.nn.Module.__init__(self)
         self.labelmap = labelmap
-        self.weights = [1.0]*len(self.labelmap.levels) if weights is None else weights
-        self.criterion = nn.CrossEntropyLoss(reduction='none')
-        print('==Using the following weights config for multi level cross entropy loss: {}'.format(self.weights))
+        self.level_weights = [1.0] * len(self.labelmap.levels) if level_weights is None else level_weights
+
+        self.criterion = []
+        if weight is None:
+            for level_len in self.labelmap.levels:
+                self.criterion.append(nn.CrossEntropyLoss(weight=None, reduction='none'))
+        else:
+            level_stop, level_start = [], []
+            for level_id, level_len in enumerate(self.labelmap.levels):
+                if level_id == 0:
+                    level_start.append(0)
+                    level_stop.append(level_len)
+                else:
+                    level_start.append(level_stop[level_id - 1])
+                    level_stop.append(level_stop[level_id - 1] + level_len)
+                self.criterion.append(nn.CrossEntropyLoss(weight=weight[level_start[level_id]:level_stop[level_id]],
+                                                          reduction='none'))
+
+        print('==Using the following weights config for multi level cross entropy loss: {}'.format(self.level_weights))

     def forward(self, outputs, labels, level_labels):
         # print('Outputs: {}'.format(outputs))
@@ -18,23 +34,24 @@ def forward(self, outputs, labels, level_labels):
         loss = 0.0
         for level_id, level in enumerate(self.labelmap.levels):
             if level_id == 0:
-                loss += self.weights[level_id] * self.criterion(outputs[:, 0:level], level_labels[:, level_id])
+                loss += self.level_weights[level_id] * self.criterion[level_id](outputs[:, 0:level], level_labels[:, level_id])
                 # print(self.weights[level_id] * self.criterion(outputs[:, 0:level], level_labels[:, level_id]))
             else:
                 start = sum([self.labelmap.levels[l_id] for l_id in range(level_id)])
                 # print([self.labelmap.levels[l_id] for l_id in range(level_id)], level)
                 # print(outputs[:, start:start+level])
                 # print(self.weights[level_id] * self.criterion(outputs[:, start:start+level],
                 #                                               level_labels[:, level_id]))
-                loss += self.weights[level_id] * self.criterion(outputs[:, start:start+level],
-                                                                level_labels[:, level_id])
+                loss += self.level_weights[level_id] * self.criterion[level_id](outputs[:, start:start + level],
+                                                                                level_labels[:, level_id])
         # print('Loss per sample: {}'.format(loss))
         # print('Avg loss: {}'.format(torch.mean(loss)))
         return torch.mean(loss)


 class MultiLabelSMLoss(torch.nn.MultiLabelSoftMarginLoss):
     def __init__(self, weight=None, size_average=None, reduce=None, reduction='mean'):
+        print(weight)
         torch.nn.MultiLabelSoftMarginLoss.__init__(self, weight, size_average, reduce, reduction)

     def forward(self, outputs, labels, level_labels):
@@ -43,7 +60,7 @@ def forward(self, outputs, labels, level_labels):

 if __name__ == '__main__':
     lmap = ETHECLabelMap()
-    criterion = MultiLevelCELoss(labelmap=lmap, weights=[1, 1, 1, 1])
+    criterion = MultiLevelCELoss(labelmap=lmap, level_weights=[1, 1, 1, 1])
     output, level_labels = torch.zeros((1, lmap.n_classes)), torch.tensor([[0,
                                                                             7-lmap.levels[0],
                                                                             90-(lmap.levels[0]+lmap.levels[1]),
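The constructor change above carves the flat per-class weight vector into contiguous per-level slices and builds one nn.CrossEntropyLoss per level; forward then scores each level's slice of the logits with its own criterion and scales the result by level_weights[level_id]. A stand-alone sketch of the slicing, using a hypothetical two-level hierarchy (levels = [2, 5], i.e. classes 0-1 on level 0 and classes 2-6 on level 1 of a flat 7-class vector):

import torch
import torch.nn as nn

levels = [2, 5]                      # hypothetical label hierarchy
weight = torch.rand(sum(levels))     # one weight per class, levels flattened

criteria, starts, start = [], [], 0
for level_len in levels:
    # each level's CE loss sees only that level's slice of the weight vector
    criteria.append(nn.CrossEntropyLoss(weight=weight[start:start + level_len],
                                        reduction='none'))
    starts.append(start)
    start += level_len

# forward then evaluates outputs[:, 0:2] with criteria[0] and
# outputs[:, 2:7] with criteria[1], summing the per-sample losses:
outputs = torch.randn(8, sum(levels))
level_labels = torch.stack([torch.randint(0, 2, (8,)),
                            torch.randint(0, 5, (8,))], dim=1)
loss = sum(c(outputs[:, s:s + l], level_labels[:, i])
           for i, (c, s, l) in enumerate(zip(criteria, starts, levels)))
print(torch.mean(loss))

The running-offset bookkeeping here mirrors the level_start/level_stop lists in the diff; the slices are identical, only the variable names differ.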
