Skip to content

Commit b523cf6

Browse files
committed
Cleanup; add every-n validation subset; fix up input config resolution between args and model config.
1 parent 1a11aad commit b523cf6

13 files changed

+208
-116
lines changed

Diff for: effdet/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from .efficientdet import EfficientDet
22
from .bench import DetBenchPredict, DetBenchTrain, unwrap_bench
3-
from .data import create_dataset, create_loader, create_parser
3+
from .data import create_dataset, create_loader, create_parser, DetectionDatset, SkipSubset
44
from .evaluator import CocoEvaluator, PascalEvaluator, OpenImagesEvaluator, create_evaluator
55
from .config import get_efficientdet_config, default_detection_model_configs
66
from .factory import create_model, create_model_from_config

Diff for: effdet/anchors.py

+2-25
Original file line numberDiff line numberDiff line change
@@ -171,28 +171,6 @@ def clip_boxes_xyxy(boxes: torch.Tensor, size: torch.Tensor):
171171
return boxes
172172

173173

174-
# def batched_nms(
175-
# boxes: torch.Tensor, scores: torch.Tensor, idxs: torch.Tensor, iou_threshold: float
176-
# ):
177-
# """
178-
# Same as torchvision.ops.boxes.batched_nms, but safer.
179-
# """
180-
# assert boxes.shape[-1] == 4
181-
# # TODO may need better strategy.
182-
# # Investigate after having a fully-cuda NMS op.
183-
# if len(boxes) < 40000:
184-
# return tvb.batched_nms(boxes, scores, idxs, iou_threshold)
185-
#
186-
# result_mask = scores.new_zeros(scores.size(), dtype=torch.bool)
187-
# for id in torch.jit.annotate(List[int], torch.unique(idxs).cpu().tolist()):
188-
# mask = (idxs == id).nonzero().view(-1)
189-
# keep = tvb.nms(boxes[mask], scores[mask], iou_threshold)
190-
# result_mask[mask[keep]] = True
191-
# keep = result_mask.nonzero().view(-1)
192-
# keep = keep[scores[keep].argsort(descending=True)]
193-
# return keep
194-
195-
196174
def generate_detections(
197175
cls_outputs, box_outputs, anchor_boxes, indices, classes,
198176
img_scale: Optional[torch.Tensor], img_size: Optional[torch.Tensor],
@@ -432,10 +410,9 @@ def batch_label_anchors(self, batch_size: int, gt_boxes, gt_classes):
432410
# cls_weights, box_weights are not used
433411
cls_targets, _, box_targets, _, matches = self.target_assigner.assign(
434412
anchor_box_list, BoxList(gt_boxes[i]), gt_classes[i])
435-
#print(gt_boxes[i].shape, gt_classes[i].shape, matches.match_results.shape)
413+
436414
# class labels start from 1 and the background class = -1
437-
cls_targets -= 1
438-
cls_targets = cls_targets.long()
415+
cls_targets = (cls_targets - 1).long()
439416

440417
# Unpack labels.
441418
"""Unpacks an array of cls/box into multiple scales."""

Diff for: effdet/data/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from .dataset_factory import create_dataset
2-
from .dataset import DetectionDatset
2+
from .dataset import DetectionDatset, SkipSubset
3+
from .input_config import resolve_input_config
34
from .loader import create_loader
45
from .parsers import create_parser
56
from .transforms import *

Diff for: effdet/data/dataset.py

+52-7
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
Hacked together by Ross Wightman
44
"""
55
import torch.utils.data as data
6+
import numpy as np
67

78
from PIL import Image
89
from .parsers import create_parser
@@ -22,11 +23,11 @@ def __init__(self, data_dir, parser=None, parser_kwargs=None, transform=None):
2223
parser_kwargs = parser_kwargs or {}
2324
self.data_dir = data_dir
2425
if isinstance(parser, str):
25-
self.parser = create_parser(parser, **parser_kwargs)
26+
self._parser = create_parser(parser, **parser_kwargs)
2627
else:
2728
assert parser is not None and len(parser.img_ids)
28-
self.parser = parser
29-
self.transform = transform
29+
self._parser = parser
30+
self._transform = transform
3031

3132
def __getitem__(self, index):
3233
"""
@@ -35,10 +36,10 @@ def __getitem__(self, index):
3536
Returns:
3637
tuple: Tuple (image, annotations (target)).
3738
"""
38-
img_info = self.parser.img_infos[index]
39+
img_info = self._parser.img_infos[index]
3940
target = dict(img_idx=index, img_size=(img_info['width'], img_info['height']))
40-
if self.parser.has_labels:
41-
ann = self.parser.get_ann_info(index)
41+
if self._parser.has_labels:
42+
ann = self._parser.get_ann_info(index)
4243
target.update(ann)
4344

4445
img_path = self.data_dir / img_info['file_name']
@@ -49,4 +50,48 @@ def __getitem__(self, index):
4950
return img, target
5051

5152
def __len__(self):
52-
return len(self.parser.img_ids)
53+
return len(self._parser.img_ids)
54+
55+
@property
56+
def parser(self):
57+
return self._parser
58+
59+
@property
60+
def transform(self):
61+
return self._transform
62+
63+
@transform.setter
64+
def transform(self, t):
65+
self._transform = t
66+
67+
68+
class SkipSubset(data.Dataset):
69+
r"""
70+
Subset of a dataset at specified indices.
71+
72+
Arguments:
73+
dataset (Dataset): The whole Dataset
74+
n (int): skip rate (select every nth)
75+
"""
76+
def __init__(self, dataset, n=2):
77+
self.dataset = dataset
78+
assert n >= 1
79+
self.indices = np.arange(len(dataset))[::n]
80+
81+
def __getitem__(self, idx):
82+
return self.dataset[self.indices[idx]]
83+
84+
def __len__(self):
85+
return len(self.indices)
86+
87+
@property
88+
def parser(self):
89+
return self.dataset.parser
90+
91+
@property
92+
def transform(self):
93+
return self.dataset.transform
94+
95+
@transform.setter
96+
def transform(self, t):
97+
self.dataset.transform = t

Diff for: effdet/data/input_config.py

+70
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
from .transforms import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
2+
3+
4+
def resolve_input_config(args, model_config=None, model=None):
5+
if not isinstance(args, dict):
6+
args = vars(args)
7+
input_config = {}
8+
if not model_config and model is not None and hasattr(model, 'config'):
9+
model_config = model.config
10+
11+
# Resolve input/image size
12+
in_chans = 3
13+
# if 'chans' in args and args['chans'] is not None:
14+
# in_chans = args['chans']
15+
16+
input_size = (in_chans, 512, 512)
17+
# if 'input_size' in args and args['input_size'] is not None:
18+
# assert isinstance(args['input_size'], (tuple, list))
19+
# assert len(args['input_size']) == 3
20+
# input_size = tuple(args['input_size'])
21+
# in_chans = input_size[0] # input_size overrides in_chans
22+
# elif 'img_size' in args and args['img_size'] is not None:
23+
# assert isinstance(args['img_size'], int)
24+
# input_size = (in_chans, args['img_size'], args['img_size'])
25+
if 'input_size' in model_config:
26+
input_size = model_config['input_size']
27+
elif 'image_size' in model_config:
28+
input_size = (in_chans, model_config['image_size'], model_config['image_size'])
29+
input_config['input_size'] = input_size
30+
31+
# resolve interpolation method
32+
input_config['interpolation'] = 'bicubic'
33+
if 'interpolation' in args and args['interpolation']:
34+
input_config['interpolation'] = args['interpolation']
35+
elif 'interpolation' in model_config:
36+
input_config['interpolation'] = model_config['interpolation']
37+
38+
# resolve dataset + model mean for normalization
39+
input_config['mean'] = IMAGENET_DEFAULT_MEAN
40+
if 'mean' in args and args['mean'] is not None:
41+
mean = tuple(args['mean'])
42+
if len(mean) == 1:
43+
mean = tuple(list(mean) * in_chans)
44+
else:
45+
assert len(mean) == in_chans
46+
input_config['mean'] = mean
47+
elif 'mean' in model_config:
48+
input_config['mean'] = model_config['mean']
49+
50+
# resolve dataset + model std deviation for normalization
51+
input_config['std'] = IMAGENET_DEFAULT_STD
52+
if 'std' in args and args['std'] is not None:
53+
std = tuple(args['std'])
54+
if len(std) == 1:
55+
std = tuple(list(std) * in_chans)
56+
else:
57+
assert len(std) == in_chans
58+
input_config['std'] = std
59+
elif 'std' in model_config:
60+
input_config['std'] = model_config['std']
61+
62+
# resolve letterbox fill color
63+
input_config['fill_color'] = 'mean'
64+
if 'fill_color' in args and args['fill_color'] is not None:
65+
print('ff')
66+
input_config['fill_color'] = args['fill_color']
67+
elif 'fill_color' in model_config:
68+
input_config['fill_color'] = model_config['fill_color']
69+
70+
return input_config

Diff for: effdet/data/parsers/parser.py

+26-18
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,14 @@
11
from numbers import Integral
2+
from typing import List, Union, Dict, Any
23

34

45
class Parser:
56
""" Parser base class.
67
8+
The attributes listed below make up a public interface common to all parsers. They can be accessed directly
9+
once the dataset is constructed and annotations are populated.
10+
711
Attributes:
8-
yxyx (bool):
9-
has_labels (bool):
10-
include_masks (bool):
11-
include_bboxes_ignore (bool):
12-
ignore_empty_gt (bool):
13-
min_img_size (bool)
1412
1513
cat_names (list[str]):
1614
list of category (class) names, with background class at position 0.
@@ -28,13 +26,23 @@ class Parser:
2826
"""
2927
def __init__(
3028
self,
31-
bbox_yxyx=False,
32-
has_labels=True,
33-
include_masks=False,
34-
include_bboxes_ignore=False,
35-
ignore_empty_gt=False,
36-
min_img_size=32,
29+
bbox_yxyx: bool = False,
30+
has_labels: bool = True,
31+
include_masks: bool = False,
32+
include_bboxes_ignore: bool = False,
33+
ignore_empty_gt: bool = False,
34+
min_img_size: int = 32,
3735
):
36+
"""
37+
Args:
38+
bbox_yxyx (bool): output coords in yxyx format, otherwise xyxy
39+
has_labels (bool): dataset has labels (for training/validation; False usually for test sets)
40+
include_masks (bool): include segmentation masks in target output (not supported yet for any dataset)
41+
include_bboxes_ignore (bool): include ignored bbox in target output
42+
ignore_empty_gt (bool): ignore images with no ground truth (no negative images)
43+
min_img_size (int): ignore images with width or height smaller than this number
44+
sub_sample (int): sample every N images from the dataset
45+
"""
3846
# parser config, determines how dataset parsed and validated
3947
self.yxyx = bbox_yxyx
4048
self.has_labels = has_labels
@@ -45,14 +53,14 @@ def __init__(
4553
self.label_offset = 1
4654

4755
# Category (class) metadata. Populated by _load_annotations()
48-
self.cat_names = []
49-
self.cat_ids = []
50-
self.cat_id_to_label = dict()
56+
self.cat_names: List[str] = []
57+
self.cat_ids: List[Union[str, Integral]] = []
58+
self.cat_id_to_label: Dict[Union[str, Integral], Integral] = dict()
5159

5260
# Image metadata. Populated by _load_annotations()
53-
self.img_ids = []
54-
self.img_ids_invalid = []
55-
self.img_infos = []
61+
self.img_ids: List[Union[str, Integral]] = []
62+
self.img_ids_invalid: List[Union[str, Integral]] = []
63+
self.img_infos: List[Dict[str, Any]] = []
5664

5765
@property
5866
def cat_dicts(self):

Diff for: effdet/data/parsers/parser_coco.py

-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
33
Copyright 2020 Ross Wightman
44
"""
5-
import os
65
import numpy as np
76
from pycocotools.coco import COCO
87
from .parser import Parser

Diff for: effdet/data/parsers/parser_open_images.py

+6
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,9 @@ def _load_img_info(csv_file, select_img_ids=None):
9393
img_sizes, img_id_to_idx = _load_img_info(img_info_filename, select_img_ids=anno_img_ids)
9494

9595
masks_df['ImageIdx'] = masks_df['ImageID'].map(img_id_to_idx)
96+
if np.issubdtype(masks_df.ImageIdx.dtype, np.floating):
97+
masks_df = masks_df.dropna(axis='rows')
98+
masks_df['ImageIdx'] = masks_df.ImageIdx.astype(np.int32)
9699
masks_df.sort_values('ImageIdx', inplace=True)
97100
ann_img_idx = masks_df['ImageIdx'].values
98101
img_sizes = img_sizes[ann_img_idx]
@@ -121,6 +124,9 @@ def _load_img_info(csv_file, select_img_ids=None):
121124

122125
_logger.info('Process bbox...')
123126
bbox_df['ImageIdx'] = bbox_df['ImageID'].map(img_id_to_idx)
127+
if np.issubdtype(bbox_df.ImageIdx.dtype, np.floating):
128+
bbox_df = bbox_df.dropna(axis='rows')
129+
bbox_df['ImageIdx'] = bbox_df.ImageIdx.astype(np.int32)
124130
bbox_df.sort_values('ImageIdx', inplace=True)
125131
ann_img_idx = bbox_df['ImageIdx'].values
126132
img_sizes = img_sizes[ann_img_idx]

Diff for: effdet/data/parsers/parser_voc.py

+3-7
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,11 @@ def _load_annotations(
5151
self.cat_ids = self.cat_names
5252
self.cat_id_to_label = {cat: i + self.label_offset for i, cat in enumerate(self.cat_ids)}
5353

54-
with open(split_filename) as f:
55-
ids = f.readlines()
5654
self.anns = []
5755

58-
for img_idx, img_id in enumerate(ids):
56+
with open(split_filename) as f:
57+
ids = f.readlines()
58+
for img_id in ids:
5959
img_id = img_id.strip("\n")
6060
filename = img_filename % img_id
6161
xml_path = ann_filename % img_id
@@ -85,18 +85,14 @@ def _load_annotations(
8585
self.anns.append(anns)
8686
self.img_infos.append(dict(id=img_id, file_name=filename, width=width, height=height))
8787
self.img_ids.append(img_id)
88-
self.img_id_to_idx[img_id] = img_idx
8988
else:
9089
self.img_ids_invalid.append(img_id)
9190

9291
def merge(self, other):
93-
this_size = len(self.img_ids)
9492
assert len(self.cat_ids) == len(other.cat_ids)
9593
self.img_ids.extend(other.img_ids)
9694
self.img_infos.extend(other.img_infos)
9795
self.anns.extend(other.anns)
98-
for id, idx in other.img_id_to_idx.items():
99-
self.img_id_to_idx[id] = idx + this_size
10096

10197
def get_ann_info(self, idx):
10298
return self._parse_ann_info(self.anns[idx])

Diff for: effdet/evaluation/detection_evaluator.py

+2-19
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ def add_single_ground_truth_image_info(self, image_id, gt_dict):
205205
raise error if instance masks are not in groundtruth dictionary.
206206
"""
207207
if image_id in self._image_ids:
208-
raise ValueError('Image with id {} already added.'.format(image_id))
208+
return
209209

210210
gt_classes = gt_dict[InputDataFields.gt_classes] - self._label_id_offset
211211
# If the key is not present in the gt_dict or the array is empty
@@ -403,18 +403,6 @@ def __init__(self,
403403
metric_prefix=metric_prefix,
404404
group_of_weight=group_of_weight,
405405
evaluate_masks=evaluate_masks)
406-
# self._expected_keys = set([
407-
# InputDataFields.key,
408-
# InputDataFields.gt_boxes,
409-
# InputDataFields.gt_classes,
410-
# InputDataFields.gt_group_of,
411-
# DetectionResultFields.detection_boxes,
412-
# DetectionResultFields.detection_scores,
413-
# DetectionResultFields.detection_classes,
414-
# ])
415-
# if evaluate_masks:
416-
# self._expected_keys.add(InputDataFields.gt_instance_masks)
417-
# self._expected_keys.add(DetectionResultFields.detection_masks)
418406

419407
def add_single_ground_truth_image_info(self, image_id, gt_dict):
420408
"""Adds groundtruth for a single image to be used for evaluation.
@@ -432,7 +420,7 @@ def add_single_ground_truth_image_info(self, image_id, gt_dict):
432420
ValueError: On adding groundtruth for an image more than once.
433421
"""
434422
if image_id in self._image_ids:
435-
raise ValueError('Image with id {} already added.'.format(image_id))
423+
return
436424

437425
gt_classes = (gt_dict[InputDataFields.gt_classes] - self._label_id_offset)
438426
# If the key is not present in the gt_dict or the array is empty
@@ -522,11 +510,6 @@ def __init__(
522510
metric_prefix=metrics_prefix)
523511

524512
self._evaluatable_labels = {}
525-
# Only one of the two has to be provided, but both options are given
526-
# for compatibility with previous codebase.
527-
self._expected_keys.update([
528-
InputDataFields.gt_image_classes,
529-
InputDataFields.gt_labeled_classes])
530513

531514
def add_single_ground_truth_image_info(self, image_id, gt_dict):
532515
"""Adds groundtruth for a single image to be used for evaluation.

0 commit comments

Comments
 (0)