Skip to content

Commit b523cf6

Browse files
committed
Cleanup; add every-n validation subset; fix up input config resolution between args and model config.
1 parent 1a11aad commit b523cf6

13 files changed

+208
-116
lines changed

Diff for: effdet/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from .efficientdet import EfficientDet
22
from .bench import DetBenchPredict, DetBenchTrain, unwrap_bench
3-
from .data import create_dataset, create_loader, create_parser
3+
from .data import create_dataset, create_loader, create_parser, DetectionDatset, SkipSubset
44
from .evaluator import CocoEvaluator, PascalEvaluator, OpenImagesEvaluator, create_evaluator
55
from .config import get_efficientdet_config, default_detection_model_configs
66
from .factory import create_model, create_model_from_config

Diff for: effdet/anchors.py

+2-25
Original file line numberDiff line numberDiff line change
@@ -171,28 +171,6 @@ def clip_boxes_xyxy(boxes: torch.Tensor, size: torch.Tensor):
171171
return boxes
172172

173173

174-
# def batched_nms(
175-
# boxes: torch.Tensor, scores: torch.Tensor, idxs: torch.Tensor, iou_threshold: float
176-
# ):
177-
# """
178-
# Same as torchvision.ops.boxes.batched_nms, but safer.
179-
# """
180-
# assert boxes.shape[-1] == 4
181-
# # TODO may need better strategy.
182-
# # Investigate after having a fully-cuda NMS op.
183-
# if len(boxes) < 40000:
184-
# return tvb.batched_nms(boxes, scores, idxs, iou_threshold)
185-
#
186-
# result_mask = scores.new_zeros(scores.size(), dtype=torch.bool)
187-
# for id in torch.jit.annotate(List[int], torch.unique(idxs).cpu().tolist()):
188-
# mask = (idxs == id).nonzero().view(-1)
189-
# keep = tvb.nms(boxes[mask], scores[mask], iou_threshold)
190-
# result_mask[mask[keep]] = True
191-
# keep = result_mask.nonzero().view(-1)
192-
# keep = keep[scores[keep].argsort(descending=True)]
193-
# return keep
194-
195-
196174
def generate_detections(
197175
cls_outputs, box_outputs, anchor_boxes, indices, classes,
198176
img_scale: Optional[torch.Tensor], img_size: Optional[torch.Tensor],
@@ -432,10 +410,9 @@ def batch_label_anchors(self, batch_size: int, gt_boxes, gt_classes):
432410
# cls_weights, box_weights are not used
433411
cls_targets, _, box_targets, _, matches = self.target_assigner.assign(
434412
anchor_box_list, BoxList(gt_boxes[i]), gt_classes[i])
435-
#print(gt_boxes[i].shape, gt_classes[i].shape, matches.match_results.shape)
413+
436414
# class labels start from 1 and the background class = -1
437-
cls_targets -= 1
438-
cls_targets = cls_targets.long()
415+
cls_targets = (cls_targets - 1).long()
439416

440417
# Unpack labels.
441418
"""Unpacks an array of cls/box into multiple scales."""

Diff for: effdet/data/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from .dataset_factory import create_dataset
2-
from .dataset import DetectionDatset
2+
from .dataset import DetectionDatset, SkipSubset
3+
from .input_config import resolve_input_config
34
from .loader import create_loader
45
from .parsers import create_parser
56
from .transforms import *

Diff for: effdet/data/dataset.py

+52-7
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
Hacked together by Ross Wightman
44
"""
55
import torch.utils.data as data
6+
import numpy as np
67

78
from PIL import Image
89
from .parsers import create_parser
@@ -22,11 +23,11 @@ def __init__(self, data_dir, parser=None, parser_kwargs=None, transform=None):
2223
parser_kwargs = parser_kwargs or {}
2324
self.data_dir = data_dir
2425
if isinstance(parser, str):
25-
self.parser = create_parser(parser, **parser_kwargs)
26+
self._parser = create_parser(parser, **parser_kwargs)
2627
else:
2728
assert parser is not None and len(parser.img_ids)
28-
self.parser = parser
29-
self.transform = transform
29+
self._parser = parser
30+
self._transform = transform
3031

3132
def __getitem__(self, index):
3233
"""
@@ -35,10 +36,10 @@ def __getitem__(self, index):
3536
Returns:
3637
tuple: Tuple (image, annotations (target)).
3738
"""
38-
img_info = self.parser.img_infos[index]
39+
img_info = self._parser.img_infos[index]
3940
target = dict(img_idx=index, img_size=(img_info['width'], img_info['height']))
40-
if self.parser.has_labels:
41-
ann = self.parser.get_ann_info(index)
41+
if self._parser.has_labels:
42+
ann = self._parser.get_ann_info(index)
4243
target.update(ann)
4344

4445
img_path = self.data_dir / img_info['file_name']
@@ -49,4 +50,48 @@ def __getitem__(self, index):
4950
return img, target
5051

5152
def __len__(self):
52-
return len(self.parser.img_ids)
53+
return len(self._parser.img_ids)
54+
55+
@property
56+
def parser(self):
57+
return self._parser
58+
59+
@property
60+
def transform(self):
61+
return self._transform
62+
63+
@transform.setter
64+
def transform(self, t):
65+
self._transform = t
66+
67+
68+
class SkipSubset(data.Dataset):
69+
r"""
70+
Subset of a dataset at specified indices.
71+
72+
Arguments:
73+
dataset (Dataset): The whole Dataset
74+
n (int): skip rate (select every nth)
75+
"""
76+
def __init__(self, dataset, n=2):
77+
self.dataset = dataset
78+
assert n >= 1
79+
self.indices = np.arange(len(dataset))[::n]
80+
81+
def __getitem__(self, idx):
82+
return self.dataset[self.indices[idx]]
83+
84+
def __len__(self):
85+
return len(self.indices)
86+
87+
@property
88+
def parser(self):
89+
return self.dataset.parser
90+
91+
@property
92+
def transform(self):
93+
return self.dataset.transform
94+
95+
@transform.setter
96+
def transform(self, t):
97+
self.dataset.transform = t

Diff for: effdet/data/input_config.py

+70
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
from .transforms import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
2+
3+
4+
def resolve_input_config(args, model_config=None, model=None):
5+
if not isinstance(args, dict):
6+
args = vars(args)
7+
input_config = {}
8+
if not model_config and model is not None and hasattr(model, 'config'):
9+
model_config = model.config
10+
11+
# Resolve input/image size
12+
in_chans = 3
13+
# if 'chans' in args and args['chans'] is not None:
14+
# in_chans = args['chans']
15+
16+
input_size = (in_chans, 512, 512)
17+
# if 'input_size' in args and args['input_size'] is not None:
18+
# assert isinstance(args['input_size'], (tuple, list))
19+
# assert len(args['input_size']) == 3
20+
# input_size = tuple(args['input_size'])
21+
# in_chans = input_size[0] # input_size overrides in_chans
22+
# elif 'img_size' in args and args['img_size'] is not None:
23+
# assert isinstance(args['img_size'], int)
24+
# input_size = (in_chans, args['img_size'], args['img_size'])
25+
if 'input_size' in model_config:
26+
input_size = model_config['input_size']
27+
elif 'image_size' in model_config:
28+
input_size = (in_chans, model_config['image_size'], model_config['image_size'])
29+
input_config['input_size'] = input_size
30+
31+
# resolve interpolation method
32+
input_config['interpolation'] = 'bicubic'
33+
if 'interpolation' in args and args['interpolation']:
34+
input_config['interpolation'] = args['interpolation']
35+
elif 'interpolation' in model_config:
36+
input_config['interpolation'] = model_config['interpolation']
37+
38+
# resolve dataset + model mean for normalization
39+
input_config['mean'] = IMAGENET_DEFAULT_MEAN
40+
if 'mean' in args and args['mean'] is not None:
41+
mean = tuple(args['mean'])
42+
if len(mean) == 1:
43+
mean = tuple(list(mean) * in_chans)
44+
else:
45+
assert len(mean) == in_chans
46+
input_config['mean'] = mean
47+
elif 'mean' in model_config:
48+
input_config['mean'] = model_config['mean']
49+
50+
# resolve dataset + model std deviation for normalization
51+
input_config['std'] = IMAGENET_DEFAULT_STD
52+
if 'std' in args and args['std'] is not None:
53+
std = tuple(args['std'])
54+
if len(std) == 1:
55+
std = tuple(list(std) * in_chans)
56+
else:
57+
assert len(std) == in_chans
58+
input_config['std'] = std
59+
elif 'std' in model_config:
60+
input_config['std'] = model_config['std']
61+
62+
# resolve letterbox fill color
63+
input_config['fill_color'] = 'mean'
64+
if 'fill_color' in args and args['fill_color'] is not None:
65+
print('ff')
66+
input_config['fill_color'] = args['fill_color']
67+
elif 'fill_color' in model_config:
68+
input_config['fill_color'] = model_config['fill_color']
69+
70+
return input_config

Diff for: effdet/data/parsers/parser.py

+26-18
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,14 @@
11
from numbers import Integral
2+
from typing import List, Union, Dict, Any
23

34

45
class Parser:
56
""" Parser base class.
67
8+
The attributes listed below make up a public interface common to all parsers. They can be accessed directly
9+
once the dataset is constructed and annotations are populated.
10+
711
Attributes:
8-
yxyx (bool):
9-
has_labels (bool):
10-
include_masks (bool):
11-
include_bboxes_ignore (bool):
12-
ignore_empty_gt (bool):
13-
min_img_size (bool)
1412
1513
cat_names (list[str]):
1614
list of category (class) names, with background class at position 0.
@@ -28,13 +26,23 @@ class Parser:
2826
"""
2927
def __init__(
3028
self,
31-
bbox_yxyx=False,
32-
has_labels=True,
33-
include_masks=False,
34-
include_bboxes_ignore=False,
35-
ignore_empty_gt=False,
36-
min_img_size=32,
29+
bbox_yxyx: bool = False,
30+
has_labels: bool = True,
31+
include_masks: bool = False,
32+
include_bboxes_ignore: bool = False,
33+
ignore_empty_gt: bool = False,
34+
min_img_size: int = 32,
3735
):
36+
"""
37+
Args:
38+
bbox_yxyx (bool): output coords in yxyx format, otherwise xyxy
39+
has_labels (bool): dataset has labels (for training/validation; False usually for test sets)
40+
include_masks (bool): include segmentation masks in target output (not supported yet for any dataset)
41+
include_bboxes_ignore (bool): include ignored bbox in target output
42+
ignore_empty_gt (bool): ignore images with no ground truth (no negative images)
43+
min_img_size (int): ignore images with width or height smaller than this number
44+
sub_sample (int): sample every N images from the dataset
45+
"""
3846
# parser config, determines how dataset parsed and validated
3947
self.yxyx = bbox_yxyx
4048
self.has_labels = has_labels
@@ -45,14 +53,14 @@ def __init__(
4553
self.label_offset = 1
4654

4755
# Category (class) metadata. Populated by _load_annotations()
48-
self.cat_names = []
49-
self.cat_ids = []
50-
self.cat_id_to_label = dict()
56+
self.cat_names: List[str] = []
57+
self.cat_ids: List[Union[str, Integral]] = []
58+
self.cat_id_to_label: Dict[Union[str, Integral], Integral] = dict()
5159

5260
# Image metadata. Populated by _load_annotations()
53-
self.img_ids = []
54-
self.img_ids_invalid = []
55-
self.img_infos = []
61+
self.img_ids: List[Union[str, Integral]] = []
62+
self.img_ids_invalid: List[Union[str, Integral]] = []
63+
self.img_infos: List[Dict[str, Any]] = []
5664

5765
@property
5866
def cat_dicts(self):

Diff for: effdet/data/parsers/parser_coco.py

-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
33
Copyright 2020 Ross Wightman
44
"""
5-
import os
65
import numpy as np
76
from pycocotools.coco import COCO
87
from .parser import Parser

Diff for: effdet/data/parsers/parser_open_images.py

+6
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,9 @@ def _load_img_info(csv_file, select_img_ids=None):
9393
img_sizes, img_id_to_idx = _load_img_info(img_info_filename, select_img_ids=anno_img_ids)
9494

9595
masks_df['ImageIdx'] = masks_df['ImageID'].map(img_id_to_idx)
96+
if np.issubdtype(masks_df.ImageIdx.dtype, np.floating):
97+
masks_df = masks_df.dropna(axis='rows')
98+
masks_df['ImageIdx'] = masks_df.ImageIdx.astype(np.int32)
9699
masks_df.sort_values('ImageIdx', inplace=True)
97100
ann_img_idx = masks_df['ImageIdx'].values
98101
img_sizes = img_sizes[ann_img_idx]
@@ -121,6 +124,9 @@ def _load_img_info(csv_file, select_img_ids=None):
121124

122125
_logger.info('Process bbox...')
123126
bbox_df['ImageIdx'] = bbox_df['ImageID'].map(img_id_to_idx)
127+
if np.issubdtype(bbox_df.ImageIdx.dtype, np.floating):
128+
bbox_df = bbox_df.dropna(axis='rows')
129+
bbox_df['ImageIdx'] = bbox_df.ImageIdx.astype(np.int32)
124130
bbox_df.sort_values('ImageIdx', inplace=True)
125131
ann_img_idx = bbox_df['ImageIdx'].values
126132
img_sizes = img_sizes[ann_img_idx]

Diff for: effdet/data/parsers/parser_voc.py

+3-7
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,11 @@ def _load_annotations(
5151
self.cat_ids = self.cat_names
5252
self.cat_id_to_label = {cat: i + self.label_offset for i, cat in enumerate(self.cat_ids)}
5353

54-
with open(split_filename) as f:
55-
ids = f.readlines()
5654
self.anns = []
5755

58-
for img_idx, img_id in enumerate(ids):
56+
with open(split_filename) as f:
57+
ids = f.readlines()
58+
for img_id in ids:
5959
img_id = img_id.strip("\n")
6060
filename = img_filename % img_id
6161
xml_path = ann_filename % img_id
@@ -85,18 +85,14 @@ def _load_annotations(
8585
self.anns.append(anns)
8686
self.img_infos.append(dict(id=img_id, file_name=filename, width=width, height=height))
8787
self.img_ids.append(img_id)
88-
self.img_id_to_idx[img_id] = img_idx
8988
else:
9089
self.img_ids_invalid.append(img_id)
9190

9291
def merge(self, other):
93-
this_size = len(self.img_ids)
9492
assert len(self.cat_ids) == len(other.cat_ids)
9593
self.img_ids.extend(other.img_ids)
9694
self.img_infos.extend(other.img_infos)
9795
self.anns.extend(other.anns)
98-
for id, idx in other.img_id_to_idx.items():
99-
self.img_id_to_idx[id] = idx + this_size
10096

10197
def get_ann_info(self, idx):
10298
return self._parse_ann_info(self.anns[idx])

Diff for: effdet/evaluation/detection_evaluator.py

+2-19
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ def add_single_ground_truth_image_info(self, image_id, gt_dict):
205205
raise error if instance masks are not in groundtruth dictionary.
206206
"""
207207
if image_id in self._image_ids:
208-
raise ValueError('Image with id {} already added.'.format(image_id))
208+
return
209209

210210
gt_classes = gt_dict[InputDataFields.gt_classes] - self._label_id_offset
211211
# If the key is not present in the gt_dict or the array is empty
@@ -403,18 +403,6 @@ def __init__(self,
403403
metric_prefix=metric_prefix,
404404
group_of_weight=group_of_weight,
405405
evaluate_masks=evaluate_masks)
406-
# self._expected_keys = set([
407-
# InputDataFields.key,
408-
# InputDataFields.gt_boxes,
409-
# InputDataFields.gt_classes,
410-
# InputDataFields.gt_group_of,
411-
# DetectionResultFields.detection_boxes,
412-
# DetectionResultFields.detection_scores,
413-
# DetectionResultFields.detection_classes,
414-
# ])
415-
# if evaluate_masks:
416-
# self._expected_keys.add(InputDataFields.gt_instance_masks)
417-
# self._expected_keys.add(DetectionResultFields.detection_masks)
418406

419407
def add_single_ground_truth_image_info(self, image_id, gt_dict):
420408
"""Adds groundtruth for a single image to be used for evaluation.
@@ -432,7 +420,7 @@ def add_single_ground_truth_image_info(self, image_id, gt_dict):
432420
ValueError: On adding groundtruth for an image more than once.
433421
"""
434422
if image_id in self._image_ids:
435-
raise ValueError('Image with id {} already added.'.format(image_id))
423+
return
436424

437425
gt_classes = (gt_dict[InputDataFields.gt_classes] - self._label_id_offset)
438426
# If the key is not present in the gt_dict or the array is empty
@@ -522,11 +510,6 @@ def __init__(
522510
metric_prefix=metrics_prefix)
523511

524512
self._evaluatable_labels = {}
525-
# Only one of the two has to be provided, but both options are given
526-
# for compatibility with previous codebase.
527-
self._expected_keys.update([
528-
InputDataFields.gt_image_classes,
529-
InputDataFields.gt_labeled_classes])
530513

531514
def add_single_ground_truth_image_info(self, image_id, gt_dict):
532515
"""Adds groundtruth for a single image to be used for evaluation.

0 commit comments

Comments
 (0)