Skip to content

Commit fced16b

Browse files
RangiLyu authored and ZwwWayne committed
[Enhance]: Optimize augmentation pipeline to speed up training. (open-mmlab#6442)
1 parent d54bcbe commit fced16b

13 files changed

+85
-61
lines changed

configs/pascal_voc/ssd300_voc0712.py

+8 -8
Original file line number | Diff line number | Diff line change
@@ -11,14 +11,8 @@
1111
data_root = 'data/VOCdevkit/'
1212
img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True)
1313
train_pipeline = [
14-
dict(type='LoadImageFromFile', to_float32=True),
14+
dict(type='LoadImageFromFile'),
1515
dict(type='LoadAnnotations', with_bbox=True),
16-
dict(
17-
type='PhotoMetricDistortion',
18-
brightness_delta=32,
19-
contrast_range=(0.5, 1.5),
20-
saturation_range=(0.5, 1.5),
21-
hue_delta=18),
2216
dict(
2317
type='Expand',
2418
mean=img_norm_cfg['mean'],
@@ -29,8 +23,14 @@
2923
min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
3024
min_crop_size=0.3),
3125
dict(type='Resize', img_scale=(300, 300), keep_ratio=False),
32-
dict(type='Normalize', **img_norm_cfg),
3326
dict(type='RandomFlip', flip_ratio=0.5),
27+
dict(
28+
type='PhotoMetricDistortion',
29+
brightness_delta=32,
30+
contrast_range=(0.5, 1.5),
31+
saturation_range=(0.5, 1.5),
32+
hue_delta=18),
33+
dict(type='Normalize', **img_norm_cfg),
3434
dict(type='DefaultFormatBundle'),
3535
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
3636
]

configs/pascal_voc/ssd512_voc0712.py

+8 -8
Original file line number | Diff line number | Diff line change
@@ -15,14 +15,8 @@
1515
ratios=([2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]))))
1616
img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True)
1717
train_pipeline = [
18-
dict(type='LoadImageFromFile', to_float32=True),
18+
dict(type='LoadImageFromFile'),
1919
dict(type='LoadAnnotations', with_bbox=True),
20-
dict(
21-
type='PhotoMetricDistortion',
22-
brightness_delta=32,
23-
contrast_range=(0.5, 1.5),
24-
saturation_range=(0.5, 1.5),
25-
hue_delta=18),
2620
dict(
2721
type='Expand',
2822
mean=img_norm_cfg['mean'],
@@ -33,8 +27,14 @@
3327
min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
3428
min_crop_size=0.3),
3529
dict(type='Resize', img_scale=(512, 512), keep_ratio=False),
36-
dict(type='Normalize', **img_norm_cfg),
3730
dict(type='RandomFlip', flip_ratio=0.5),
31+
dict(
32+
type='PhotoMetricDistortion',
33+
brightness_delta=32,
34+
contrast_range=(0.5, 1.5),
35+
saturation_range=(0.5, 1.5),
36+
hue_delta=18),
37+
dict(type='Normalize', **img_norm_cfg),
3838
dict(type='DefaultFormatBundle'),
3939
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
4040
]

configs/ssd/ssd300_coco.py

+8 -8
Original file line number | Diff line number | Diff line change
@@ -7,14 +7,8 @@
77
data_root = 'data/coco/'
88
img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True)
99
train_pipeline = [
10-
dict(type='LoadImageFromFile', to_float32=True),
10+
dict(type='LoadImageFromFile'),
1111
dict(type='LoadAnnotations', with_bbox=True),
12-
dict(
13-
type='PhotoMetricDistortion',
14-
brightness_delta=32,
15-
contrast_range=(0.5, 1.5),
16-
saturation_range=(0.5, 1.5),
17-
hue_delta=18),
1812
dict(
1913
type='Expand',
2014
mean=img_norm_cfg['mean'],
@@ -25,8 +19,14 @@
2519
min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
2620
min_crop_size=0.3),
2721
dict(type='Resize', img_scale=(300, 300), keep_ratio=False),
28-
dict(type='Normalize', **img_norm_cfg),
2922
dict(type='RandomFlip', flip_ratio=0.5),
23+
dict(
24+
type='PhotoMetricDistortion',
25+
brightness_delta=32,
26+
contrast_range=(0.5, 1.5),
27+
saturation_range=(0.5, 1.5),
28+
hue_delta=18),
29+
dict(type='Normalize', **img_norm_cfg),
3030
dict(type='DefaultFormatBundle'),
3131
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
3232
]

configs/ssd/ssd512_coco.py

+8 -8
Original file line number | Diff line number | Diff line change
@@ -20,14 +20,8 @@
2020
data_root = 'data/coco/'
2121
img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True)
2222
train_pipeline = [
23-
dict(type='LoadImageFromFile', to_float32=True),
23+
dict(type='LoadImageFromFile'),
2424
dict(type='LoadAnnotations', with_bbox=True),
25-
dict(
26-
type='PhotoMetricDistortion',
27-
brightness_delta=32,
28-
contrast_range=(0.5, 1.5),
29-
saturation_range=(0.5, 1.5),
30-
hue_delta=18),
3125
dict(
3226
type='Expand',
3327
mean=img_norm_cfg['mean'],
@@ -38,8 +32,14 @@
3832
min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
3933
min_crop_size=0.3),
4034
dict(type='Resize', img_scale=(512, 512), keep_ratio=False),
41-
dict(type='Normalize', **img_norm_cfg),
4235
dict(type='RandomFlip', flip_ratio=0.5),
36+
dict(
37+
type='PhotoMetricDistortion',
38+
brightness_delta=32,
39+
contrast_range=(0.5, 1.5),
40+
saturation_range=(0.5, 1.5),
41+
hue_delta=18),
42+
dict(type='Normalize', **img_norm_cfg),
4343
dict(type='DefaultFormatBundle'),
4444
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
4545
]

configs/ssd/ssdlite_mobilenetv2_scratch_600e_coco.py

+8 -8
Original file line number | Diff line number | Diff line change
@@ -70,14 +70,8 @@
7070
img_norm_cfg = dict(
7171
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
7272
train_pipeline = [
73-
dict(type='LoadImageFromFile', to_float32=True),
73+
dict(type='LoadImageFromFile'),
7474
dict(type='LoadAnnotations', with_bbox=True),
75-
dict(
76-
type='PhotoMetricDistortion',
77-
brightness_delta=32,
78-
contrast_range=(0.5, 1.5),
79-
saturation_range=(0.5, 1.5),
80-
hue_delta=18),
8175
dict(
8276
type='Expand',
8377
mean=img_norm_cfg['mean'],
@@ -88,8 +82,14 @@
8882
min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
8983
min_crop_size=0.3),
9084
dict(type='Resize', img_scale=(320, 320), keep_ratio=False),
91-
dict(type='Normalize', **img_norm_cfg),
9285
dict(type='RandomFlip', flip_ratio=0.5),
86+
dict(
87+
type='PhotoMetricDistortion',
88+
brightness_delta=32,
89+
contrast_range=(0.5, 1.5),
90+
saturation_range=(0.5, 1.5),
91+
hue_delta=18),
92+
dict(type='Normalize', **img_norm_cfg),
9393
dict(type='Pad', size_divisor=320),
9494
dict(type='DefaultFormatBundle'),
9595
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),

configs/yolact/yolact_r50_1x8_coco.py

+8 -8
Original file line number | Diff line number | Diff line change
@@ -90,15 +90,9 @@
9090
img_norm_cfg = dict(
9191
mean=[123.68, 116.78, 103.94], std=[58.40, 57.12, 57.38], to_rgb=True)
9292
train_pipeline = [
93-
dict(type='LoadImageFromFile', to_float32=True),
93+
dict(type='LoadImageFromFile'),
9494
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
9595
dict(type='FilterAnnotations', min_gt_bbox_wh=(4.0, 4.0)),
96-
dict(
97-
type='PhotoMetricDistortion',
98-
brightness_delta=32,
99-
contrast_range=(0.5, 1.5),
100-
saturation_range=(0.5, 1.5),
101-
hue_delta=18),
10296
dict(
10397
type='Expand',
10498
mean=img_norm_cfg['mean'],
@@ -109,8 +103,14 @@
109103
min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
110104
min_crop_size=0.3),
111105
dict(type='Resize', img_scale=(img_size, img_size), keep_ratio=False),
112-
dict(type='Normalize', **img_norm_cfg),
113106
dict(type='RandomFlip', flip_ratio=0.5),
107+
dict(
108+
type='PhotoMetricDistortion',
109+
brightness_delta=32,
110+
contrast_range=(0.5, 1.5),
111+
saturation_range=(0.5, 1.5),
112+
hue_delta=18),
113+
dict(type='Normalize', **img_norm_cfg),
114114
dict(type='DefaultFormatBundle'),
115115
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
116116
]

configs/yolo/yolov3_d53_320_273e_coco.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -2,9 +2,8 @@
22
# dataset settings
33
img_norm_cfg = dict(mean=[0, 0, 0], std=[255., 255., 255.], to_rgb=True)
44
train_pipeline = [
5-
dict(type='LoadImageFromFile', to_float32=True),
5+
dict(type='LoadImageFromFile'),
66
dict(type='LoadAnnotations', with_bbox=True),
7-
dict(type='PhotoMetricDistortion'),
87
dict(
98
type='Expand',
109
mean=img_norm_cfg['mean'],
@@ -16,6 +15,7 @@
1615
min_crop_size=0.3),
1716
dict(type='Resize', img_scale=(320, 320), keep_ratio=True),
1817
dict(type='RandomFlip', flip_ratio=0.5),
18+
dict(type='PhotoMetricDistortion'),
1919
dict(type='Normalize', **img_norm_cfg),
2020
dict(type='Pad', size_divisor=32),
2121
dict(type='DefaultFormatBundle'),

configs/yolo/yolov3_d53_mstrain-416_273e_coco.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -2,9 +2,8 @@
22
# dataset settings
33
img_norm_cfg = dict(mean=[0, 0, 0], std=[255., 255., 255.], to_rgb=True)
44
train_pipeline = [
5-
dict(type='LoadImageFromFile', to_float32=True),
5+
dict(type='LoadImageFromFile'),
66
dict(type='LoadAnnotations', with_bbox=True),
7-
dict(type='PhotoMetricDistortion'),
87
dict(
98
type='Expand',
109
mean=img_norm_cfg['mean'],
@@ -16,6 +15,7 @@
1615
min_crop_size=0.3),
1716
dict(type='Resize', img_scale=[(320, 320), (416, 416)], keep_ratio=True),
1817
dict(type='RandomFlip', flip_ratio=0.5),
18+
dict(type='PhotoMetricDistortion'),
1919
dict(type='Normalize', **img_norm_cfg),
2020
dict(type='Pad', size_divisor=32),
2121
dict(type='DefaultFormatBundle'),

configs/yolo/yolov3_d53_mstrain-608_273e_coco.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,6 @@
6262
train_pipeline = [
6363
dict(type='LoadImageFromFile', to_float32=True),
6464
dict(type='LoadAnnotations', with_bbox=True),
65-
dict(type='PhotoMetricDistortion'),
6665
dict(
6766
type='Expand',
6867
mean=img_norm_cfg['mean'],
@@ -74,6 +73,7 @@
7473
min_crop_size=0.3),
7574
dict(type='Resize', img_scale=[(320, 320), (608, 608)], keep_ratio=True),
7675
dict(type='RandomFlip', flip_ratio=0.5),
76+
dict(type='PhotoMetricDistortion'),
7777
dict(type='Normalize', **img_norm_cfg),
7878
dict(type='Pad', size_divisor=32),
7979
dict(type='DefaultFormatBundle'),

configs/yolo/yolov3_mobilenetv2_320_300e_coco.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -13,9 +13,8 @@
1313
img_norm_cfg = dict(
1414
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
1515
train_pipeline = [
16-
dict(type='LoadImageFromFile', to_float32=True),
16+
dict(type='LoadImageFromFile'),
1717
dict(type='LoadAnnotations', with_bbox=True),
18-
dict(type='PhotoMetricDistortion'),
1918
dict(
2019
type='Expand',
2120
mean=img_norm_cfg['mean'],
@@ -27,6 +26,7 @@
2726
min_crop_size=0.3),
2827
dict(type='Resize', img_scale=(320, 320), keep_ratio=True),
2928
dict(type='RandomFlip', flip_ratio=0.5),
29+
dict(type='PhotoMetricDistortion'),
3030
dict(type='Normalize', **img_norm_cfg),
3131
dict(type='Pad', size_divisor=32),
3232
dict(type='DefaultFormatBundle'),

configs/yolo/yolov3_mobilenetv2_mstrain-416_300e_coco.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -62,9 +62,8 @@
6262
img_norm_cfg = dict(
6363
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6464
train_pipeline = [
65-
dict(type='LoadImageFromFile', to_float32=True),
65+
dict(type='LoadImageFromFile'),
6666
dict(type='LoadAnnotations', with_bbox=True),
67-
dict(type='PhotoMetricDistortion'),
6867
dict(
6968
type='Expand',
7069
mean=img_norm_cfg['mean'],
@@ -80,6 +79,7 @@
8079
multiscale_mode='range',
8180
keep_ratio=True),
8281
dict(type='RandomFlip', flip_ratio=0.5),
82+
dict(type='PhotoMetricDistortion'),
8383
dict(type='Normalize', **img_norm_cfg),
8484
dict(type='Pad', size_divisor=32),
8585
dict(type='DefaultFormatBundle'),

mmdet/datasets/pipelines/transforms.py

+1 -4
Original file line number | Diff line number | Diff line change
@@ -980,10 +980,7 @@ def __call__(self, results):
980980
assert results['img_fields'] == ['img'], \
981981
'Only single img_fields is allowed'
982982
img = results['img']
983-
assert img.dtype == np.float32, \
984-
'PhotoMetricDistortion needs the input image of dtype ' \
985-
'np.float32, please set "to_float32=True" in ' \
986-
'"LoadImageFromFile" pipeline'
983+
img = img.astype(np.float32)
987984
# random brightness
988985
if random.randint(2):
989986
delta = random.uniform(-self.brightness_delta,

tests/test_data/test_pipelines/test_transform/test_transform.py

+27
Original file line number | Diff line number | Diff line change
@@ -965,3 +965,30 @@ def test_mixup():
965965
assert results['gt_labels'].dtype == np.int64
966966
assert results['gt_bboxes'].dtype == np.float32
967967
assert results['gt_bboxes_ignore'].dtype == np.float32
968+
969+
970+
def test_photo_metric_distortion():
971+
img = mmcv.imread(
972+
osp.join(osp.dirname(__file__), '../../../data/color.jpg'), 'color')
973+
transform = dict(type='PhotoMetricDistortion')
974+
distortion_module = build_from_cfg(transform, PIPELINES)
975+
976+
# test assertion for invalid img_fields
977+
with pytest.raises(AssertionError):
978+
results = dict()
979+
results['img'] = img
980+
results['img2'] = img
981+
results['img_fields'] = ['img', 'img2']
982+
distortion_module(results)
983+
984+
# test uint8 input
985+
results = dict()
986+
results['img'] = img
987+
results = distortion_module(results)
988+
assert results['img'].dtype == np.float32
989+
990+
# test float32 input
991+
results = dict()
992+
results['img'] = img.astype(np.float32)
993+
results = distortion_module(results)
994+
assert results['img'].dtype == np.float32

0 commit comments

Comments
 (0)