Skip to content

Commit a244af7

Browse files
authored
Add BBox Coder (open-mmlab#2480)
* add coder registry
* add coder for single stage
* rename to bbox_coder
* add doc string
* add doc string
* change default
1 parent 127a5d2 commit a244af7

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

42 files changed

+494
-294
lines changed

configs/_base_/models/cascade_mask_rcnn_r50_fpn.py

+16-8
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,10 @@
2323
anchor_scales=[8],
2424
anchor_ratios=[0.5, 1.0, 2.0],
2525
anchor_strides=[4, 8, 16, 32, 64],
26-
target_means=[.0, .0, .0, .0],
27-
target_stds=[1.0, 1.0, 1.0, 1.0],
26+
bbox_coder=dict(
27+
type='DeltaXYWHBBoxCoder',
28+
target_means=[.0, .0, .0, .0],
29+
target_stds=[1.0, 1.0, 1.0, 1.0]),
2830
loss_cls=dict(
2931
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
3032
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
@@ -44,8 +46,10 @@
4446
fc_out_channels=1024,
4547
roi_feat_size=7,
4648
num_classes=80,
47-
target_means=[0., 0., 0., 0.],
48-
target_stds=[0.1, 0.1, 0.2, 0.2],
49+
bbox_coder=dict(
50+
type='DeltaXYWHBBoxCoder',
51+
target_means=[0., 0., 0., 0.],
52+
target_stds=[0.1, 0.1, 0.2, 0.2]),
4953
reg_class_agnostic=True,
5054
loss_cls=dict(
5155
type='CrossEntropyLoss',
@@ -59,8 +63,10 @@
5963
fc_out_channels=1024,
6064
roi_feat_size=7,
6165
num_classes=80,
62-
target_means=[0., 0., 0., 0.],
63-
target_stds=[0.05, 0.05, 0.1, 0.1],
66+
bbox_coder=dict(
67+
type='DeltaXYWHBBoxCoder',
68+
target_means=[0., 0., 0., 0.],
69+
target_stds=[0.05, 0.05, 0.1, 0.1]),
6470
reg_class_agnostic=True,
6571
loss_cls=dict(
6672
type='CrossEntropyLoss',
@@ -74,8 +80,10 @@
7480
fc_out_channels=1024,
7581
roi_feat_size=7,
7682
num_classes=80,
77-
target_means=[0., 0., 0., 0.],
78-
target_stds=[0.033, 0.033, 0.067, 0.067],
83+
bbox_coder=dict(
84+
type='DeltaXYWHBBoxCoder',
85+
target_means=[0., 0., 0., 0.],
86+
target_stds=[0.033, 0.033, 0.067, 0.067]),
7987
reg_class_agnostic=True,
8088
loss_cls=dict(
8189
type='CrossEntropyLoss',

configs/_base_/models/cascade_rcnn_r50_fpn.py

+16-8
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,10 @@
2323
anchor_scales=[8],
2424
anchor_ratios=[0.5, 1.0, 2.0],
2525
anchor_strides=[4, 8, 16, 32, 64],
26-
target_means=[.0, .0, .0, .0],
27-
target_stds=[1.0, 1.0, 1.0, 1.0],
26+
bbox_coder=dict(
27+
type='DeltaXYWHBBoxCoder',
28+
target_means=[.0, .0, .0, .0],
29+
target_stds=[1.0, 1.0, 1.0, 1.0]),
2830
loss_cls=dict(
2931
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
3032
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
@@ -44,8 +46,10 @@
4446
fc_out_channels=1024,
4547
roi_feat_size=7,
4648
num_classes=80,
47-
target_means=[0., 0., 0., 0.],
48-
target_stds=[0.1, 0.1, 0.2, 0.2],
49+
bbox_coder=dict(
50+
type='DeltaXYWHBBoxCoder',
51+
target_means=[0., 0., 0., 0.],
52+
target_stds=[0.1, 0.1, 0.2, 0.2]),
4953
reg_class_agnostic=True,
5054
loss_cls=dict(
5155
type='CrossEntropyLoss',
@@ -59,8 +63,10 @@
5963
fc_out_channels=1024,
6064
roi_feat_size=7,
6165
num_classes=80,
62-
target_means=[0., 0., 0., 0.],
63-
target_stds=[0.05, 0.05, 0.1, 0.1],
66+
bbox_coder=dict(
67+
type='DeltaXYWHBBoxCoder',
68+
target_means=[0., 0., 0., 0.],
69+
target_stds=[0.05, 0.05, 0.1, 0.1]),
6470
reg_class_agnostic=True,
6571
loss_cls=dict(
6672
type='CrossEntropyLoss',
@@ -74,8 +80,10 @@
7480
fc_out_channels=1024,
7581
roi_feat_size=7,
7682
num_classes=80,
77-
target_means=[0., 0., 0., 0.],
78-
target_stds=[0.033, 0.033, 0.067, 0.067],
83+
bbox_coder=dict(
84+
type='DeltaXYWHBBoxCoder',
85+
target_means=[0., 0., 0., 0.],
86+
target_stds=[0.033, 0.033, 0.067, 0.067]),
7987
reg_class_agnostic=True,
8088
loss_cls=dict(
8189
type='CrossEntropyLoss',

configs/_base_/models/fast_rcnn_r50_fpn.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,10 @@
2929
fc_out_channels=1024,
3030
roi_feat_size=7,
3131
num_classes=80,
32-
target_means=[0., 0., 0., 0.],
33-
target_stds=[0.1, 0.1, 0.2, 0.2],
32+
bbox_coder=dict(
33+
type='DeltaXYWHBBoxCoder',
34+
target_means=[0., 0., 0., 0.],
35+
target_stds=[0.1, 0.1, 0.2, 0.2]),
3436
reg_class_agnostic=False,
3537
loss_cls=dict(
3638
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),

configs/_base_/models/faster_rcnn_r50_caffe_c4.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,10 @@
2121
anchor_scales=[2, 4, 8, 16, 32],
2222
anchor_ratios=[0.5, 1.0, 2.0],
2323
anchor_strides=[16],
24-
target_means=[.0, .0, .0, .0],
25-
target_stds=[1.0, 1.0, 1.0, 1.0],
24+
bbox_coder=dict(
25+
type='DeltaXYWHBBoxCoder',
26+
target_means=[.0, .0, .0, .0],
27+
target_stds=[1.0, 1.0, 1.0, 1.0]),
2628
loss_cls=dict(
2729
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
2830
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
@@ -48,8 +50,10 @@
4850
roi_feat_size=7,
4951
in_channels=2048,
5052
num_classes=80,
51-
target_means=[0., 0., 0., 0.],
52-
target_stds=[0.1, 0.1, 0.2, 0.2],
53+
bbox_coder=dict(
54+
type='DeltaXYWHBBoxCoder',
55+
target_means=[0., 0., 0., 0.],
56+
target_stds=[0.1, 0.1, 0.2, 0.2]),
5357
reg_class_agnostic=False,
5458
loss_cls=dict(
5559
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),

configs/_base_/models/faster_rcnn_r50_fpn.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,10 @@
2222
anchor_scales=[8],
2323
anchor_ratios=[0.5, 1.0, 2.0],
2424
anchor_strides=[4, 8, 16, 32, 64],
25-
target_means=[.0, .0, .0, .0],
26-
target_stds=[1.0, 1.0, 1.0, 1.0],
25+
bbox_coder=dict(
26+
type='DeltaXYWHBBoxCoder',
27+
target_means=[.0, .0, .0, .0],
28+
target_stds=[1.0, 1.0, 1.0, 1.0]),
2729
loss_cls=dict(
2830
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
2931
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
@@ -40,8 +42,10 @@
4042
fc_out_channels=1024,
4143
roi_feat_size=7,
4244
num_classes=80,
43-
target_means=[0., 0., 0., 0.],
44-
target_stds=[0.1, 0.1, 0.2, 0.2],
45+
bbox_coder=dict(
46+
type='DeltaXYWHBBoxCoder',
47+
target_means=[0., 0., 0., 0.],
48+
target_stds=[0.1, 0.1, 0.2, 0.2]),
4549
reg_class_agnostic=False,
4650
loss_cls=dict(
4751
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),

configs/_base_/models/mask_rcnn_r50_caffe_c4.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,10 @@
2121
anchor_scales=[2, 4, 8, 16, 32],
2222
anchor_ratios=[0.5, 1.0, 2.0],
2323
anchor_strides=[16],
24-
target_means=[.0, .0, .0, .0],
25-
target_stds=[1.0, 1.0, 1.0, 1.0],
24+
bbox_coder=dict(
25+
type='DeltaXYWHBBoxCoder',
26+
target_means=[.0, .0, .0, .0],
27+
target_stds=[1.0, 1.0, 1.0, 1.0]),
2628
loss_cls=dict(
2729
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
2830
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
@@ -48,8 +50,10 @@
4850
roi_feat_size=7,
4951
in_channels=2048,
5052
num_classes=80,
51-
target_means=[0., 0., 0., 0.],
52-
target_stds=[0.1, 0.1, 0.2, 0.2],
53+
bbox_coder=dict(
54+
type='DeltaXYWHBBoxCoder',
55+
target_means=[0., 0., 0., 0.],
56+
target_stds=[0.1, 0.1, 0.2, 0.2]),
5357
reg_class_agnostic=False,
5458
loss_cls=dict(
5559
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),

configs/_base_/models/mask_rcnn_r50_fpn.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,10 @@
2323
anchor_scales=[8],
2424
anchor_ratios=[0.5, 1.0, 2.0],
2525
anchor_strides=[4, 8, 16, 32, 64],
26-
target_means=[.0, .0, .0, .0],
27-
target_stds=[1.0, 1.0, 1.0, 1.0],
26+
bbox_coder=dict(
27+
type='DeltaXYWHBBoxCoder',
28+
target_means=[.0, .0, .0, .0],
29+
target_stds=[1.0, 1.0, 1.0, 1.0]),
2830
loss_cls=dict(
2931
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
3032
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
@@ -41,8 +43,10 @@
4143
fc_out_channels=1024,
4244
roi_feat_size=7,
4345
num_classes=80,
44-
target_means=[0., 0., 0., 0.],
45-
target_stds=[0.1, 0.1, 0.2, 0.2],
46+
bbox_coder=dict(
47+
type='DeltaXYWHBBoxCoder',
48+
target_means=[0., 0., 0., 0.],
49+
target_stds=[0.1, 0.1, 0.2, 0.2]),
4650
reg_class_agnostic=False,
4751
loss_cls=dict(
4852
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),

configs/_base_/models/retinanet_r50_fpn.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,10 @@
2828
scales_per_octave=3,
2929
anchor_ratios=[0.5, 1.0, 2.0],
3030
anchor_strides=[8, 16, 32, 64, 128],
31-
target_means=[.0, .0, .0, .0],
32-
target_stds=[1.0, 1.0, 1.0, 1.0],
31+
bbox_coder=dict(
32+
type='DeltaXYWHBBoxCoder',
33+
target_means=[.0, .0, .0, .0],
34+
target_stds=[1.0, 1.0, 1.0, 1.0]),
3335
loss_cls=dict(
3436
type='FocalLoss',
3537
use_sigmoid=True,

configs/_base_/models/rpn_r50_caffe_c4.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,10 @@
2121
anchor_scales=[2, 4, 8, 16, 32],
2222
anchor_ratios=[0.5, 1.0, 2.0],
2323
anchor_strides=[16],
24-
target_means=[.0, .0, .0, .0],
25-
target_stds=[1.0, 1.0, 1.0, 1.0],
24+
bbox_coder=dict(
25+
type='DeltaXYWHBBoxCoder',
26+
target_means=[.0, .0, .0, .0],
27+
target_stds=[1.0, 1.0, 1.0, 1.0]),
2628
loss_cls=dict(
2729
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
2830
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)))

configs/_base_/models/rpn_r50_fpn.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,10 @@
2323
anchor_scales=[8],
2424
anchor_ratios=[0.5, 1.0, 2.0],
2525
anchor_strides=[4, 8, 16, 32, 64],
26-
target_means=[.0, .0, .0, .0],
27-
target_stds=[1.0, 1.0, 1.0, 1.0],
26+
bbox_coder=dict(
27+
type='DeltaXYWHBBoxCoder',
28+
target_means=[.0, .0, .0, .0],
29+
target_stds=[1.0, 1.0, 1.0, 1.0]),
2830
loss_cls=dict(
2931
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
3032
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)))

configs/_base_/models/ssd300.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,10 @@
2121
anchor_strides=(8, 16, 32, 64, 100, 300),
2222
basesize_ratio_range=(0.15, 0.9),
2323
anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]),
24-
target_means=(.0, .0, .0, .0),
25-
target_stds=(0.1, 0.1, 0.2, 0.2)))
24+
bbox_coder=dict(
25+
type='DeltaXYWHBBoxCoder',
26+
target_means=[.0, .0, .0, .0],
27+
target_stds=[0.1, 0.1, 0.2, 0.2])))
2628
cudnn_benchmark = True
2729
train_cfg = dict(
2830
assigner=dict(

configs/atss/atss_r50_fpn.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,10 @@
2828
scales_per_octave=1,
2929
anchor_ratios=[1.0],
3030
anchor_strides=[8, 16, 32, 64, 128],
31-
target_means=[.0, .0, .0, .0],
32-
target_stds=[0.1, 0.1, 0.2, 0.2],
31+
bbox_coder=dict(
32+
type='DeltaXYWHBBoxCoder',
33+
target_means=[.0, .0, .0, .0],
34+
target_stds=[0.1, 0.1, 0.2, 0.2]),
3335
loss_cls=dict(
3436
type='FocalLoss',
3537
use_sigmoid=True,

configs/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,10 @@
1212
fc_out_channels=1024,
1313
roi_feat_size=7,
1414
num_classes=8,
15-
target_means=[0., 0., 0., 0.],
16-
target_stds=[0.1, 0.1, 0.2, 0.2],
15+
bbox_coder=dict(
16+
type='DeltaXYWHBBoxCoder',
17+
target_means=[0., 0., 0., 0.],
18+
target_stds=[0.1, 0.1, 0.2, 0.2]),
1719
reg_class_agnostic=False,
1820
loss_cls=dict(
1921
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),

configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,10 @@
1111
fc_out_channels=1024,
1212
roi_feat_size=7,
1313
num_classes=8,
14-
target_means=[0., 0., 0., 0.],
15-
target_stds=[0.1, 0.1, 0.2, 0.2],
14+
bbox_coder=dict(
15+
type='DeltaXYWHBBoxCoder',
16+
target_means=[0., 0., 0., 0.],
17+
target_stds=[0.1, 0.1, 0.2, 0.2]),
1618
reg_class_agnostic=False,
1719
loss_cls=dict(
1820
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),

configs/double_heads/dh_faster_rcnn_r50_fpn_1x_coco.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,10 @@
1313
fc_out_channels=1024,
1414
roi_feat_size=7,
1515
num_classes=80,
16-
target_means=[0., 0., 0., 0.],
17-
target_stds=[0.1, 0.1, 0.2, 0.2],
16+
bbox_coder=dict(
17+
type='DeltaXYWHBBoxCoder',
18+
target_means=[0., 0., 0., 0.],
19+
target_stds=[0.1, 0.1, 0.2, 0.2]),
1820
reg_class_agnostic=False,
1921
loss_cls=dict(
2022
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=2.0),

configs/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
scales_per_octave=3,
1212
anchor_ratios=[0.5, 1.0, 2.0],
1313
anchor_strides=[8, 16, 32, 64, 128],
14-
target_means=[.0, .0, .0, .0],
15-
target_stds=[0.1, 0.1, 0.2, 0.2],
14+
bbox_coder=dict(
15+
type='DeltaXYWHBBoxCoder',
16+
target_means=[.0, .0, .0, .0],
17+
target_stds=[0.1, 0.1, 0.2, 0.2]),
1618
loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=0.75)))

configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,10 @@
2626
anchor_scales=[8],
2727
anchor_ratios=[0.5, 1.0, 2.0],
2828
anchor_strides=[4, 8, 16, 32, 64],
29-
target_means=[.0, .0, .0, .0],
30-
target_stds=[1.0, 1.0, 1.0, 1.0],
29+
bbox_coder=dict(
30+
type='DeltaXYWHBBoxCoder',
31+
target_means=[.0, .0, .0, .0],
32+
target_stds=[1.0, 1.0, 1.0, 1.0]),
3133
loss_cls=dict(
3234
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
3335
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
@@ -45,8 +47,10 @@
4547
fc_out_channels=1024,
4648
roi_feat_size=7,
4749
num_classes=80,
48-
target_means=[0., 0., 0., 0.],
49-
target_stds=[0.1, 0.1, 0.2, 0.2],
50+
bbox_coder=dict(
51+
type='DeltaXYWHBBoxCoder',
52+
target_means=[0., 0., 0., 0.],
53+
target_stds=[0.1, 0.1, 0.2, 0.2]),
5054
reg_class_agnostic=False),
5155
grid_roi_extractor=dict(
5256
type='SingleRoIExtractor',

configs/guided_anchoring/ga_fast_r50_caffe_fpn_1x_coco.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@
1010
norm_cfg=dict(type='BN', requires_grad=False),
1111
norm_eval=True,
1212
style='caffe'),
13-
roi_head=dict(bbox_head=dict(target_stds=[0.05, 0.05, 0.1, 0.1])))
13+
roi_head=dict(
14+
bbox_head=dict(bbox_coder=dict(target_stds=[0.05, 0.05, 0.1, 0.1]))))
1415
# model training and testing settings
1516
train_cfg = dict(
1617
rcnn=dict(

0 commit comments

Comments
 (0)