
Commit 55d3b8c

Add pafpn (open-mmlab#2392)

* add PAFPN
* add doc
* rename cfg, inherit from fpn
* reformat doc string
* standard doc string
* fix doc of fpn
* rename lateral_dconv to downsample_convs

1 parent 3174d69 commit 55d3b8c

File tree

4 files changed: +165 -15 lines changed

@@ -0,0 +1,8 @@
_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'

model = dict(
    neck=dict(
        type='PAFPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5))
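Since this config only overrides model.neck, everything else is inherited from the _base_ Faster R-CNN config when mmcv resolves it. A minimal sketch of that resolution; the on-disk filename below is an assumption for illustration, as the diff header does not show where the new config lives:

from mmcv import Config

# Assumed filename for illustration only; the diff does not name the file.
cfg = Config.fromfile('configs/pafpn/faster_rcnn_r50_pafpn_1x_coco.py')

# Only the neck is overridden; backbone, heads, and training settings
# all come from the base faster_rcnn_r50_fpn_1x_coco.py unchanged.
print(cfg.model.neck.type)  # PAFPN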

mmdet/models/necks/__init__.py (+2 -1)

@@ -3,5 +3,6 @@
 from .fpn_carafe import FPN_CARAFE
 from .hrfpn import HRFPN
 from .nas_fpn import NASFPN
+from .pafpn import PAFPN

-__all__ = ['FPN', 'BFP', 'HRFPN', 'NASFPN', 'FPN_CARAFE']
+__all__ = ['FPN', 'BFP', 'HRFPN', 'NASFPN', 'FPN_CARAFE', 'PAFPN']
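Together with the @NECKS.register_module decorator in pafpn.py, this export makes type='PAFPN' resolvable by config-driven construction. A minimal sketch of building the neck through mmdet's builder, assuming build_neck is exposed from mmdet.models as in this generation of the codebase:

from mmdet.models import build_neck

# Builds a PAFPN through the NECKS registry, exactly as config-driven
# construction does; the dict mirrors the new config file above.
neck = build_neck(
    dict(
        type='PAFPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5))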

mmdet/models/necks/fpn.py (+19 -14)

@@ -16,20 +16,25 @@ class FPN(nn.Module):
     Detection (https://arxiv.org/abs/1612.03144)

     Args:
-        in_channels (List[int]):
-            number of input channels per scale
-
-        out_channels (int):
-            number of output channels (used at each scale)
-
-        num_outs (int):
-            number of output scales
-
-        start_level (int):
-            index of the first input scale to use as an output scale
-
-        end_level (int, default=-1):
-            index of the last input scale to use as an output scale
+        in_channels (List[int]): Number of input channels per scale.
+        out_channels (int): Number of output channels (used at each scale).
+        num_outs (int): Number of output scales.
+        start_level (int): Index of the start input backbone level used to
+            build the feature pyramid. Default: 0.
+        end_level (int): Index of the end input backbone level (exclusive) to
+            build the feature pyramid. Default: -1, which means the last level.
+        add_extra_convs (bool): Whether to add conv layers on top of the
+            original feature maps. Default: False.
+        extra_convs_on_inputs (bool): Whether to apply extra conv on
+            the original feature from the backbone. Default: True.
+        relu_before_extra_convs (bool): Whether to apply relu before the extra
+            conv. Default: False.
+        no_norm_on_lateral (bool): Whether to apply norm on lateral.
+            Default: False.
+        conv_cfg (dict): Config dict for convolution layer. Default: None.
+        norm_cfg (dict): Config dict for normalization layer. Default: None.
+        act_cfg (dict): Config dict for activation layer in ConvModule.
+            Default: None.

     Example:
         >>> import torch
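The newly documented arguments are easiest to see with a quick shape check. A minimal sketch, assuming a four-stage ResNet-style input pyramid; with add_extra_convs=True the levels beyond the four backbone stages come from stride-2 3x3 convs rather than max pooling, and extra_convs_on_inputs=True feeds the first of them the raw top backbone feature:

import torch
from mmdet.models.necks import FPN

in_channels = [256, 512, 1024, 2048]
feats = [
    torch.rand(1, c, 56 // 2**i, 56 // 2**i)
    for i, c in enumerate(in_channels)
]
fpn = FPN(
    in_channels,
    out_channels=256,
    num_outs=6,
    add_extra_convs=True,
    extra_convs_on_inputs=True,
    relu_before_extra_convs=True).eval()
outs = fpn(feats)
print([tuple(o.shape[2:]) for o in outs])
# [(56, 56), (28, 28), (14, 14), (7, 7), (4, 4), (2, 2)]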

mmdet/models/necks/pafpn.py (+136)

@@ -0,0 +1,136 @@
import torch.nn as nn
import torch.nn.functional as F

from mmdet.core import auto_fp16
from mmdet.ops import ConvModule
from ..registry import NECKS
from .fpn import FPN


@NECKS.register_module
class PAFPN(FPN):
    """Path Aggregation Network for Instance Segmentation.

    This is an implementation of the PAFPN in Path Aggregation Network
    (https://arxiv.org/abs/1803.01534).

    Args:
        in_channels (List[int]): Number of input channels per scale.
        out_channels (int): Number of output channels (used at each scale).
        num_outs (int): Number of output scales.
        start_level (int): Index of the start input backbone level used to
            build the feature pyramid. Default: 0.
        end_level (int): Index of the end input backbone level (exclusive) to
            build the feature pyramid. Default: -1, which means the last level.
        add_extra_convs (bool): Whether to add conv layers on top of the
            original feature maps. Default: False.
        extra_convs_on_inputs (bool): Whether to apply extra conv on
            the original feature from the backbone. Default: True.
        relu_before_extra_convs (bool): Whether to apply relu before the extra
            conv. Default: False.
        no_norm_on_lateral (bool): Whether to apply norm on lateral.
            Default: False.
        conv_cfg (dict): Config dict for convolution layer. Default: None.
        norm_cfg (dict): Config dict for normalization layer. Default: None.
        act_cfg (dict): Config dict for activation layer in ConvModule.
            Default: None.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 num_outs,
                 start_level=0,
                 end_level=-1,
                 add_extra_convs=False,
                 extra_convs_on_inputs=True,
                 relu_before_extra_convs=False,
                 no_norm_on_lateral=False,
                 conv_cfg=None,
                 norm_cfg=None,
                 act_cfg=None):
        super(PAFPN,
              self).__init__(in_channels, out_channels, num_outs, start_level,
                             end_level, add_extra_convs, extra_convs_on_inputs,
                             relu_before_extra_convs, no_norm_on_lateral,
                             conv_cfg, norm_cfg, act_cfg)
        # add extra bottom up pathway
        self.downsample_convs = nn.ModuleList()
        self.pafpn_convs = nn.ModuleList()
        for i in range(self.start_level + 1, self.backbone_end_level):
            d_conv = ConvModule(
                out_channels,
                out_channels,
                3,
                stride=2,
                padding=1,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg,
                inplace=False)
            pafpn_conv = ConvModule(
                out_channels,
                out_channels,
                3,
                padding=1,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg,
                inplace=False)
            self.downsample_convs.append(d_conv)
            self.pafpn_convs.append(pafpn_conv)

    @auto_fp16()
    def forward(self, inputs):
        assert len(inputs) == len(self.in_channels)

        # build laterals
        laterals = [
            lateral_conv(inputs[i + self.start_level])
            for i, lateral_conv in enumerate(self.lateral_convs)
        ]

        # build top-down path
        used_backbone_levels = len(laterals)
        for i in range(used_backbone_levels - 1, 0, -1):
            prev_shape = laterals[i - 1].shape[2:]
            laterals[i - 1] += F.interpolate(
                laterals[i], size=prev_shape, mode='nearest')

        # build outputs
        # part 1: from original levels
        inter_outs = [
            self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels)
        ]

        # part 2: add bottom-up path
        for i in range(0, used_backbone_levels - 1):
            inter_outs[i + 1] += self.downsample_convs[i](inter_outs[i])

        outs = []
        outs.append(inter_outs[0])
        outs.extend([
            self.pafpn_convs[i - 1](inter_outs[i])
            for i in range(1, used_backbone_levels)
        ])

        # part 3: add extra levels
        if self.num_outs > len(outs):
            # use max pool to get more levels on top of outputs
            # (e.g., Faster R-CNN, Mask R-CNN)
            if not self.add_extra_convs:
                for i in range(self.num_outs - used_backbone_levels):
                    outs.append(F.max_pool2d(outs[-1], 1, stride=2))
            # add conv layers on top of original feature maps (RetinaNet)
            else:
                if self.extra_convs_on_inputs:
                    orig = inputs[self.backbone_end_level - 1]
                    outs.append(self.fpn_convs[used_backbone_levels](orig))
                else:
                    outs.append(self.fpn_convs[used_backbone_levels](outs[-1]))
                for i in range(used_backbone_levels + 1, self.num_outs):
                    if self.relu_before_extra_convs:
                        outs.append(self.fpn_convs[i](F.relu(outs[-1])))
                    else:
                        outs.append(self.fpn_convs[i](outs[-1]))
        return tuple(outs)
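A minimal forward-pass sketch of the new neck, using the ResNet-50 channel layout from the config above; the output sizes follow from the stride-2 downsample convs and, since add_extra_convs defaults to False here, a parameter-free max-pool extra level on top:

import torch
from mmdet.models.necks import PAFPN

# Simulated C2-C5 backbone features for a 224x224 image (strides 4-32).
in_channels = [256, 512, 1024, 2048]
scales = [56, 28, 14, 7]
inputs = [torch.rand(1, c, s, s) for c, s in zip(in_channels, scales)]

neck = PAFPN(in_channels, out_channels=256, num_outs=5).eval()
outputs = neck(inputs)
for i, out in enumerate(outputs):
    print(f'P{i + 2}: {tuple(out.shape)}')
# P2: (1, 256, 56, 56) ... P5: (1, 256, 7, 7), plus
# P6: (1, 256, 4, 4) from the max-pool extra level.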
