|
| 1 | +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
| 2 | +# |
| 3 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +# you may not use this file except in compliance with the License. |
| 5 | +# You may obtain a copy of the License at |
| 6 | +# |
| 7 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +# |
| 9 | +# Unless required by applicable law or agreed to in writing, software |
| 10 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +# See the License for the specific language governing permissions and |
| 13 | +# limitations under the License. |
| 14 | + |
| 15 | +import paddle |
| 16 | +import paddle.nn as nn |
| 17 | +import paddle.nn.functional as F |
| 18 | + |
| 19 | +from paddleseg import utils |
| 20 | +from paddleseg.cvlibs import manager |
| 21 | +from paddleseg.models import layers |
| 22 | + |
| 23 | + |
@manager.MODELS.add_component
class UPerNet(nn.Layer):
    """
    The UPerNet implementation based on PaddlePaddle.

    The original article refers to
    Tete Xiao, et, al. "Unified Perceptual Parsing for Scene Understanding"
    (https://arxiv.org/abs/1807.10221).

    Args:
        num_classes (int): The unique number of target classes.
        backbone (paddle.nn.Layer): Backbone network, currently support Resnet50/101.
        backbone_indices (tuple): Four values in the tuple indicate the indices of output of backbone.
        channels (int): The channels of inter layers. Default: 512.
        enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: False.
        align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even,
            e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False.
        dropout_prob (float): Dropout ratio for upernet head. Default: 0.1.
        pretrained (str, optional): The path or url of pretrained model. Default: None.
    """

    def __init__(self,
                 num_classes,
                 backbone,
                 backbone_indices,
                 channels=512,
                 enable_auxiliary_loss=False,
                 align_corners=False,
                 dropout_prob=0.1,
                 pretrained=None):
        super().__init__()
        self.backbone = backbone
        self.backbone_indices = backbone_indices
        # Channels of the selected backbone feature maps, computed once and
        # reused as the head's FPN input planes (the original built the same
        # list twice).
        self.in_channels = [
            self.backbone.feat_channels[i] for i in backbone_indices
        ]
        self.align_corners = align_corners
        self.pretrained = pretrained
        self.enable_auxiliary_loss = enable_auxiliary_loss

        self.head = UPerNetHead(
            num_classes=num_classes,
            fpn_inplanes=self.in_channels,
            dropout_prob=dropout_prob,
            channels=channels,
            enable_auxiliary_loss=self.enable_auxiliary_loss)
        self.init_weight()

    def forward(self, x):
        """Run the backbone, select the configured feature levels, apply the
        head, and upsample every logit map back to the input resolution.

        Returns:
            list[Tensor]: One logit tensor per output (main, plus auxiliary
            when enabled), each resized to the spatial shape of ``x``.
        """
        feats = self.backbone(x)
        feats = [feats[i] for i in self.backbone_indices]
        logit_list = self.head(feats)
        logit_list = [
            F.interpolate(
                logit,
                paddle.shape(x)[2:],
                mode='bilinear',
                align_corners=self.align_corners) for logit in logit_list
        ]
        return logit_list

    def init_weight(self):
        """Load pretrained weights for the entire model if a path/url is set."""
        if self.pretrained is not None:
            utils.load_entire_model(self, self.pretrained)
| 91 | + |
| 92 | + |
class UPerNetHead(nn.Layer):
    """
    The UPerNet head: a pyramid pooling module (PPM) on the deepest feature,
    a top-down FPN over the shallower features, and a fusion conv followed by
    a 1x1 classifier.

    Args:
        num_classes (int): The unique number of target classes.
        fpn_inplanes (list|tuple): Channels of each input feature map, ordered
            shallow-to-deep; the last entry feeds the PPM.
        channels (int): Channels of the intermediate FPN layers.
        dropout_prob (float, optional): Dropout ratio of the auxiliary head. Default: 0.1.
        enable_auxiliary_loss (bool, optional): Whether to also emit an auxiliary
            logit computed from the third input level. Default: False.
        align_corners (bool, optional): An argument of F.interpolate. Default: True.
    """

    def __init__(self,
                 num_classes,
                 fpn_inplanes,
                 channels,
                 dropout_prob=0.1,
                 enable_auxiliary_loss=False,
                 align_corners=True):
        super(UPerNetHead, self).__init__()
        self.align_corners = align_corners
        self.ppm = layers.PPModule(
            in_channels=fpn_inplanes[-1],
            out_channels=channels,
            bin_sizes=(1, 2, 3, 6),
            dim_reduction=True,
            align_corners=True)
        self.enable_auxiliary_loss = enable_auxiliary_loss
        # One lateral 1x1 conv and one 3x3 smoothing conv per non-deepest level.
        self.lateral_convs = nn.LayerList()
        self.fpn_convs = nn.LayerList()

        for fpn_inplane in fpn_inplanes[:-1]:
            self.lateral_convs.append(
                layers.ConvBNReLU(fpn_inplane, channels, 1))
            self.fpn_convs.append(
                layers.ConvBNReLU(
                    channels, channels, 3, bias_attr=False))

        if self.enable_auxiliary_loss:
            self.aux_head = layers.AuxLayer(
                fpn_inplanes[2],
                fpn_inplanes[2],
                num_classes,
                dropout_prob=dropout_prob)

        self.fpn_bottleneck = layers.ConvBNReLU(
            len(fpn_inplanes) * channels, channels, 3, padding=1)

        # NOTE(review): conv_last is never used in forward(); it is kept only
        # so existing pretrained checkpoints still load without missing keys —
        # confirm against released weights before removing.
        self.conv_last = nn.Sequential(
            layers.ConvBNReLU(
                len(fpn_inplanes) * channels, channels, 3, bias_attr=False),
            nn.Conv2D(
                channels, num_classes, kernel_size=1))
        self.conv_seg = nn.Conv2D(channels, num_classes, kernel_size=1)

    def forward(self, inputs):
        """Fuse multi-level features and classify.

        Args:
            inputs (list[Tensor]): Feature maps ordered shallow-to-deep,
                matching ``fpn_inplanes``.

        Returns:
            list[Tensor]: ``[main_logit]`` or ``[main_logit, aux_logit]``
            when auxiliary loss is enabled.
        """
        # Lateral 1x1 convs on all but the deepest level; PPM on the deepest.
        laterals = []
        for i, lateral_conv in enumerate(self.lateral_convs):
            laterals.append(lateral_conv(inputs[i]))

        laterals.append(self.ppm(inputs[-1]))
        fpn_levels = len(laterals)

        # Top-down pathway: upsample each level and add it into the next
        # shallower one.
        for i in range(fpn_levels - 1, 0, -1):
            prev_shape = paddle.shape(laterals[i - 1])
            laterals[i - 1] = laterals[i - 1] + F.interpolate(
                laterals[i],
                size=prev_shape[2:],
                mode='bilinear',
                align_corners=self.align_corners)

        # Smooth every merged level; the deepest (PPM) output passes through.
        fpn_outs = []
        for i in range(fpn_levels - 1):
            fpn_outs.append(self.fpn_convs[i](laterals[i]))
        fpn_outs.append(laterals[-1])

        # Resize all levels to the finest resolution, concatenate, fuse,
        # and classify.
        for i in range(fpn_levels - 1, 0, -1):
            fpn_outs[i] = F.interpolate(
                fpn_outs[i],
                size=paddle.shape(fpn_outs[0])[2:],
                mode='bilinear',
                align_corners=self.align_corners)
        fuse_out = paddle.concat(fpn_outs, axis=1)
        x = self.fpn_bottleneck(fuse_out)

        # Original ended with an if/else whose branches both returned
        # logits_list; collapsed into a single return path.
        logits_list = [self.conv_seg(x)]
        if self.enable_auxiliary_loss:
            logits_list.append(self.aux_head(inputs[2]))
        return logits_list