|
| 1 | +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
| 2 | +# |
| 3 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +# you may not use this file except in compliance with the License. |
| 5 | +# You may obtain a copy of the License at |
| 6 | +# |
| 7 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +# |
| 9 | +# Unless required by applicable law or agreed to in writing, software |
| 10 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +# See the License for the specific language governing permissions and |
| 13 | +# limitations under the License. |
| 14 | + |
| 15 | +import paddle |
| 16 | +import paddle.nn as nn |
| 17 | +import paddle.nn.functional as F |
| 18 | + |
| 19 | +from paddleseg import utils |
| 20 | +from paddleseg.cvlibs import manager |
| 21 | +from paddleseg.models import layers |
| 22 | + |
| 23 | + |
@manager.MODELS.add_component
class UPerNet(nn.Layer):
    """
    The UPerNet implementation based on PaddlePaddle.

    The original article refers to
    Tete Xiao, et, al. "Unified Perceptual Parsing for Scene Understanding"
    (https://arxiv.org/abs/1807.10221).

    Args:
        num_classes (int): The unique number of target classes.
        backbone (paddle.nn.Layer): Backbone network, currently support Resnet50/101.
        backbone_indices (tuple): Four values in the tuple indicate the indices of output of backbone.
        channels (int): The channels of inter layers. Default: 512.
        enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: False.
        align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even,
            e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False.
        dropout_prob (float): Dropout ratio for upernet head. Default: 0.1.
        pretrained (str, optional): The path or url of pretrained model. Default: None.
    """

    def __init__(self,
                 num_classes,
                 backbone,
                 backbone_indices,
                 channels=512,
                 enable_auxiliary_loss=False,
                 align_corners=False,
                 dropout_prob=0.1,
                 pretrained=None):
        super().__init__()
        self.backbone = backbone
        self.backbone_indices = backbone_indices
        # Channels of the selected backbone feature maps, computed once and
        # reused as the head's FPN input planes (the original built the same
        # list twice).
        self.in_channels = [
            self.backbone.feat_channels[i] for i in backbone_indices
        ]
        self.align_corners = align_corners
        self.pretrained = pretrained
        self.enable_auxiliary_loss = enable_auxiliary_loss

        self.head = UPerNetHead(
            num_classes=num_classes,
            fpn_inplanes=self.in_channels,
            dropout_prob=dropout_prob,
            channels=channels,
            enable_auxiliary_loss=self.enable_auxiliary_loss)
        self.init_weight()

    def forward(self, x):
        """Run the backbone, select the configured feature levels, apply the
        head, and upsample every logit map back to the input resolution.

        Returns:
            list[Tensor]: One logit tensor per output (main, plus auxiliary
            when enabled), each resized to the spatial shape of ``x``.
        """
        feats = self.backbone(x)
        feats = [feats[i] for i in self.backbone_indices]
        logit_list = self.head(feats)
        logit_list = [
            F.interpolate(
                logit,
                paddle.shape(x)[2:],
                mode='bilinear',
                align_corners=self.align_corners) for logit in logit_list
        ]
        return logit_list

    def init_weight(self):
        """Load pretrained weights for the entire model if a path/url is set."""
        if self.pretrained is not None:
            utils.load_entire_model(self, self.pretrained)
| 91 | + |
| 92 | + |
class UPerNetHead(nn.Layer):
    """
    The UPerNet head: a pyramid pooling module (PPM) on the deepest feature,
    a top-down FPN over the shallower features, and a fusion conv followed by
    a 1x1 classifier.

    Args:
        num_classes (int): The unique number of target classes.
        fpn_inplanes (list|tuple): Channels of each input feature map, ordered
            shallow-to-deep; the last entry feeds the PPM.
        channels (int): Channels of the intermediate FPN layers.
        dropout_prob (float, optional): Dropout ratio of the auxiliary head. Default: 0.1.
        enable_auxiliary_loss (bool, optional): Whether to also emit an auxiliary
            logit computed from the third input level. Default: False.
        align_corners (bool, optional): An argument of F.interpolate. Default: True.
    """

    def __init__(self,
                 num_classes,
                 fpn_inplanes,
                 channels,
                 dropout_prob=0.1,
                 enable_auxiliary_loss=False,
                 align_corners=True):
        super(UPerNetHead, self).__init__()
        self.align_corners = align_corners
        self.ppm = layers.PPModule(
            in_channels=fpn_inplanes[-1],
            out_channels=channels,
            bin_sizes=(1, 2, 3, 6),
            dim_reduction=True,
            align_corners=True)
        self.enable_auxiliary_loss = enable_auxiliary_loss
        # One lateral 1x1 conv and one 3x3 smoothing conv per non-deepest level.
        self.lateral_convs = nn.LayerList()
        self.fpn_convs = nn.LayerList()

        for fpn_inplane in fpn_inplanes[:-1]:
            self.lateral_convs.append(
                layers.ConvBNReLU(fpn_inplane, channels, 1))
            self.fpn_convs.append(
                layers.ConvBNReLU(
                    channels, channels, 3, bias_attr=False))

        if self.enable_auxiliary_loss:
            self.aux_head = layers.AuxLayer(
                fpn_inplanes[2],
                fpn_inplanes[2],
                num_classes,
                dropout_prob=dropout_prob)

        self.fpn_bottleneck = layers.ConvBNReLU(
            len(fpn_inplanes) * channels, channels, 3, padding=1)

        # NOTE(review): conv_last is never used in forward(); it is kept only
        # so existing pretrained checkpoints still load without missing keys —
        # confirm against released weights before removing.
        self.conv_last = nn.Sequential(
            layers.ConvBNReLU(
                len(fpn_inplanes) * channels, channels, 3, bias_attr=False),
            nn.Conv2D(
                channels, num_classes, kernel_size=1))
        self.conv_seg = nn.Conv2D(channels, num_classes, kernel_size=1)

    def forward(self, inputs):
        """Fuse multi-level features and classify.

        Args:
            inputs (list[Tensor]): Feature maps ordered shallow-to-deep,
                matching ``fpn_inplanes``.

        Returns:
            list[Tensor]: ``[main_logit]`` or ``[main_logit, aux_logit]``
            when auxiliary loss is enabled.
        """
        # Lateral 1x1 convs on all but the deepest level; PPM on the deepest.
        laterals = []
        for i, lateral_conv in enumerate(self.lateral_convs):
            laterals.append(lateral_conv(inputs[i]))

        laterals.append(self.ppm(inputs[-1]))
        fpn_levels = len(laterals)

        # Top-down pathway: upsample each level and add it into the next
        # shallower one.
        for i in range(fpn_levels - 1, 0, -1):
            prev_shape = paddle.shape(laterals[i - 1])
            laterals[i - 1] = laterals[i - 1] + F.interpolate(
                laterals[i],
                size=prev_shape[2:],
                mode='bilinear',
                align_corners=self.align_corners)

        # Smooth every merged level; the deepest (PPM) output passes through.
        fpn_outs = []
        for i in range(fpn_levels - 1):
            fpn_outs.append(self.fpn_convs[i](laterals[i]))
        fpn_outs.append(laterals[-1])

        # Resize all levels to the finest resolution, concatenate, fuse,
        # and classify.
        for i in range(fpn_levels - 1, 0, -1):
            fpn_outs[i] = F.interpolate(
                fpn_outs[i],
                size=paddle.shape(fpn_outs[0])[2:],
                mode='bilinear',
                align_corners=self.align_corners)
        fuse_out = paddle.concat(fpn_outs, axis=1)
        x = self.fpn_bottleneck(fuse_out)

        # Original ended with an if/else whose branches both returned
        # logits_list; collapsed into a single return path.
        logits_list = [self.conv_seg(x)]
        if self.enable_auxiliary_loss:
            logits_list.append(self.aux_head(inputs[2]))
        return logits_list