Trying to solve issues with UNet and ASPPHeads. #457

Open
wants to merge 15 commits into main
2 changes: 1 addition & 1 deletion terratorch/cli_tools.py
@@ -102,7 +102,7 @@ def save_prediction(prediction, input_file_name, out_dir, dtype:str="int16"):
mask = np.where(mask == metadata["nodata"], 1, 0)
mask = np.max(mask, axis=0)
result = np.where(mask == 1, -1, prediction.detach().cpu())

print(result.shape)
##### Save file to disk
metadata["count"] = 1
metadata["dtype"] = dtype
30 changes: 18 additions & 12 deletions terratorch/models/backbones/unet.py
@@ -14,6 +14,9 @@
from terratorch.models.backbones.utils import UpConvBlock, BasicConvBlock
from terratorch.models.decoders.utils import ConvModule

from terratorch.registry import TERRATORCH_BACKBONE_REGISTRY

@TERRATORCH_BACKBONE_REGISTRY.register
class UNet(nn.Module):
"""UNet backbone.

@@ -22,7 +25,7 @@ class UNet(nn.Module):

Args:
in_channels (int): Number of input image channels. Default: 3.
base_channels (int): Number of base channels of each stage.
out_channels (int): Number of base channels of each stage.
The output channels of the first stage. Default: 64.
num_stages (int): Number of stages in encoder, normally 5. Default: 5.
strides (Sequence[int 1 | 2]): Strides of each stage in encoder.
@@ -74,7 +77,7 @@ class UNet(nn.Module):

def __init__(self,
in_channels=3,
base_channels=64,
out_channels=64,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@@ -149,7 +152,7 @@ def __init__(self,
self.strides = strides
self.downsamples = downsamples
self.norm_eval = norm_eval
self.base_channels = base_channels
self.out_channels = num_stages*[out_channels]
Collaborator:
If this is supposed to represent the out_channels of each dec_outs item, then it's not accurate; it should be:

self.out_channels = [out_channels * 2**i for i in reversed(range(num_stages))]

For out_channels = 32 and num_stages = 5, this gives self.out_channels = [512, 256, 128, 64, 32], which matches the channels of the items in dec_outs.
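
A quick sanity check of that formula (a minimal sketch; the values below are just the example above, nothing taken from the PR itself):

# Sketch: confirm the proposed per-stage list matches the channels of the
# decoder outputs (dec_outs) for the example values above.
out_channels = 32
num_stages = 5

stage_channels = [out_channels * 2**i for i in reversed(range(num_stages))]
assert stage_channels == [512, 256, 128, 64, 32]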

Member Author:
Thanks. That was just a placeholder to see things running.


self.encoder = nn.ModuleList()
self.decoder = nn.ModuleList()
@@ -163,9 +166,9 @@ def __init__(self,
self.decoder.append(
UpConvBlock(
conv_block=BasicConvBlock,
in_channels=base_channels * 2**i,
skip_channels=base_channels * 2**(i - 1),
out_channels=base_channels * 2**(i - 1),
in_channels=out_channels * 2**i,
skip_channels=out_channels * 2**(i - 1),
out_channels=out_channels * 2**(i - 1),
num_convs=dec_num_convs[i - 1],
stride=1,
dilation=dec_dilations[i - 1],
@@ -180,7 +183,7 @@
enc_conv_block.append(
BasicConvBlock(
in_channels=in_channels,
out_channels=base_channels * 2**i,
out_channels=out_channels * 2**i,
num_convs=enc_num_convs[i],
stride=strides[i],
dilation=enc_dilations[i],
Expand All @@ -191,11 +194,15 @@ def __init__(self,
dcn=None,
plugins=None))
self.encoder.append((nn.Sequential(*enc_conv_block)))
in_channels = base_channels * 2**i
in_channels = out_channels * 2**i

def forward(self, x):
x = x[0]
self._check_input_divisible(x)

# We can check just the first image, since the batch has already
# passed the stackability test, which means all images have the
# same dimensions.
self._check_input_divisible(x[0])

enc_outs = []
for enc in self.encoder:
x = enc(x)
@@ -204,8 +211,7 @@ def forward(self, x):
for i in reversed(range(len(self.decoder))):
x = self.decoder[i](enc_outs[i], x)
dec_outs.append(x)

return dec_outs[-1]
return dec_outs

def train(self, mode=True):
"""Convert the model into training mode while keep normalization layer
8 changes: 7 additions & 1 deletion terratorch/models/decoders/aspp_head.py
@@ -2,8 +2,10 @@
from torch import nn
import numpy as np

from terratorch.registry import TERRATORCH_DECODER_REGISTRY
from .utils import ConvModule, resize

@TERRATORCH_DECODER_REGISTRY.register
class ASPPModule(nn.Module):
"""Atrous Spatial Pyramid Pooling (ASPP) Module.

@@ -57,6 +59,7 @@ def forward(self, x):

return outs

@TERRATORCH_DECODER_REGISTRY.register
class ASPPHead(nn.Module):
"""Rethinking Atrous Convolution for Semantic Image Segmentation.

@@ -183,13 +186,15 @@ def _forward_feature(self, inputs):
H, W) which is feature map for last layer of decoder head.
"""
inputs = self._transform_inputs(inputs)

aspp_outs = [
resize(
self.image_pool(inputs),
size=inputs.size()[2:],
mode='bilinear',
align_corners=self.align_corners)
]

aspp_outs.extend(self.aspp_modules(inputs))
aspp_outs = torch.cat(aspp_outs, dim=1)
feats = self.bottleneck(aspp_outs)
@@ -202,6 +207,7 @@ def forward(self, inputs):

return output

@TERRATORCH_DECODER_REGISTRY.register
class ASPPSegmentationHead(ASPPHead):
"""Rethinking Atrous Convolution for Semantic Image Segmentation.

@@ -255,6 +261,7 @@ def forward(self, inputs):

return output

@TERRATORCH_DECODER_REGISTRY.register
singhshraddha (Collaborator), Feb 25, 2025:
This is actually about line 276, but I can only comment on changed lines.

The way ASPPRegressionHead is set up doesn't comply with how encoder_decoder_factory.py builds decoders: the build expects the first argument to be channel_list, but here it is dilations instead. As a result, the dilations are set incorrectly and I cannot set them from a config.

To resolve this I'd suggest:

  • not accepting in_channels as an input parameter
  • making channel_list the first input parameter
  • using in_index together with channel_list to set in_channels
class ASPPRegressionHead(ASPPHead):

    def __init__(
        self,
        channel_list,
        dilations: list | tuple = (1, 6, 12, 18),
        channels: int = None,
        out_channels: int = 1,
        align_corners=False,
        head_dropout_ratio: float = 0.3,
        input_transform: str = None,
        in_index: int = -1,
        **kwargs,
    ):

        self.in_channels = channel_list[in_index]

Note that this only works when input_transform is None. I currently only care about that use case, where we select one item from the channel list, corresponding to the item we pick from dec_outs. If you want to make it more generic, mmseg has a BaseDecodeHead._init_inputs function that deals with the different kinds of channel_list, input_transform, etc.
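
As a concrete illustration of the channel_list / in_index suggestion (a hedged sketch with hypothetical values, mirroring the UNet example above with out_channels = 32 and num_stages = 5, and assuming input_transform is None):

# Hypothetical example: channel_list mirrors the channels of the UNet
# dec_outs items for out_channels = 32, num_stages = 5.
channel_list = [512, 256, 128, 64, 32]
in_index = -1  # pick the last decoder output (highest resolution)

in_channels = channel_list[in_index]  # 32 -> becomes self.in_channels of the head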

Member Author:
Thanks. I'll try to adapt and include it in the ASPP module.

class ASPPRegressionHead(ASPPHead):
"""Rethinking Atrous Convolution for regression.

@@ -293,7 +300,6 @@ def __init__(self, dilations:list | tuple =(1, 6, 12, 18),
def regression_head(self, features):

"""PixelWise regression"""

if self.dropout is not None:
features = self.dropout(features)
output = self.conv_reg(features)
1 change: 1 addition & 0 deletions terratorch/models/decoders/utils.py
@@ -59,6 +59,7 @@ def resize(input,
'the output would more aligned if '
f'input size {(input_h, input_w)} is `x+1` and '
f'out size {(output_h, output_w)} is `nx+1`')

return F.interpolate(input, size, scale_factor, mode, align_corners)

