IBM
diff --git a/‎terratorch/datamodules/generic_pixel_wise_data_module.py
Lines changed: 13 additions & 7 deletions b/‎terratorch/datamodules/generic_pixel_wise_data_module.py
Lines changed: 13 additions & 7 deletions
diff --git a/‎terratorch/datamodules/generic_scalar_label_data_module.py
Lines changed: 7 additions & 3 deletions b/‎terratorch/datamodules/generic_scalar_label_data_module.py
Lines changed: 7 additions & 3 deletions
diff --git a/‎terratorch/io/file.py
Lines changed: 19 additions & 0 deletions b/‎terratorch/io/file.py
Lines changed: 19 additions & 0 deletions
diff --git a/‎terratorch/models/decoders/upernet_decoder.py
Lines changed: 30 additions & 103 deletions b/‎terratorch/models/decoders/upernet_decoder.py
Lines changed: 30 additions & 103 deletions
@@ -3,11 +3,11 @@
 """
 This module contains generic data modules for instantiation at runtime.
 """
-
+import os
 from collections.abc import Callable, Iterable
 from pathlib import Path
 from typing import Any
-
+import numpy as np
 import albumentations as A
 import kornia.augmentation as K
 import torch
@@ -17,7 +17,7 @@
 from torchgeo.transforms import AugmentationSequential
 
 from terratorch.datasets import GenericNonGeoPixelwiseRegressionDataset, GenericNonGeoSegmentationDataset, HLSBands
-
+from terratorch.io.file import load_from_file_or_attribute
 
 def wrap_in_compose_is_list(transform_list):
     # set check shapes to false because of the multitemporal case
@@ -79,8 +79,8 @@ def __init__(
         test_data_root: Path,
         img_grep: str,
         label_grep: str,
-        means: list[float],
-        stds: list[float],
+        means: list[float] | str,
+        stds: list[float] | str,
         num_classes: int,
         predict_data_root: Path | None = None,
         train_label_data_root: Path | None = None,
@@ -198,6 +198,9 @@ def __init__(
         #     K.Normalize(means, stds),
         #     data_keys=["image"],
         # )
+        means = load_from_file_or_attribute(means)
+        stds = load_from_file_or_attribute(stds)
+
         self.aug = Normalize(means, stds)
 
         # self.aug = Normalize(means, stds)
@@ -317,8 +320,8 @@ def __init__(
         train_data_root: Path,
         val_data_root: Path,
         test_data_root: Path,
-        means: list[float],
-        stds: list[float],
+        means: list[float] | str,
+        stds: list[float] | str,
         predict_data_root: Path | None = None,
         img_grep: str | None = "*",
         label_grep: str | None = "*",
@@ -430,6 +433,9 @@ def __init__(
         #     K.Normalize(means, stds),
         #     data_keys=["image"],
         # )
+        means = load_from_file_or_attribute(means)
+        stds = load_from_file_or_attribute(stds)
+
         self.aug = Normalize(means, stds)
         self.no_data_replace = no_data_replace
         self.no_label_replace = no_label_replace
 
@@ -22,12 +22,12 @@
     HLSBands,
 )
 
+from terratorch.io.file import load_from_file_or_attribute
 
 def wrap_in_compose_is_list(transform_list):
     """Wrap `transform_list` in `A.Compose` when it is an `Iterable`; otherwise return it unchanged.
     Args:
         transform_list: Either an iterable of transforms or a single object that is not an
             `Iterable` (for example an already-built transform); the latter is passed through as is.
     Returns:
         An `A.Compose` built with `is_check_shapes=False`, or the original `transform_list`.
     """
     # set check shapes to false because of the multitemporal case
     return A.Compose(transform_list, is_check_shapes=False) if isinstance(transform_list, Iterable) else transform_list
 
-
 class Normalize(Callable):
     def __init__(self, means, stds):
         super().__init__()
@@ -68,8 +68,8 @@ def __init__(
         train_data_root: Path,
         val_data_root: Path,
         test_data_root: Path,
-        means: list[float],
-        stds: list[float],
+        means: list[float] | str,
+        stds: list[float] | str,
         num_classes: int,
         predict_data_root: Path | None = None,
         train_split: Path | None = None,
@@ -166,6 +166,10 @@ def __init__(
         #     K.Normalize(means, stds),
         #     data_keys=["image"],
         # )
+
+        means = load_from_file_or_attribute(means)
+        stds = load_from_file_or_attribute(stds)
+
         self.aug = Normalize(means, stds)
 
         # self.aug = Normalize(means, stds)
 
@@ -1,6 +1,7 @@
 import os
 import importlib 
 from torch import nn
+import numpy as np
 
 def open_generic_torch_model(model: type | str = None,
                              model_kwargs: dict = None,
@@ -51,3 +52,21 @@ def load_torch_weights(model:nn.Module=None, save_dir: str = None, name: str = N
         )
 
     return model
+
+def load_from_file_or_attribute(value: list[float]|str):
+    """Return `value` itself, or the numbers stored in the text file it points to.
+
+    Args:
+        value: Either the values given inline (e.g. a list of floats), or a path
+            (`str` or `os.PathLike`) to a plain-text file readable by `np.genfromtxt`.
+
+    Returns:
+        `value` unchanged when it is not a path; otherwise the file content converted
+        to a Python list (always a list, even when the file holds a single number).
+
+    Raises:
+        FileNotFoundError: If `value` is a path that does not exist or is not a regular file.
+        ValueError: If the file exists but `np.genfromtxt` cannot read it.
+    """
+    if not isinstance(value, (str, os.PathLike)):
+        # Inline values are handed back untouched instead of being silently turned into None.
+        return value
+
+    if not os.path.isfile(value):
+        raise FileNotFoundError(f"The input {value} does not exist or is not a file.")
+
+    try:
+        # atleast_1d: a one-number file parses to a 0-d array, whose tolist() would be a bare float.
+        content = np.atleast_1d(np.genfromtxt(value)).tolist()
+    except Exception as err:
+        # Keep the original message but preserve the underlying cause for debugging.
+        raise ValueError(f"File must be txt, but received {value}") from err
+
+    return content
+
+
@@ -1,14 +1,10 @@
-# Copyright contributors to the Terratorch project
-
 import torch
 import torch.nn.functional as F  # noqa: N812
 from torch import Tensor, nn
 
 """
 Adapted from https://github.com/yassouali/pytorch-segmentation/blob/master/models/upernet.py
 """
-
-
 class ConvModule(nn.Module):
     def __init__(self, in_channels, out_channels, kernel_size, padding=0, inplace=False) -> None:  # noqa: FBT002
         super().__init__()
@@ -19,103 +15,6 @@ def __init__(self, in_channels, out_channels, kernel_size, padding=0, inplace=Fa
     def forward(self, x):
         return self.act(self.norm(self.conv(x)))
 
-
-# class PSPModule(nn.Module):
-#     # In the original inmplementation they use precise RoI pooling
-#     # Instead of using adaptative average pooling
-#     def __init__(self, in_channels: int, bin_sizes: list[int] | None = None):
-#         super().__init__()
-#         if bin_sizes is None:
-#             bin_sizes = [1, 2, 3, 6]
-#         out_channels = in_channels // len(bin_sizes)
-#         self.stages = nn.ModuleList([self._make_stages(in_channels, out_channels, b_s) for b_s in bin_sizes])
-#         self.bottleneck = nn.Sequential(
-#             nn.Conv2d(
-#                 in_channels + (out_channels * len(bin_sizes)),
-#                 in_channels,
-#                 kernel_size=3,
-#                 padding=1,
-#                 bias=False,
-#             ),
-#             nn.BatchNorm2d(in_channels),
-#             nn.ReLU(inplace=True),
-#             nn.Dropout2d(0.1),
-#         )
-
-#     def _make_stages(self, in_channels, out_channels, bin_sz):
-#         prior = nn.AdaptiveAvgPool2d(output_size=bin_sz)
-#         conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
-#         bn = nn.BatchNorm2d(out_channels)
-#         relu = nn.ReLU(inplace=True)
-#         return nn.Sequential(prior, conv, bn, relu)
-
-#     def forward(self, features):
-#         h, w = features.size()[2], features.size()[3]
-#         pyramids = [features]
-#         pyramids.extend(
-#             [F.interpolate(stage(features), size=(h, w), mode="bilinear", align_corners=True) for stage in self.stages]
-#         )
-#         output = self.bottleneck(torch.cat(pyramids, dim=1))
-#         return output
-
-
-# def up_and_add(x, y):
-#     return F.interpolate(x, size=(y.size(2), y.size(3)), mode="bilinear", align_corners=True) + y
-
-
-# class FPNFuse(nn.Module):
-#     def __init__(self, feature_channels=None, fpn_out=256):
-#         super().__init__()
-#         if feature_channels is None:
-#             feature_channels = [256, 512, 1024, 2048]
-#         if not feature_channels[0] == fpn_out:
-#             msg = f"First index of feature channel ({feature_channels[0]}) did not match fpn_out ({fpn_out})"
-#             raise Exception(msg)
-#         self.conv1x1 = nn.ModuleList([nn.Conv2d(ft_size, fpn_out, kernel_size=1) for ft_size in feature_channels[1:]])
-#         self.smooth_conv = nn.ModuleList(
-#             [nn.Conv2d(fpn_out, fpn_out, kernel_size=3, padding=1)] * (len(feature_channels) - 1)
-#         )
-#         self.conv_fusion = nn.Sequential(
-#             nn.Conv2d(
-#                 len(feature_channels) * fpn_out,
-#                 fpn_out,
-#                 kernel_size=3,
-#                 padding=1,
-#                 bias=False,
-#             ),
-#             nn.BatchNorm2d(fpn_out),
-#             nn.ReLU(inplace=True),
-#         )
-
-#     def forward(self, features):
-#         features[1:] = [conv1x1(feature) for feature, conv1x1 in zip(features[1:], self.conv1x1, strict=False)]
-#         p = [up_and_add(features[i], features[i - 1]) for i in reversed(range(1, len(features)))]
-#         p = [smooth_conv(x) for smooth_conv, x in zip(self.smooth_conv, p, strict=False)]
-#         p = list(reversed(p))
-#         p.append(features[-1])  # P = [P1, P2, P3, P4]
-#         h, w = p[0].size(2), p[0].size(3)
-#         p[1:] = [F.interpolate(feature, size=(h, w), mode="bilinear", align_corners=True) for feature in p[1:]]
-
-#         x = self.conv_fusion(torch.cat(p, dim=1))
-#         return x
-
-
-# class UperNetDecoder(nn.Module):
-#     def __init__(self, embed_dim: list[int]) -> None:
-#         super().__init__()
-#         self.embed_dim = embed_dim
-#         self.output_embed_dim = embed_dim[0]
-#         self.PPN = PSPModule(embed_dim[-1])
-#         self.FPN = FPNFuse(embed_dim, fpn_out=self.output_embed_dim)
-
-#     def forward(self, x: Tensor):
-#         x = [f.clone() for f in x]
-#         x[-1] = self.PPN(x[-1])
-#         x = self.FPN(x)
-
-#         return x
-
-
 # Adapted from MMSegmentation
 class UperNetDecoder(nn.Module):
     """UperNetDecoder. Adapted from MMSegmentation."""
@@ -126,6 +25,7 @@ def __init__(
         pool_scales: tuple[int] = (1, 2, 3, 6),
         channels: int = 256,
         align_corners: bool = True,  # noqa: FBT001, FBT002
+        scale_modules: bool = False
     ):
         """Constructor
 
@@ -134,10 +34,29 @@ def __init__(
             pool_scales (tuple[int], optional): Pooling scales used in Pooling Pyramid
                 Module applied on the last feature. Default: (1, 2, 3, 6).
             channels (int, optional): Channels used in the decoder. Defaults to 256.
-            align_corners (bool, optional): Whter to align corners in rescaling. Defaults to True.
+            align_corners (bool, optional): Whether to align corners in rescaling. Defaults to True.
+            scale_modules (bool, optional): Whether to apply scale modules to the inputs. Needed for plain ViT.
+                Defaults to False.
         """
         super().__init__()
-        self.embed_dim = embed_dim
+        self.scale_modules = scale_modules
+        if scale_modules:
+            self.fpn1 = nn.Sequential(
+                nn.ConvTranspose2d(embed_dim[0],
+                                embed_dim[0] // 2, 2, 2),
+                nn.BatchNorm2d(embed_dim[0] // 2),
+                nn.GELU(),
+                nn.ConvTranspose2d(embed_dim[0] // 2,
+                                embed_dim[0] // 4, 2, 2))
+            self.fpn2 = nn.Sequential(
+                nn.ConvTranspose2d(embed_dim[1],
+                                embed_dim[1] // 2, 2, 2))
+            self.fpn3 = nn.Sequential(nn.Identity())
+            self.fpn4 = nn.Sequential(nn.MaxPool2d(kernel_size=2, stride=2))
+            self.embed_dim = [embed_dim[0] // 4, embed_dim[1] // 2, embed_dim[2], embed_dim[3]]
+        else:
+            self.embed_dim = embed_dim
+
         self.output_embed_dim = channels
         self.channels = channels
         self.align_corners = align_corners
@@ -192,6 +111,14 @@ def forward(self, inputs):
             feats (Tensor): A tensor of shape (batch_size, self.channels,
                 H, W) which is feature map for last layer of decoder head.
         """
+
+        if self.scale_modules:
+            scaled_inputs = []
+            scaled_inputs.append(self.fpn1(inputs[0]))
+            scaled_inputs.append(self.fpn2(inputs[1]))
+            scaled_inputs.append(self.fpn3(inputs[2]))
+            scaled_inputs.append(self.fpn4(inputs[3]))
+            inputs = scaled_inputs
         # build laterals
         laterals = [lateral_conv(inputs[i]) for i, lateral_conv in enumerate(self.lateral_convs)]
         laterals.append(self.psp_forward(inputs))