IBM · Joao-L-S-Almeida · Aug 16, 2024 · Aug 19, 2024 · Aug 19, 2024 · Aug 21, 2024
diff --git a/terratorch/cli_tools.py b/terratorch/cli_tools.py
@@ -398,6 +398,7 @@ def from_config(
         config_path: Path,
         checkpoint_path: Path | None = None,
         predict_dataset_bands: list[str] | None = None,
+        predict_output_bands: list[str] | None = None,
     ):
         """
         Args:
@@ -416,6 +417,10 @@ def from_config(
             arguments.extend([ "--data.init_args.predict_dataset_bands",
             "[" + ",".join(predict_dataset_bands) + "]",])
 
+        if predict_output_bands is not None:
+            arguments.extend([ "--data.init_args.predict_output_bands",
+            "[" + ",".join(predict_output_bands) + "]",])
+
         cli = build_lightning_cli(arguments, run=False)
         trainer = cli.trainer
         # disable logging metrics
@@ -467,4 +472,4 @@ def inference(self, file_path: Path) -> torch.Tensor:
             prediction, file_name = self.inference_on_dir(
                 tmpdir,
             )
-            return prediction.squeeze(0)
+            return prediction.squeeze(0)
diff --git a/terratorch/datamodules/generic_pixel_wise_data_module.py b/terratorch/datamodules/generic_pixel_wise_data_module.py
@@ -272,7 +272,7 @@ def setup(self, stage: str) -> None:
                 self.predict_root,
                 self.num_classes,
                 dataset_bands=self.predict_dataset_bands,
-                output_bands=self.output_bands,
+                output_bands=self.predict_output_bands,
                 constant_scale=self.constant_scale,
                 rgb_indices=self.rgb_indices,
                 transform=self.test_transform,
@@ -335,6 +335,7 @@ def __init__(
         allow_substring_split_file: bool = True,
         dataset_bands: list[HLSBands | int | tuple[int, int] | str ] | None = None,
         predict_dataset_bands: list[HLSBands | int | tuple[int, int] | str ] | None = None,
+        predict_output_bands: list[HLSBands | int | tuple[int, int] | str ] | None = None,
         output_bands: list[HLSBands | int | tuple[int, int] | str ] | None = None,
         constant_scale: float = 1,
         rgb_indices: list[int] | None = None,
@@ -426,6 +427,7 @@ def __init__(
 
         self.dataset_bands = dataset_bands
         self.predict_dataset_bands = predict_dataset_bands if predict_dataset_bands else dataset_bands
+        self.predict_output_bands = predict_output_bands or output_bands  # predict used output_bands before this option
         self.output_bands = output_bands
         self.rgb_indices = rgb_indices
 
@@ -507,7 +509,7 @@ def setup(self, stage: str) -> None:
             self.predict_dataset = self.dataset_class(
                 self.predict_root,
                 dataset_bands=self.predict_dataset_bands,
-                output_bands=self.output_bands,
+                output_bands=self.predict_output_bands,
                 constant_scale=self.constant_scale,
                 rgb_indices=self.rgb_indices,
                 transform=self.test_transform,

diff --git a/terratorch/models/heads/regression_head.py b/terratorch/models/heads/regression_head.py
@@ -3,7 +3,7 @@
 import importlib
 
 from torch import nn
-
+import torch
 
 class PixelShuffleUpscale(nn.Module):
     def __init__(self, channels) -> None:
@@ -16,6 +16,8 @@ def __init__(self, channels) -> None:
 
     def forward(self, x):
+        """Apply ``conv``, ``upscale``, ``bn`` and ``relu`` to ``x``, in that order."""
+        # No torch.cuda.empty_cache() here: this runs on every forward pass, where it only adds allocator cost.
         post_conv = self.conv(x)
         upscaled = self.upscale(post_conv)
         return self.relu(self.bn(upscaled))
 
@@ -81,5 +83,11 @@ def block(in_channels, out_channels):
         self.head = nn.Sequential(*[*pre_layers, dropout, final_layer])
 
     def forward(self, x):
+        """Run ``x`` through ``self.head`` and apply ``self.final_act``.
+
+        Carries no printing or CUDA allocator calls: this method runs on
+        every forward pass, so memory diagnostics belong in a profiler or a
+        callback wrapped around the call rather than inside it.
+        """
         output = self.head(x)
         return self.final_act(output)
diff --git a/terratorch/tasks/classification_tasks.py b/terratorch/tasks/classification_tasks.py
@@ -269,6 +269,12 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: int = 0) -> T
         x = batch["image"]
         file_names = batch["filename"]
 
+        # Lower peak GPU memory before the forward pass: collect unreachable
+        # Python objects first so the blocks they held can be released as well.
+        import gc  # local import: this patch adds no module-level `import gc` to this file
+        gc.collect()
+        torch.cuda.empty_cache()
+
         y_hat = self(x).output
         y_hat = y_hat.argmax(dim=1)
         return y_hat, file_names
diff --git a/terratorch/tasks/regression_tasks.py b/terratorch/tasks/regression_tasks.py
@@ -2,6 +2,7 @@
 
 from collections.abc import Sequence
 from typing import Any
+import gc
 
 import lightning
 import matplotlib.pyplot as plt
@@ -20,6 +21,8 @@
 from terratorch.tasks.optimizer_factory import optimizer_factory
 from terratorch.tasks.tiled_inference import TiledInferenceParameters, tiled_inference
 
+from torch.profiler import profile, record_function, ProfilerActivity
+
 BATCH_IDX_FOR_VALIDATION_PLOTTING = 10
 
 
@@ -368,8 +371,13 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: int = 0) -> T
         def model_forward(x):
             return self(x).output
 
+        # Lower peak GPU memory before the forward pass: collect unreachable
+        # Python objects first so the blocks they held can be released as well.
+        gc.collect()
+        torch.cuda.empty_cache()
+
         if self.tiled_inference_parameters:
             y_hat: Tensor = tiled_inference(model_forward, x, 1, self.tiled_inference_parameters)
         else:
             y_hat: Tensor = self(x).output
         return y_hat, file_names
diff --git a/terratorch/tasks/segmentation_tasks.py b/terratorch/tasks/segmentation_tasks.py
@@ -16,6 +16,8 @@
 from terratorch.tasks.optimizer_factory import optimizer_factory
 from terratorch.tasks.tiled_inference import TiledInferenceParameters, tiled_inference
 
+from torch.profiler import profile, record_function, ProfilerActivity
+
 BATCH_IDX_FOR_VALIDATION_PLOTTING = 10
 
 
@@ -301,7 +303,7 @@ def test_step(self, batch: Any, batch_idx: int, dataloader_idx: int = 0) -> None
         self.test_loss_handler.log_loss(self.log, loss_dict=loss, batch_size=x.shape[0])
         y_hat_hard = to_segmentation_prediction(model_output)
         self.test_metrics.update(y_hat_hard, y)
-
+        torch.cuda.memory_summary(device=None, abbreviated=False)
     def on_test_epoch_end(self) -> None:
         self.log_dict(self.test_metrics.compute(), sync_dist=True)
         self.test_metrics.reset()
@@ -324,11 +326,17 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: int = 0) -> T
         def model_forward(x):
             return self(x).output
 
+        # Lower peak GPU memory before the forward pass: collect unreachable
+        # Python objects first so the blocks they held can be released as well.
+        import gc  # local import: this patch adds no module-level `import gc` to this file
+        gc.collect()
+        torch.cuda.empty_cache()
+
         if self.tiled_inference_parameters:
             y_hat: Tensor = tiled_inference(
                 model_forward, x, self.hparams["model_args"]["num_classes"], self.tiled_inference_parameters
             )
         else:
             y_hat: Tensor = self(x).output
         y_hat = y_hat.argmax(dim=1)
         return y_hat, file_names
diff --git a/tests/manufactured-finetune_prithvi_swin_B.yaml b/tests/manufactured-finetune_prithvi_swin_B.yaml
@@ -20,7 +20,7 @@ trainer:
       init_args:
         monitor: val/loss
         patience: 100
-  max_epochs: 5
+  max_epochs: 3
   check_val_every_n_epoch: 1
   log_every_n_steps: 20
   enable_checkpointing: true