Commit 55959de

ensemble: fix for grad runtime error
- The dummy param must be linked to the loss so that it becomes part of the gradient graph.
- Fixes: `RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn`
Parent: 4d3f4f6
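
For context, the `RuntimeError` above is raised when `backward()` is called on a scalar that has no `grad_fn`, i.e. nothing in its history requires gradients. A minimal sketch of the failure and of the fix this commit applies (the tensor values are made up for illustration):

```python
import torch

# A scalar "loss" with no autograd history: calling backward() on it raises
# "element 0 of tensors does not require grad and does not have a grad_fn".
detached_loss = torch.tensor(0.7)
# detached_loss.backward()  # <-- would raise the RuntimeError above

# nn.Parameter sets requires_grad=True by default.
dummy_param = torch.nn.Parameter(torch.randn(1))

# Adding 0 * dummy_param.sum() attaches the loss to the autograd graph
# without changing its value; the dummy parameter receives a zero gradient.
loss = detached_loss + 0 * dummy_param.sum()
loss.backward()
print(dummy_param.grad)  # tensor([0.])
```

Note that `torch.nn.Parameter` already defaults to `requires_grad=True`, so the `requires_grad=True` passed to `torch.randn` in the diff below is redundant but harmless.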

File tree: 1 file changed, +26 −5 lines

chebai/models/ensemble.py

```diff
@@ -1,6 +1,6 @@
 import os.path
 from abc import ABC, abstractmethod
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Optional, Tuple, Union
 
 import torch
 from torch import Tensor
@@ -94,7 +94,7 @@ class ChebiEnsemble(_EnsembleBase):
     def __init__(self, model_configs: Dict[str, ModelConfig], **kwargs):
         super().__init__(model_configs, **kwargs)
         # Add a dummy trainable parameter
-        self.dummy_param = torch.nn.Parameter(torch.randn(1))
+        self.dummy_param = torch.nn.Parameter(torch.randn(1, requires_grad=True))
 
     def forward(self, data: Dict[str, Tensor], **kwargs: Any) -> Dict[str, Any]:
         predictions = {}
@@ -103,8 +103,6 @@ def forward(self, data: Dict[str, Tensor], **kwargs: Any) -> Dict[str, Any]:
             data["labels"].shape[0], data["labels"].shape[1], device=self.device
         ).to(self.device)
 
-        print(data["features"].shape)  # Debugging
-
         for name, model in self.models.items():
             output = model(data)
             confidences[name] = torch.sigmoid(output["logits"])
@@ -193,7 +191,8 @@ def _execute(
         )
         loss = loss[0]
 
-        d["loss"] = loss
+        d["loss"] = loss + 0 * self.dummy_param.sum()
+
         self.log(
             f"{prefix}loss",
             loss.item(),
@@ -229,6 +228,28 @@ def aggregate_predictions(self, predictions, confidences):
 
         return (true_scores > false_scores).long()  # Final class decision
 
+    def _process_for_loss(
+        self,
+        model_output: Dict[str, Tensor],
+        labels: Tensor,
+        loss_kwargs: Dict[str, Any],
+    ) -> Tuple[Tensor, Tensor, Dict[str, Any]]:
+        """
+        Process the model output for calculating the loss.
+
+        Args:
+            model_output (Dict[str, Tensor]): The output of the model.
+            labels (Tensor): The target labels.
+            loss_kwargs (Dict[str, Any]): Additional loss arguments.
+
+        Returns:
+            tuple: A tuple containing the processed model output, labels, and loss arguments.
+        """
+        kwargs_copy = dict(loss_kwargs)
+        if labels is not None:
+            labels = labels.float()
+        return model_output["logits"], labels, kwargs_copy
+
 
 class ChebiEnsembleLearning(_EnsembleBase):
```
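The new `_process_for_loss` hook casts integer targets to float before the loss is computed, which BCE-style criteria require. A minimal, self-contained sketch of how its return values would typically be consumed (the shapes and the `BCEWithLogitsLoss` choice are illustrative assumptions, not taken from this repository):

```python
import torch

# Stand-ins for model_output["logits"] and integer multi-label targets.
logits = torch.randn(4, 3)
labels = torch.randint(0, 2, (4, 3))

# BCEWithLogitsLoss expects float targets, which is why
# _process_for_loss converts labels with labels.float().
criterion = torch.nn.BCEWithLogitsLoss()
loss = criterion(logits, labels.float())
print(loss.item())
```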
