pulp-platform · lukamac · Oct 31, 2025 · Oct 21, 2025
@@ -20,6 +20,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid
 - Prepare Post v0.2.0 Release [#104](https://github.com/pulp-platform/Deeploy/pull/104)
 - Use Docker digests instead of arch-specific tags [#106](https://github.com/pulp-platform/Deeploy/pull/106)
 - Fix `Unsqueeze` Op. when using ONNX opset 13 or higher (`axes` moved from attribute to input) [#119](https://github.com/pulp-platform/Deeploy/pull/119)
+- Fix bias hoisting in generic GEMM with no bias [#126](https://github.com/pulp-platform/Deeploy/pull/126)
 
 ### Added
 - Add manual type inference feature (CLI: `--input-type-map`/`--input-offset-map`) to resolve ambiguities when test inputs are not representative enough
@@ -53,6 +54,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid
 - Memory/I/O summaries and input/output logging in deployers
 - RequantHelpers.py for Neureka's TileConstraints
 - Added assertion that all the graph tensors after lowering have a shape annotated
+- Added `testFloatGEMMnobias` test
 
 ### Changed
 - Replaced platform-specific tags (`*-amd64`, `*-arm64`) with direct digest references in `Noelware/docker-manifest-action`.
@@ -102,6 +104,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid
 - Fixed aliasing
 - Missing layout transformation of the constants (bias, mul, add, shift in Conv/RequantizedConv)
 - Keep mul/add rank of requantized Neureka tile constraints
+- Fix bias hoisting in generic GEMM with no bias
 
 ### Removed
 - Delete outdated and unused `.gitlab-ci.yml` file

@@ -1682,14 +1682,6 @@ def parseNodeCtxt(self,
         for idx, outputNode in enumerate(node.outputs):
             self.operatorRepresentation[outputs[idx]] = ctxt.lookup(outputNode.name).name
 
-        # Create fake C node for GEMM-compatibility and hoist it
-        if not self.noBiasHoisting:
-            values = np.zeros(ctxt.lookup(node.inputs[0].name).shape, dtype = inputNode.dtype)
-            zeroTensor = gs.Constant(f'{node.name}_C_Tensor', values = values)
-            ctxt.hoistConstant(zeroTensor, _type = ctxt.lookup(inputNode.name)._type)
-            node.inputs.append(zeroTensor)
-            self.operatorRepresentation['C'] = f'{node.name}_C_Tensor'
-
         # Store the input and output shapes in the operator representation
         self.operatorRepresentation['size'] = np.prod(ctxt.lookup(node.inputs[0].name).shape)
         self.operatorRepresentation['A_shape'] = ctxt.lookup(node.inputs[0].name).shape
@@ -1772,8 +1764,7 @@ def parseNodeCtxt(self,
 class GEMMParser(MatMulParser):
 
     def __init__(self, noBiasHoisting = True):
-        self.noBiasHoisting = noBiasHoisting
-        super().__init__()
+        super().__init__(noBiasHoisting)
 
     def parseNode(self, node: gs.Node) -> (bool):
 
@@ -1805,6 +1796,10 @@ def parseNode(self, node: gs.Node) -> (bool):
             else:
                 self.operatorRepresentation['transB'] = 0
 
+            if len(node.inputs) == 2 and not self.noBiasHoisting:
+                C = gs.Constant(f"{node.name}_C", np.zeros((1,)))
+                node.inputs.append(C)
+
             return True
         # This might be a matmul node -> Cast up
         else:
@@ -1836,18 +1831,6 @@ def parseNodeCtxt(self,
                 # Create flag for same dimension between bias matrix and the final batch dimension
                 self.operatorRepresentation['C_batched'] = (self.operatorRepresentation['batch'] == np.prod(
                     newCtxt.lookup(node.inputs[2].name).shape[:-2]))
-            elif not self.noBiasHoisting:
-                # Create mock bias matrix if not present in the inputs
-                values = np.zeros((1))
-                zeroTensor = gs.Constant(f'{node.name}_C_Tensor', values = values)
-                newCtxt.hoistConstant(zeroTensor)
-
-                # Store it in the operator representation
-                self.operatorRepresentation['C'] = f'{node.name}_C_Tensor'
-                self.operatorRepresentation['C_shape'] = (0,)
-
-                # Create flag for same dimension between bias matrix and the final batch dimension
-                self.operatorRepresentation['C_batched'] = False
 
             self.operatorRepresentation['size'] = np.prod(newCtxt.lookup(node.inputs[0].name).shape)
 
@@ -2324,7 +2307,7 @@ def parseNodeCtxt(self,
 
 class GenericGEMMParser(GEMMParser):
 
-    def __init__(self, noBiasHoisting = True):
+    def __init__(self, noBiasHoisting = False):
         super().__init__(noBiasHoisting)
 
     def parseNode(self, node: gs.Node) -> (bool):