diff --git a/CHANGELOG.md b/CHANGELOG.md index a567305e2..4cdb588d5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid - Prepare Post v0.2.0 Release [#104](https://github.com/pulp-platform/Deeploy/pull/104) - Use Docker digests instead of arch-specific tags [#106](https://github.com/pulp-platform/Deeploy/pull/106) - Fix `Unsqueeze` Op. when using ONNX opset 13 or higher (from attribute to input) [#119](https://github.com/pulp-platform/Deeploy/pull/119) +- Fix bias hoisting in generic GEMM with no bias [#126](https://github.com/pulp-platform/Deeploy/pull/126) ### Added - Add manual type inference feature (CLI: `--input-type-map`/`--input-offset-map`) to resolve ambiguities when test inputs are not representative enough @@ -53,6 +54,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid - Memory/I/O summaries and input/output logging in deployers - RequantHelpers.py for Neureka's TileConstraints - Added assertion that all the graph tensors after lowering have a shape annotated +- Added testFloatGEMMnobias ### Changed - Replaced platform-specific tags (`*-amd64`, `*-arm64`) with direct digest references in `Noelware/docker-manifest-action`. @@ -102,6 +104,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid - Fixed aliasing - Missing layout transformation of the const's (bias, mul, add, shift in Conv/RequantizedConv) - Keep mul/add rank of requantized Neureka tile constraints +- Fix bias hoisting in generic GEMM with no bias ### Removed - Delete outdated and unused `.gitlab-ci.yml` file diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index 7752834c5..f63bb5411 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -1682,14 +1682,6 @@ def parseNodeCtxt(self, for idx, outputNode in enumerate(node.outputs): self.operatorRepresentation[outputs[idx]] = ctxt.lookup(outputNode.name).name - # Create fake C node for GEMM-compatibility and hoist it - if not self.noBiasHoisting: - values = np.zeros(ctxt.lookup(node.inputs[0].name).shape, dtype = inputNode.dtype) - zeroTensor = gs.Constant(f'{node.name}_C_Tensor', values = values) - ctxt.hoistConstant(zeroTensor, _type = ctxt.lookup(inputNode.name)._type) - node.inputs.append(zeroTensor) - self.operatorRepresentation['C'] = f'{node.name}_C_Tensor' - # Store the input and output shapes in the operator representation self.operatorRepresentation['size'] = np.prod(ctxt.lookup(node.inputs[0].name).shape) self.operatorRepresentation['A_shape'] = ctxt.lookup(node.inputs[0].name).shape @@ -1772,8 +1764,7 @@ def parseNodeCtxt(self, class GEMMParser(MatMulParser): def __init__(self, noBiasHoisting = True): - self.noBiasHoisting = noBiasHoisting - super().__init__() + super().__init__(noBiasHoisting) def parseNode(self, node: gs.Node) -> (bool): @@ -1805,6 +1796,10 @@ def parseNode(self, node: gs.Node) -> (bool): else: self.operatorRepresentation['transB'] = 0 + if len(node.inputs) == 2 and not self.noBiasHoisting: + C = gs.Constant(f"{node.name}_C", np.zeros((1,))) + node.inputs.append(C) + return True # This might be a matmul node -> Cast up else: @@ -1836,18 +1831,6 @@ def parseNodeCtxt(self, # Create flag for same dimension between bias matrix and the final batch dimension self.operatorRepresentation['C_batched'] = (self.operatorRepresentation['batch'] == np.prod( newCtxt.lookup(node.inputs[2].name).shape[:-2])) - elif not self.noBiasHoisting: - # Create mock bias matrix if not present in the inputs - values = np.zeros((1)) - zeroTensor = gs.Constant(f'{node.name}_C_Tensor', values = values) - newCtxt.hoistConstant(zeroTensor) - - # Store it in the operator representation - self.operatorRepresentation['C'] = f'{node.name}_C_Tensor' - self.operatorRepresentation['C_shape'] = (0,) - - # Create flag for same dimension between bias matrix and the final batch dimension - self.operatorRepresentation['C_batched'] = False self.operatorRepresentation['size'] = np.prod(newCtxt.lookup(node.inputs[0].name).shape) @@ -2324,7 +2307,7 @@ def parseNodeCtxt(self, class GenericGEMMParser(GEMMParser): - def __init__(self, noBiasHoisting = True): + def __init__(self, noBiasHoisting = False): super().__init__(noBiasHoisting) def parseNode(self, node: gs.Node) -> (bool): diff --git a/DeeployTest/Tests/testFloatGEMMnobias/inputs.npz b/DeeployTest/Tests/testFloatGEMMnobias/inputs.npz new file mode 100644 index 000000000..47728a360 Binary files /dev/null and b/DeeployTest/Tests/testFloatGEMMnobias/inputs.npz differ diff --git a/DeeployTest/Tests/testFloatGEMMnobias/network.onnx b/DeeployTest/Tests/testFloatGEMMnobias/network.onnx new file mode 100644 index 000000000..59264acd4 Binary files /dev/null and b/DeeployTest/Tests/testFloatGEMMnobias/network.onnx differ diff --git a/DeeployTest/Tests/testFloatGEMMnobias/outputs.npz b/DeeployTest/Tests/testFloatGEMMnobias/outputs.npz new file mode 100644 index 000000000..4e0debdf5 Binary files /dev/null and b/DeeployTest/Tests/testFloatGEMMnobias/outputs.npz differ