Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid
- Prepare Post v0.2.0 Release [#104](https://github.com/pulp-platform/Deeploy/pull/104)
- Use Docker digests instead of arch-specific tags [#106](https://github.com/pulp-platform/Deeploy/pull/106)
- Fix the `Unsqueeze` operator when using ONNX opset 13 or higher (`axes` moved from an attribute to an input) [#119](https://github.com/pulp-platform/Deeploy/pull/119)
- Fix bias hoisting in generic GEMM with no bias [#126](https://github.com/pulp-platform/Deeploy/pull/126)

### Added
- Add manual type inference feature (CLI: `--input-type-map`/`--input-offset-map`) to resolve ambiguities when test inputs are not representative enough
Expand Down Expand Up @@ -53,6 +54,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid
- Memory/I/O summaries and input/output logging in deployers
- RequantHelpers.py for Neureka's TileConstraints
- Added assertion that all the graph tensors after lowering have a shape annotated
- Added testFloatGEMMnobias

### Changed
- Replaced platform-specific tags (`*-amd64`, `*-arm64`) with direct digest references in `Noelware/docker-manifest-action`.
Expand Down Expand Up @@ -102,6 +104,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid
- Fixed aliasing
- Missing layout transformation of the constants (bias, mul, add, shift in Conv/RequantizedConv)
- Keep mul/add rank of requantized Neureka tile constraints
- Fix bias hoisting in generic GEMM with no bias

### Removed
- Delete outdated and unused `.gitlab-ci.yml` file
Expand Down
29 changes: 6 additions & 23 deletions Deeploy/Targets/Generic/Parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1682,14 +1682,6 @@ def parseNodeCtxt(self,
for idx, outputNode in enumerate(node.outputs):
self.operatorRepresentation[outputs[idx]] = ctxt.lookup(outputNode.name).name

# Create fake C node for GEMM-compatibility and hoist it
if not self.noBiasHoisting:
values = np.zeros(ctxt.lookup(node.inputs[0].name).shape, dtype = inputNode.dtype)
zeroTensor = gs.Constant(f'{node.name}_C_Tensor', values = values)
ctxt.hoistConstant(zeroTensor, _type = ctxt.lookup(inputNode.name)._type)
node.inputs.append(zeroTensor)
self.operatorRepresentation['C'] = f'{node.name}_C_Tensor'

# Store the input and output shapes in the operator representation
self.operatorRepresentation['size'] = np.prod(ctxt.lookup(node.inputs[0].name).shape)
self.operatorRepresentation['A_shape'] = ctxt.lookup(node.inputs[0].name).shape
Expand Down Expand Up @@ -1772,8 +1764,7 @@ def parseNodeCtxt(self,
class GEMMParser(MatMulParser):

def __init__(self, noBiasHoisting = True):
self.noBiasHoisting = noBiasHoisting
super().__init__()
super().__init__(noBiasHoisting)

def parseNode(self, node: gs.Node) -> (bool):

Expand Down Expand Up @@ -1805,6 +1796,10 @@ def parseNode(self, node: gs.Node) -> (bool):
else:
self.operatorRepresentation['transB'] = 0

if len(node.inputs) == 2 and not self.noBiasHoisting:
C = gs.Constant(f"{node.name}_C", np.zeros((1,)))
node.inputs.append(C)

return True
# This might be a matmul node -> Cast up
else:
Expand Down Expand Up @@ -1836,18 +1831,6 @@ def parseNodeCtxt(self,
# Create flag for same dimension between bias matrix and the final batch dimension
self.operatorRepresentation['C_batched'] = (self.operatorRepresentation['batch'] == np.prod(
newCtxt.lookup(node.inputs[2].name).shape[:-2]))
elif not self.noBiasHoisting:
# Create mock bias matrix if not present in the inputs
values = np.zeros((1))
zeroTensor = gs.Constant(f'{node.name}_C_Tensor', values = values)
newCtxt.hoistConstant(zeroTensor)

# Store it in the operator representation
self.operatorRepresentation['C'] = f'{node.name}_C_Tensor'
self.operatorRepresentation['C_shape'] = (0,)

# Create flag for same dimension between bias matrix and the final batch dimension
self.operatorRepresentation['C_batched'] = False

self.operatorRepresentation['size'] = np.prod(newCtxt.lookup(node.inputs[0].name).shape)

Expand Down Expand Up @@ -2324,7 +2307,7 @@ def parseNodeCtxt(self,

class GenericGEMMParser(GEMMParser):

def __init__(self, noBiasHoisting = True):
def __init__(self, noBiasHoisting = False):
super().__init__(noBiasHoisting)

def parseNode(self, node: gs.Node) -> (bool):
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading