diff --git a/CHANGELOG.md b/CHANGELOG.md index 9c82f50bb..67a3684c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid ## Unreleased (Planned Release Target: v0.2.1) ### List of Pull Requests +- Fix missing const's layout transformation and refactor NCHWtoNHWC passes [#122](https://github.com/pulp-platform/Deeploy/pull/122) - Fix aliasing [#125](https://github.com/pulp-platform/Deeploy/pull/125) - Support for 1D Autoencoder [#98](https://github.com/pulp-platform/Deeploy/pull/98) - Refactor Logging for Improved Debugging [#115](https://github.com/pulp-platform/Deeploy/pull/115) @@ -78,6 +79,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid - Removed `fromVariableBuffer` - Refactored `hoistConstant` - Refactored TransientBuffer's `__init__` +- Refactor of the NCHWtoNHWC passes ### Fixed - Prevent node duplication for graphs generated via GraphSurgeon @@ -89,6 +91,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid - Correctly return `signProp` from `setupDeployer` instead of hardcoding the value to `False` in `testMVP.py` - Fixed `Unsqueeze` Op. when using ONNX opset 13 or higher (from attribute to input) - Fixed aliasing +- Missing layout transformation of the const's (bias, mul, add, shift in Conv/RequantizedConv) ### Removed - Delete outdated and unused `.gitlab-ci.yml` file diff --git a/Deeploy/CommonExtensions/OptimizationPasses/TopologyOptimizationPasses/LoweringOptimizationPasses.py b/Deeploy/CommonExtensions/OptimizationPasses/TopologyOptimizationPasses/LoweringOptimizationPasses.py index 7ef9e96ef..a87a641d3 100644 --- a/Deeploy/CommonExtensions/OptimizationPasses/TopologyOptimizationPasses/LoweringOptimizationPasses.py +++ b/Deeploy/CommonExtensions/OptimizationPasses/TopologyOptimizationPasses/LoweringOptimizationPasses.py @@ -3,17 +3,36 @@ # SPDX-License-Identifier: Apache-2.0 from functools import partial -from typing import Iterable, List, Optional, Sequence, Tuple, TypeVar, Union +from typing import List, Optional, Sequence, Tuple, TypeVar, Union import numpy as np import onnx_graphsurgeon as gs -from Deeploy.CommonExtensions.OptimizationPasses.Matchers import Match +from Deeploy.CommonExtensions.OptimizationPasses.Matchers import Match, NonBranchingMatcher from Deeploy.CommonExtensions.OptimizationPasses.PassClasses import ReplaceSequentialPatternPass, SequentialPass, \ contextagnostic from Deeploy.TilingExtension.TilingCodegen import HyperRectangle +def _singleNodePattern(op: str) -> gs.Graph: + tensorIn = gs.Variable("input") + tensorOut = gs.Variable("output") + node = gs.Node(op = op, name = "node", inputs = [tensorIn], outputs = [tensorOut]) + graph = gs.Graph([node], inputs = [tensorIn], outputs = [tensorOut]) + return graph + + +def _isDepthwise(node: gs.Node) -> bool: + if node.op not in ["Conv", "RequantizedConv"]: + return False + + channels_first = node.attrs.get("channels_first", True) + spatialDims = len(node.inputs[1].shape) - 2 + shapeIn = node.inputs[0].shape + chIn = shapeIn[-spatialDims - 1] if channels_first else shapeIn[-1] + return chIn != 1 and node.attrs.get("group", 1) == chIn + + def _createReshape(tensorIn: gs.Tensor, name: str, newShape: Sequence[Union[int, str]], @@ -86,29 +105,27 @@ def _prependSqueezeDims(tensor: gs.Tensor, name: str, axis: Union[int, Sequence[ # Permute (0,1,2,3,...,N-2,N-1) -> (0,1,2,3,...,N-1,N-2) -def _permutationLastTwoDims(N: int) -> List[int]: +def 
_swapLastTwoDimsPermutation(N: int) -> List[int]: assert N >= 2, "N needs to be larger then 2" - return list(range(N - 2)) + [N - 1, N - 2] + return [*range(N - 2), N - 1, N - 2] -# Permute (0,1,2,3,...,N-1) -> (0,2,3,...,N-1,1) -def _permutationNCHWtoNHWC(N: int) -> List[int]: - assert N >= 3, "N needs to be larger then 3 for this to make any sense" - return [0] + list(range(2, N)) + [1] - - -# Permute (0,1,2,3,...,N-1) -> (0,N-1,1,2,3,...,N-2) -def _permutationNHWCtoNCHW(N: int) -> List[int]: - assert N >= 3, "N needs to be larger then 3 for this to make any sense" - return [0, N - 1] + list(range(1, N - 1)) +# Permute channels first <-> channels last: +# (*, ch, *) <-> (*, *, ch) +def _transformLayoutPermutation(dims: int, spatialDims: int, targetChannelsFirst: bool) -> List[int]: + batchDims = dims - spatialDims - 1 + if targetChannelsFirst: + ch = dims - 1 + nonBatchPerm = [ch, *range(batchDims, ch)] + else: + ch = batchDims + nonBatchPerm = [*range(ch + 1, dims), ch] + return list(range(batchDims)) + nonBatchPerm # Calculate permutation q = p^(-1) s.t. q(p(i)) = i -def _invertPermutation(permutation: List[int]) -> List[int]: - inverse = [0] * len(permutation) - for idx, permIdx in enumerate(permutation): - inverse[permIdx] = idx - return inverse +def _invertPermutation(permutation: Sequence[int]) -> List[int]: + return [permutation.index(i) for i in range(len(permutation))] T = TypeVar('T') @@ -124,78 +141,69 @@ def _permuteHyperRectangle(rect: HyperRectangle, permutation: List[int]) -> Hype return HyperRectangle(tuple(_permute(rect.offset, permutation)), tuple(_permute(rect.dims, permutation))) -def _prependTransposeNode(anchor: gs.Variable, - nodeName: str, - permutation: Iterable[int], - invert: bool = False) -> Tuple[gs.Node, gs.Variable]: - - if invert: - outShape = _permute(anchor.shape, _invertPermutation(permutation)) - else: - outShape = _permute(anchor.shape, permutation) - - anchorTransposeInput = gs.Variable(nodeName + "_Out", dtype = np.float32, shape = outShape) - anchorTransposeNode = gs.Node(name = nodeName, - op = "Transpose", - inputs = [anchorTransposeInput], - outputs = [anchor], - attrs = {'perm': permutation}) - - return anchorTransposeNode, anchorTransposeInput - - -def _appendTransposeNode(anchor: gs.Variable, - nodeName: str, - permutation: Iterable[int], - invert: bool = False) -> (gs.Node, gs.Variable): - - if invert: - outShape = _permute(anchor.shape, _invertPermutation(permutation)) - else: - outShape = _permute(anchor.shape, permutation) - - anchorTransposeOutput = gs.Variable(nodeName + "_In", dtype = np.float32, shape = outShape) - anchorTransposeNode = gs.Node(name = nodeName, - op = "Transpose", - inputs = [anchor], - outputs = [anchorTransposeOutput], - attrs = {'perm': permutation}) - - return anchorTransposeNode, anchorTransposeOutput +def _prependTranspose(tensor: gs.Variable, prevNode: gs.Node, perm: List[int]) -> gs.Node: + prevNodeTensorIdx = prevNode.outputs.index(tensor) + preTransposeTensor = gs.Variable(f"{prevNode.name}_{tensor.name}_pre_transposed", tensor.dtype, + _permute(tensor.shape, _invertPermutation(perm))) + transposeNode = gs.Node(op = "Transpose", + name = f"{prevNode.name}_{tensor.name}_pre_transpose", + attrs = {"perm": perm}, + inputs = [preTransposeTensor], + outputs = [tensor]) + prevNode.outputs[prevNodeTensorIdx] = preTransposeTensor + return transposeNode + + +def _appendTranspose(tensor: gs.Variable, nextNode: gs.Node, perm: List[int]) -> gs.Node: + nextNodeTensorIdx = nextNode.inputs.index(tensor) + transposedTensor 
= gs.Variable(f"{nextNode.name}_{tensor.name}_transposed", tensor.dtype, + _permute(tensor.shape, perm)) + transposeNode = gs.Node(op = "Transpose", + name = f"{nextNode.name}_{tensor.name}_transpose", + attrs = {"perm": perm}, + inputs = [tensor], + outputs = [transposedTensor]) + nextNode.inputs[nextNodeTensorIdx] = transposedTensor + return transposeNode + + +def _transformLayoutConst(const: gs.Constant, spatialDims: int, targetChannelsFirst: bool) -> None: + assert isinstance(const, gs.Constant) + if len(const.shape) < 2: + return + perm = _transformLayoutPermutation(len(const.shape), spatialDims, targetChannelsFirst) + const.values = const.values.transpose(perm) + + +def _transformLayoutDwWeightConst(const: gs.Constant, targetChannelsFirst: bool) -> None: + assert not targetChannelsFirst, "Target layout should be channels_last!" + assert isinstance(const, gs.Constant) + dims = len(const.shape) + perm = [*range(1, dims), 0] + const.values = const.values.transpose(perm) def _transposeMatMulInputs_fun(graph: gs.Graph, match: Match, name: str): + node = next(iter((match.nodes_map.values()))) - matched_nodes = [m for k, m in match.nodes_map.items()] - gemmNode = matched_nodes[0] - - inputA = gemmNode.inputs[0] - inputB = gemmNode.inputs[1] - - if 'transA' not in gemmNode.attrs: - gemmNode.attrs['transA'] = 0 - if 'transB' not in gemmNode.attrs: - gemmNode.attrs['transB'] = 0 - if 'alpha' not in gemmNode.attrs: - gemmNode.attrs['alpha'] = 1.0 - if 'beta' not in gemmNode.attrs: - gemmNode.attrs['beta'] = 1.0 + node.attrs['transA'] = node.attrs.get('transA', 0) + node.attrs['transB'] = node.attrs.get('transB', 0) + node.attrs['alpha'] = node.attrs.get('alpha', 1.0) + node.attrs['beta'] = node.attrs.get('beta', 1.0) # Prepend transpose on A if it's transposed - if gemmNode.attrs['transA'] != 0: - anchorTransposeNode, anchorTransposeOutput = _appendTransposeNode(inputA, name + "_A", - _permutationLastTwoDims(len(inputA.shape))) - gemmNode.inputs[0] = anchorTransposeOutput - gemmNode.attrs['transA'] = 0 - graph.nodes.append(anchorTransposeNode) + if node.attrs['transA'] == 1: + tensorA = node.inputs[0] + perm = _swapLastTwoDimsPermutation(len(tensorA.shape)) + graph.nodes.append(_appendTranspose(tensorA, node, perm)) + node.attrs['transA'] = False # Prepend transpose on B if it's not transposed - if gemmNode.attrs['transB'] != 1: - anchorTransposeNode, anchorTransposeOutput = _appendTransposeNode(inputB, name + "_B", - _permutationLastTwoDims(len(inputB.shape))) - gemmNode.inputs[1] = anchorTransposeOutput - gemmNode.attrs['transB'] = 1 - graph.nodes.append(anchorTransposeNode) + if node.attrs['transB'] == 0: + tensorB = node.inputs[1] + perm = _swapLastTwoDimsPermutation(len(tensorB.shape)) + graph.nodes.append(_appendTranspose(tensorB, node, perm)) + node.attrs['transB'] = True return graph @@ -206,62 +214,40 @@ def _transposeMatMulInputs_fun(graph: gs.Graph, match: Match, name: str): class TransposeMatmulInputsPass(ReplaceSequentialPatternPass): def __init__(self): - graph = gs.Graph() - _input = gs.Variable(name = 'input_1') - output = graph.layer(inputs = [_input], outputs = ['gemmOut'], op = 'RequantizedGemm', name = 'requantizedGemm') - graph.outputs.append(output) - graph.inputs.append(_input) - + graph = _singleNodePattern("RequantizedGemm") name = "_TRANSPOSE_MATMUL_INPUTS_PASS" super().__init__(graph, _transposeMatMulInputs_fun, name) def _NCHWtoNHWC_fun(graph: gs.Graph, match: Match, name: str, default_channels_first: bool = True): + node = next(iter((match.nodes_map.values()))) - 
matched_nodes = [m for k, m in match.nodes_map.items()] - opNode = matched_nodes[0] - node_op = opNode.op - - # Default for non-existent channels_first: True - channels_first = opNode.attrs["channels_first"] if "channels_first" in opNode.attrs else True - + channels_first = node.attrs.get("channels_first", True) if (channels_first != default_channels_first): + tensorIn = node.inputs[0] + tensorOut = node.outputs[0] - inputNode = opNode.inputs[0] - outputNode = opNode.outputs[0] - - inPermute = _permutationNCHWtoNHWC(len(inputNode.shape)) - outPermute = _permutationNHWCtoNCHW(len(outputNode.shape)) - - inputTransposeNode, inputTransposeOutput = _appendTransposeNode(inputNode, name + "_TransposeIn", inPermute) - outputTransposeNode, outputTransposeInput = _prependTransposeNode(outputNode, - name + "_TransposeOut", - outPermute, - invert = True) - - opNode.inputs[0] = inputTransposeOutput - opNode.outputs[0] = outputTransposeInput - graph.nodes.append(inputTransposeNode) - graph.nodes.append(outputTransposeNode) - - if node_op in ["RequantizedConv", "Conv"]: + if node.op in ["RequantizedConv", "Conv"]: + spatialDims = len(node.inputs[1].shape) - 2 + elif node.op == "MaxPool": + spatialDims = len(node.attrs["kernel_shape"]) + elif node.op == "Pad": + spatialDims = 2 # Hack based on current status + else: + raise ValueError(f"Cannot determine spatialDims for node {node.name} with operator {node.op}") - # Non DW-Type: - if opNode.attrs['group'] == 1: - weightNode = opNode.inputs[1] - weightTransposeNode, weightTransposeOutput = _appendTransposeNode(weightNode, name + "TransposeWeight", - inPermute) + permuteIn = _transformLayoutPermutation(len(tensorIn.shape), spatialDims, default_channels_first) + graph.nodes.append(_appendTranspose(tensorIn, node, permuteIn)) - else: - DWPermute = [inPermute[-1]] + inPermute[1:-1] + [inPermute[0]] - weightNode = opNode.inputs[1] - weightTransposeNode, weightTransposeOutput = _appendTransposeNode(weightNode, name + "TransposeWeight", - DWPermute) + permuteOut = _transformLayoutPermutation(len(tensorOut.shape), spatialDims, channels_first) + graph.nodes.append(_prependTranspose(tensorOut, node, permuteOut)) - opNode.inputs[1] = weightTransposeOutput - graph.nodes.append(weightTransposeNode) + if node.op in ["Conv", "RequantizedConv"]: + # In the case of Conv: [weights, opt. bias], RequantizedConv: [weights, mul, add, opt. 
shift] + for tensor in node.inputs[1:]: + _transformLayoutConst(tensor, spatialDims, default_channels_first) - opNode.attrs["channels_first"] = default_channels_first + node.attrs["channels_first"] = default_channels_first return graph @@ -270,12 +256,7 @@ def _NCHWtoNHWC_fun(graph: gs.Graph, match: Match, name: str, default_channels_f class NCHWtoNHWCMaxPoolPass(ReplaceSequentialPatternPass): def __init__(self, default_channels_first: bool = True): - graph = gs.Graph() - _input = gs.Variable(name = 'input_1') - output = graph.layer(inputs = [_input], outputs = ['maxPool'], op = 'MaxPool', name = 'MaxPool') - graph.outputs.append(output) - graph.inputs.append(_input) - + graph = _singleNodePattern(op = "MaxPool") name = "_NCHW_TO_NHWC_MAXPOOL_PASS" super().__init__(graph, partial(_NCHWtoNHWC_fun, default_channels_first = default_channels_first), name) @@ -284,214 +265,129 @@ def __init__(self, default_channels_first: bool = True): class NCHWtoNHWCConvPass(ReplaceSequentialPatternPass): def __init__(self, default_channels_first: bool = True): - graph = gs.Graph() - _input = gs.Variable(name = 'input_1') - output = graph.layer(inputs = [_input], outputs = ['convOut'], op = 'Conv', name = 'conv') - graph.outputs.append(output) - graph.inputs.append(_input) - - name = "_NCHW_TO_NHWC_CONV_PASS" - super().__init__(graph, partial(_NCHWtoNHWC_fun, default_channels_first = default_channels_first), name) - - -@contextagnostic -class NCHWtoNHWCRequantizedConvPass(ReplaceSequentialPatternPass): - - def __init__(self, default_channels_first: bool = True): - graph = gs.Graph() - _input = gs.Variable(name = 'input_1') - output = graph.layer(inputs = [_input], outputs = ['convOut'], op = 'RequantizedConv', name = 'requantizedConv') - graph.outputs.append(output) - graph.inputs.append(_input) - + graph = _singleNodePattern(op = "Conv|RequantizedConv") name = "_NCHW_TO_NHWC_CONV_PASS" - super().__init__(graph, partial(_NCHWtoNHWC_fun, default_channels_first = default_channels_first), name) + super().__init__(graph, partial(_NCHWtoNHWC_fun, default_channels_first = default_channels_first), name, + NonBranchingMatcher(regex_op = True)) @contextagnostic class NCHWtoNHWCPadPass(ReplaceSequentialPatternPass): def __init__(self, default_channels_first: bool = True): - graph = gs.Graph() - _input = gs.Variable(name = 'input_1') - output = graph.layer(inputs = [_input], outputs = ['padOut'], op = 'Pad', name = 'pad') - graph.outputs.append(output) - graph.inputs.append(_input) - + graph = _singleNodePattern(op = "Pad") name = "_NCHW_TO_NHWC_PAD_PASS" super().__init__(graph, partial(_NCHWtoNHWC_fun, default_channels_first = default_channels_first), name) -@contextagnostic -class NCHWtoNHWCPass(SequentialPass): - - def __init__(self, default_channels_first: bool = True): - passes = [ - NCHWtoNHWCPadPass(default_channels_first), - NCHWtoNHWCMaxPoolPass(default_channels_first), - NCHWtoNHWCConvPass(default_channels_first), - NCHWtoNHWCRequantizedConvPass(default_channels_first), - ] - super().__init__(*passes) - - -def _PULPDWNCHWtoNHWC_fun(graph: gs.Graph, match: Match, name: str, default_channels_first: bool = True): - - matched_nodes = [m for k, m in match.nodes_map.items()] - opNode = matched_nodes[0] - node_op = opNode.op +def _NCWHtoNHWC_dw_fun(graph: gs.Graph, match: Match, name: str, default_channels_first: bool) -> gs.Graph: + node = next(iter((match.nodes_map.values()))) - if opNode.attrs['group'] == 1: + if not _isDepthwise(node): return graph - if (("channels_first" in opNode.attrs and 
opNode.attrs["channels_first"] != default_channels_first) - or ("channels_first" not in opNode.attrs and default_channels_first == 0)): - - inputNode = opNode.inputs[0] - outputNode = opNode.outputs[0] + channels_first = node.attrs.get("channels_first", True) + if (channels_first != default_channels_first): + tensorIn = node.inputs[0] + tensorOut = node.outputs[0] - inPermute = _permutationNCHWtoNHWC(len(inputNode.shape)) - outPermute = _permutationNHWCtoNCHW(len(outputNode.shape)) + spatialDims = len(node.inputs[1].shape) - 2 - outputTransposeNode, outputTransposeInput = _prependTransposeNode(outputNode, - name + "_TransposeOut", - outPermute, - invert = True) + permuteIn = _transformLayoutPermutation(len(tensorIn.shape), spatialDims, default_channels_first) + permuteOut = _transformLayoutPermutation(len(tensorOut.shape), spatialDims, channels_first) - opNode.outputs[0] = outputTransposeInput - graph.nodes.append(outputTransposeNode) + graph.nodes.append(_appendTranspose(tensorIn, node, permuteIn)) + graph.nodes.append(_prependTranspose(tensorOut, node, permuteOut)) - if node_op == "RequantizedConv": + _transformLayoutDwWeightConst(node.inputs[1], default_channels_first) # weights - weightNode = opNode.inputs[1] - weightTransposeNode, weightTransposeOutput = _appendTransposeNode(weightNode, name + "TransposeWeight", - inPermute) - opNode.inputs[1] = weightTransposeOutput - graph.nodes.append(weightTransposeNode) + if len(node.inputs) > 2: + # In the case of Conv: [opt. bias], RequantizedConv: [mul, add, opt. shift] + for tensor in node.inputs[2:]: + _transformLayoutConst(tensor, spatialDims, default_channels_first) # bias - opNode.attrs["channels_first"] = default_channels_first + node.attrs["channels_first"] = default_channels_first return graph @contextagnostic -class PULPDWConvPass(ReplaceSequentialPatternPass): +class NCHWtoNHWCDwConvPass(ReplaceSequentialPatternPass): def __init__(self, default_channels_first: bool = True): - graph = gs.Graph() - _input = gs.Variable(name = 'input_1') - output = graph.layer(inputs = [_input], outputs = ['convOut'], op = 'RequantizedConv', name = 'requantizedConv') - graph.outputs.append(output) - graph.inputs.append(_input) + graph = _singleNodePattern(op = "Conv|RequantizedConv") + name = "_NCHW_TO_NHWC_DW_CONV_PASS" + super().__init__(graph, partial(_NCWHtoNHWC_dw_fun, default_channels_first = default_channels_first), name, + NonBranchingMatcher(regex_op = True)) - name = "_NCHW_TO_NHWC_CONV_PASS" - super().__init__(graph, partial(_PULPDWNCHWtoNHWC_fun, default_channels_first = default_channels_first), name) - - -def _PULPDenseNCHWtoNHWC_fun(graph: gs.Graph, match: Match, name: str, default_channels_first: bool = True): - matched_nodes = [m for k, m in match.nodes_map.items()] - opNode = matched_nodes[0] +def _PULP_NCHWtoNHWC_dw_fun(graph: gs.Graph, match: Match, name: str, default_channels_first: bool = True): + node = next(iter((match.nodes_map.values()))) - node_group = opNode.attrs['group'] if 'group' in opNode.attrs else 1 - if node_group != 1: + if not _isDepthwise(node): return graph - return _NCHWtoNHWC_fun(graph, match, name, default_channels_first) - - -@contextagnostic -class PULPNCHWtoNHWCDenseRequantizedConvPass(ReplaceSequentialPatternPass): - - def __init__(self, default_channels_first: bool = True): - graph = gs.Graph() - _input = gs.Variable(name = 'input_1') - output = graph.layer(inputs = [_input], outputs = ['convOut'], op = 'RequantizedConv', name = 'requantizedConv') - graph.outputs.append(output) - graph.inputs.append(_input) 
- - name = "_NCHW_TO_NHWC_CONV_PASS" - super().__init__(graph, partial(_PULPDenseNCHWtoNHWC_fun, default_channels_first = default_channels_first), - name) - - -def _NeurekaDWNCHWtoNHWC_fun(graph: gs.Graph, match: Match, name: str, default_channels_first: bool = True): - - matched_nodes = [m for k, m in match.nodes_map.items()] - opNode = matched_nodes[0] + channels_first = node.attrs.get("channels_first", True) + if (channels_first != default_channels_first): + tensorOut = node.outputs[0] - node_group = opNode.attrs['group'] if 'group' in opNode.attrs else 1 - if node_group == 1: - return graph + spatialDims = len(node.inputs[1].shape) - 2 - return _NCHWtoNHWC_fun(graph, match, name, default_channels_first) + # LMACAN: PULP DW doesn't transpose the input + permuteOut = _transformLayoutPermutation(len(tensorOut.shape), spatialDims, channels_first) + graph.nodes.append(_prependTranspose(tensorOut, node, permuteOut)) -@contextagnostic -class NeurekaNCHWtoNHWCDWRequantizedConvPass(ReplaceSequentialPatternPass): + # RequantizedConv: [weights, mul, add, opt. shift] + for tensor in node.inputs[1:]: + _transformLayoutConst(tensor, spatialDims, default_channels_first) - def __init__(self, default_channels_first: bool = True): - graph = gs.Graph() - _input = gs.Variable(name = 'input_1') - output = graph.layer(inputs = [_input], outputs = ['convOut'], op = 'RequantizedConv', name = 'requantizedConv') - graph.outputs.append(output) - graph.inputs.append(_input) + node.attrs["channels_first"] = default_channels_first - name = "_NCHW_TO_NHWC_CONV_PASS" - super().__init__(graph, partial(_NeurekaDWNCHWtoNHWC_fun, default_channels_first = default_channels_first), - name) + return graph @contextagnostic -class PULPNCHWtoNHWCDenseConvPass(ReplaceSequentialPatternPass): +class PULPNCHWtoNHWCDwConvPass(ReplaceSequentialPatternPass): def __init__(self, default_channels_first: bool = True): - graph = gs.Graph() - _input = gs.Variable(name = 'input_1') - output = graph.layer(inputs = [_input], outputs = ['convOut'], op = 'Conv', name = 'conv') - graph.outputs.append(output) - graph.inputs.append(_input) - - name = "_NCHW_TO_NHWC_CONV_PASS" - super().__init__(graph, partial(_PULPDenseNCHWtoNHWC_fun, default_channels_first = default_channels_first), - name) + graph = _singleNodePattern(op = "RequantizedConv") + name = "_PULP_NCHW_TO_NHWC_DW_CONV_PASS" + super().__init__(graph, partial(_PULP_NCHWtoNHWC_dw_fun, default_channels_first = default_channels_first), name) @contextagnostic -class PULPNCHWtoNHWCPass(SequentialPass): +class NCHWtoNHWCPass(SequentialPass): def __init__(self, default_channels_first: bool = True): passes = [ NCHWtoNHWCPadPass(default_channels_first), NCHWtoNHWCMaxPoolPass(default_channels_first), - PULPDWConvPass(default_channels_first), - PULPNCHWtoNHWCDenseConvPass(default_channels_first), - PULPNCHWtoNHWCDenseRequantizedConvPass(default_channels_first), + NCHWtoNHWCDwConvPass(default_channels_first), + NCHWtoNHWCConvPass(default_channels_first), ] super().__init__(*passes) @contextagnostic -class NeurekaNCHWtoNHWCPass(SequentialPass): +class PULPNCHWtoNHWCPass(SequentialPass): def __init__(self, default_channels_first: bool = True): passes = [ NCHWtoNHWCPadPass(default_channels_first), NCHWtoNHWCMaxPoolPass(default_channels_first), - NeurekaNCHWtoNHWCDWRequantizedConvPass(default_channels_first), - PULPNCHWtoNHWCDenseConvPass(default_channels_first), - PULPNCHWtoNHWCDenseRequantizedConvPass(default_channels_first), + PULPNCHWtoNHWCDwConvPass(default_channels_first), + 
NCHWtoNHWCConvPass(default_channels_first), ] super().__init__(*passes) def _requantized_gemm_to_pw_fun(graph: gs.Graph, match: Match, name: str): - matched_nodes = list(match.nodes_map.values()) - requantizedGemm = matched_nodes[0] + node = next(iter((match.nodes_map.values()))) - matrixA: gs.Variable = requantizedGemm.inputs[0] - matrixB: gs.Constant = requantizedGemm.inputs[1] - matrixY: gs.Variable = requantizedGemm.outputs[0] + matrixA: gs.Variable = node.inputs[0] + matrixB: gs.Constant = node.inputs[1] + matrixY: gs.Variable = node.outputs[0] # Check matrixB is a constant, otherwise don't transform if not isinstance(matrixB, gs.Constant): @@ -506,10 +402,22 @@ def _requantized_gemm_to_pw_fun(graph: gs.Graph, match: Match, name: str): # Pointwise with HWC layout (channels_first == False) + # Defaults + node.attrs['transA'] = node.attrs.get('transA', 0) + node.attrs['transB'] = node.attrs.get('transB', 0) + node.attrs['alpha'] = node.attrs.get('alpha', 1.0) + node.attrs['beta'] = node.attrs.get('beta', 1.0) + # If transA is set then the matrix is of shape [B x K x M] and it needs to be transposed, otherwise its shape is [B x M x K] - if 'transA' in requantizedGemm.attrs and requantizedGemm.attrs['transA'] == 1: - matrixATransposeNode, matrixA = _appendTransposeNode(matrixA, name, _permutationLastTwoDims(len(matrixA.shape))) - graph.nodes.append(matrixATransposeNode) + if node.attrs['transA'] == 1: + perm = _swapLastTwoDimsPermutation(len(matrixA.shape)) + graph.nodes.append(_appendTranspose(matrixA, node, perm)) + matrixA = node.inputs[0] + + # If transB is set then the matrix is of shape [N x K] and it doesn't need to be transposed, otherwise its shape is [K x N] and it has to be transposed + if node.attrs['transB'] == 0: + perm = _swapLastTwoDimsPermutation(len(matrixB.shape)) + matrixB.values = matrixB.values.transpose(perm) # Align dimensions for convolution expandAxis = [] @@ -522,11 +430,6 @@ def _requantized_gemm_to_pw_fun(graph: gs.Graph, match: Match, name: str): matrixAExpandDimsNode, pwIn = _appendExpandDims(matrixA, name, axis = expandAxis) graph.nodes.append(matrixAExpandDimsNode) - # If transB is set then the matrix is of shape [N x K] and it doesn't need to be transposed, otherwise its shape is [K x N] and it has to be transposed - if not 'transB' in requantizedGemm.attrs or requantizedGemm.attrs['transB'] == 0: - # matrixBTransposed, shape [N x K] - matrixBTransposeNode, matrixB = _appendTransposeNode(matrixB, name, _permutationLastTwoDims(len(matrixB.shape))) - graph.nodes.append(matrixBTransposeNode) # pwWeight, shape [N x 1 x 1 x K] matrixBExpandDimsNode, pwWeight = _appendExpandDims(matrixB, name, axis = (1, 2)) graph.nodes.append(matrixBExpandDimsNode) @@ -548,14 +451,14 @@ def _requantized_gemm_to_pw_fun(graph: gs.Graph, match: Match, name: str): 'kernel_shape': [1, 1], 'pads': [0, 0, 0, 0], 'strides': [1, 1], - 'div': requantizedGemm.attrs['div'], - 'n_levels_out': requantizedGemm.attrs['n_levels_out'], - 'shift': requantizedGemm.attrs['shift'], - 'signed': requantizedGemm.attrs['signed'], + 'div': node.attrs['div'], + 'n_levels_out': node.attrs['n_levels_out'], + 'shift': node.attrs['shift'], + 'signed': node.attrs['signed'], } - add = requantizedGemm.inputs[2] - mul = requantizedGemm.inputs[3] + add = node.inputs[2] + mul = node.inputs[3] _inputs = [pwIn, pwWeight, mul, add] @@ -566,9 +469,9 @@ def _requantized_gemm_to_pw_fun(graph: gs.Graph, match: Match, name: str): attrs = pwAttrs) graph.nodes.append(pw) - requantizedGemm.inputs.clear() - 
requantizedGemm.outputs.clear() - graph.nodes.remove(requantizedGemm) + node.inputs.clear() + node.outputs.clear() + graph.nodes.remove(node) return graph @@ -577,23 +480,16 @@ def _requantized_gemm_to_pw_fun(graph: gs.Graph, match: Match, name: str): class RequantizedGemmToPwPass(ReplaceSequentialPatternPass): def __init__(self): - graph = gs.Graph() - _input = gs.Variable(name = 'input_1') - output = graph.layer(inputs = [_input], outputs = ['out'], op = 'RequantizedGemm', name = 'requantizedGemm') - graph.outputs.append(output) - graph.inputs.append(_input) - + graph = _singleNodePattern("RequantizedGemm") super().__init__(graph, _requantized_gemm_to_pw_fun, "_REQUANTIZED_GEMM_TO_PW_PASS") def _remove_global_output_reshape_fun(graph: gs.Graph, match: Match, name: str): - matched_nodes = list(match.nodes_map.values()) - reshape = matched_nodes[0] - - isGlobalOutput = len(reshape.outputs[0].outputs) == 0 + node = next(iter((match.nodes_map.values()))) + isGlobalOutput = len(node.outputs[0].outputs) == 0 if isGlobalOutput: - graph.deleteNode(reshape) + graph.deleteNode(node) return graph @@ -602,26 +498,19 @@ def _remove_global_output_reshape_fun(graph: gs.Graph, match: Match, name: str): class RemoveGlobalOutputReshapePass(ReplaceSequentialPatternPass): def __init__(self): - graph = gs.Graph() - _input = gs.Variable(name = 'input_1') - output = graph.layer(inputs = [_input], outputs = ['out'], op = 'Reshape', name = 'reshape') - graph.outputs.append(output) - graph.inputs.append(_input) - + graph = _singleNodePattern("Reshape") super().__init__(graph, _remove_global_output_reshape_fun, "_REMOVE_GLOBAL_OUTPUT_RESHAPE_PASS") def _remove_empty_conv_bias_fun(graph: gs.Graph, match: Match, name: str): - # Extract matched convolution - matched_nodes = list(match.nodes_map.values()) - opNode = matched_nodes[0] + node = next(iter((match.nodes_map.values()))) - # Check if the Conv node has a bias input - # If it does, check if the bias only contains zeros - if len(opNode.inputs) > 2 and np.all(opNode.inputs[2].values == 0): - del opNode.inputs[2] + # Check if the node has an all-zero bias and remove it + if len(node.inputs) == 3: + bias = node.inputs[2] + if isinstance(bias, gs.Constant) and np.all(bias.values == 0): + del node.inputs[2] - # Return updated graph return graph @@ -629,13 +518,6 @@ def _remove_empty_conv_bias_fun(graph: gs.Graph, match: Match, name: str): class RemoveEmptyConvBiasPass(ReplaceSequentialPatternPass): def __init__(self): - # Initialized graph with a Conv node - graph = gs.Graph() - _input = gs.Variable(name = 'input_1') - output = graph.layer(inputs = [_input], outputs = ['convOut'], op = 'Conv', name = 'conv') - graph.outputs.append(output) - graph.inputs.append(_input) - - # Apply function + graph = _singleNodePattern("Conv") name = "_REMOVE_EMPTY_CONV_BIAS_PASS" super().__init__(graph, _remove_empty_conv_bias_fun, name) diff --git a/Deeploy/Targets/Neureka/Deployer.py b/Deeploy/Targets/Neureka/Deployer.py index e9b966569..be34e1f4d 100644 --- a/Deeploy/Targets/Neureka/Deployer.py +++ b/Deeploy/Targets/Neureka/Deployer.py @@ -8,7 +8,7 @@ from Deeploy.AbstractDataTypes import Pointer from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.LoweringOptimizationPasses import \ - NeurekaNCHWtoNHWCPass, PULPNCHWtoNHWCPass + NCHWtoNHWCPass, PULPNCHWtoNHWCPass from Deeploy.DeeployTypes import DeploymentPlatform, TopologyOptimizer from Deeploy.Targets.Neureka.TopologyOptimizationPasses.Passes import ConvEngineDiscolorationPass, \ NeurekaOptimizationPass @@ 
-33,7 +33,7 @@ def __init__(self, if self.Platform.engines[0].enable3x3: for idx in range(len(self.loweringOptimizer.passes)): if isinstance(self.loweringOptimizer.passes[idx], PULPNCHWtoNHWCPass): - self.loweringOptimizer.passes[idx] = NeurekaNCHWtoNHWCPass(self.default_channels_first) + self.loweringOptimizer.passes[idx] = NCHWtoNHWCPass(self.default_channels_first) self.loweringOptimizer.passes += [ ConvEngineDiscolorationPass(),
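
Note (illustrative, not part of the patch): the refactor replaces the direction-specific `_permutationNCHWtoNHWC` / `_permutationNHWCtoNCHW` helpers with a single `_transformLayoutPermutation(dims, spatialDims, targetChannelsFirst)`. The sketch below reproduces the new helpers from the patch to show the permutations they produce for a 4-D activation tensor; the assertions follow directly from the definitions.

    # Illustrative sketch, not part of the patch: the layout-permutation helpers
    # introduced above, reproduced to show their results for a 4-D tensor.
    from typing import List, Sequence


    def _transformLayoutPermutation(dims: int, spatialDims: int, targetChannelsFirst: bool) -> List[int]:
        batchDims = dims - spatialDims - 1
        if targetChannelsFirst:
            ch = dims - 1
            nonBatchPerm = [ch, *range(batchDims, ch)]
        else:
            ch = batchDims
            nonBatchPerm = [*range(ch + 1, dims), ch]
        return list(range(batchDims)) + nonBatchPerm


    def _invertPermutation(permutation: Sequence[int]) -> List[int]:
        return [permutation.index(i) for i in range(len(permutation))]


    # 4-D activation (1 batch dim + channel dim + 2 spatial dims), i.e. NCHW <-> NHWC:
    assert _transformLayoutPermutation(4, 2, targetChannelsFirst = False) == [0, 2, 3, 1]  # NCHW -> NHWC
    assert _transformLayoutPermutation(4, 2, targetChannelsFirst = True) == [0, 3, 1, 2]   # NHWC -> NCHW
    assert _invertPermutation([0, 2, 3, 1]) == [0, 3, 1, 2]  # the two directions are mutual inverses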
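
Note (illustrative, not part of the patch): `_isDepthwise` classifies a Conv/RequantizedConv as depthwise when its `group` attribute equals the input channel count and that count is not 1. A minimal check under the assumptions that `onnx_graphsurgeon` and NumPy are installed and that importing the private helper from its module path is acceptable; shapes and node names are made up for the example.

    # Illustrative sketch, not part of the patch. Shapes and names are example values.
    import numpy as np
    import onnx_graphsurgeon as gs

    from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.LoweringOptimizationPasses import \
        _isDepthwise

    x = gs.Variable("x", np.float32, [1, 8, 16, 16])                   # NCHW input, 8 channels
    y = gs.Variable("y", np.float32, [1, 8, 16, 16])

    # Depthwise: one 3x3 filter per channel, group == input channels
    wDw = gs.Constant("w_dw", np.zeros((8, 1, 3, 3), dtype = np.float32))
    dwConv = gs.Node(op = "Conv", name = "dw", inputs = [x, wDw], outputs = [y], attrs = {"group": 8})
    assert _isDepthwise(dwConv)

    # Dense: group == 1, so the node is not treated as depthwise
    wDense = gs.Constant("w_dense", np.zeros((8, 8, 3, 3), dtype = np.float32))
    denseConv = gs.Node(op = "Conv", name = "dense", inputs = [x, wDense], outputs = [y], attrs = {"group": 1})
    assert not _isDepthwise(denseConv)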
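
Note (illustrative, not part of the patch): the fix advertised in the changelog is that constant inputs of Conv/RequantizedConv now have their layout transformed together with the activations. The NumPy sketch below shows the shape effect of `_transformLayoutConst` and `_transformLayoutDwWeightConst` when lowering to channels-last; the concrete dimensions are assumptions for the example.

    # Illustrative sketch, not part of the patch. Dimensions are example values.
    import numpy as np

    O, C, kH, kW = 8, 5, 3, 3

    # Dense Conv weight in channels-first layout [O, C, kH, kW]:
    # _transformLayoutConst applies _transformLayoutPermutation(4, 2, False) == [0, 2, 3, 1],
    # i.e. the stored values end up in [O, kH, kW, C] layout.
    w = np.zeros((O, C, kH, kW))
    assert w.transpose([0, 2, 3, 1]).shape == (O, kH, kW, C)

    # Depthwise weight [C, 1, kH, kW]: _transformLayoutDwWeightConst rotates axis 0
    # to the back ([1, 2, 3, 0]), yielding a [1, kH, kW, C] layout.
    wDw = np.zeros((C, 1, kH, kW))
    assert wDw.transpose([1, 2, 3, 0]).shape == (1, kH, kW, C)

    # Rank-0/rank-1 constants (e.g. a per-channel shift stored as a flat vector) are
    # left untouched (_transformLayoutConst returns early for len(shape) < 2), while a
    # broadcastable constant such as a [1, C, 1, 1] bias would be permuted to [1, 1, 1, C].
    bias = np.zeros((1, C, 1, 1))
    assert bias.transpose([0, 2, 3, 1]).shape == (1, 1, 1, C)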