diff --git a/Deeploy/CommonExtensions/NetworkDeployers/SignPropDeployer.py b/Deeploy/CommonExtensions/NetworkDeployers/SignPropDeployer.py index fc9adf96b..ff0f8ffa1 100644 --- a/Deeploy/CommonExtensions/NetworkDeployers/SignPropDeployer.py +++ b/Deeploy/CommonExtensions/NetworkDeployers/SignPropDeployer.py @@ -7,7 +7,7 @@ import onnx_graphsurgeon as gs from Deeploy.AbstractDataTypes import Pointer -from Deeploy.DeeployTypes import DeploymentPlatform, NetworkDeployer, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, NetworkDeployer, OperatorDescriptor, TopologyOptimizer class SignPropDeployer(NetworkDeployer): @@ -17,12 +17,13 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first: bool = True, deeployStateDir: str = "DeeployState", inputOffsets: Dict[str, int] = {}): - super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, scheduler, name, + super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, operatorDescriptors, scheduler, name, default_channels_first, deeployStateDir) if inputOffsets == {}: diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py index b638cbd59..487dd376e 100644 --- a/Deeploy/DeeployTypes.py +++ b/Deeploy/DeeployTypes.py @@ -1054,6 +1054,155 @@ def copy(self) -> NetworkContext: return copy.copy(self) +class IoDesc: + + def __init__(self, required: Union[str, List[str]], optional: Union[str, List[str]] = []) -> None: + if isinstance(required, str): + required = [required] + self.required = required + if isinstance(optional, str): + optional = [optional] + self.optional = optional + + def symbolicName(self, idx: int) -> str: + return (self.required + self.optional)[idx] + + def checkTensors(self, tensors: Sequence[gs.Tensor]) -> bool: + return len(tensors) >= len(self.required) and \ + len(tensors) <= len(self.required) + len(self.optional) + + +class VariadicIoDesc(IoDesc): + + def __init__(self, baseName: str, minNumTensors: int = 0) -> None: + self.baseName = baseName + self.minNumTensors = minNumTensors + + def symbolicName(self, idx: int) -> str: + return f"{self.baseName}_{idx}" + + def checkTensors(self, tensors: Sequence[gs.Tensor]) -> bool: + return len(tensors) >= self.minNumTensors + + +@dataclass +class AttrDesc: + name: str + unpacker: Callable[[Any], Any] + default: Optional[Union[Any, Callable[[gs.Node], Any]]] = None + + @staticmethod + def _constUnpack(value: Any) -> Any: + if isinstance(value, gs.Constant): + return value.values.tolist() + elif isinstance(value, np.ndarray): + return value.tolist() + # LMACAN: hacky way to detect a 0-dim numpy array + elif hasattr(value, "ndim") and value.ndim == 0 and hasattr(value, "item"): + return value.item() + else: + return value + + def unpack(self, value: Any) -> Union[int, float, List[int], List[float]]: + return self.unpacker(self._constUnpack(value)) + + def getDefault(self, node: gs.Node) -> Any: + if callable(self.default): + return self.default(node) + else: + return self.default + + +@dataclass +class OperatorDescriptor: + inputDescriptor: IoDesc + outputDescriptor: IoDesc + attrDescriptors: List[AttrDesc] + + def check(self, node: gs.Node) -> bool: + """This method checks whether the node is valid. 
+
+        Parameters
+        ----------
+        node : gs.Node
+            Graphsurgeon node to be validated
+
+        Returns
+        -------
+        bool : node validity
+
+        """
+        valid = True
+
+        if not self.inputDescriptor.checkTensors(node.inputs):
+            # TODO: Change to logging
+            print(f"[ERROR OP {node.op}] Invalid input tensors: {[t.name for t in node.inputs]}")
+            valid = False
+
+        if not self.outputDescriptor.checkTensors(node.outputs):
+            # TODO: Change to logging
+            print(f"[ERROR OP {node.op}] Invalid output tensors: {[t.name for t in node.outputs]}")
+            valid = False
+
+        for attrDesc in self.attrDescriptors:
+            if attrDesc.default is None and attrDesc.name not in node.attrs:
+                # TODO: Change to logging
+                print(f"[ERROR OP {node.op}] Missing attribute {attrDesc.name}")
+                valid = False
+
+        return valid
+
+    def canonicalize(self, node: gs.Node, opset: int) -> bool:
+        _ = opset
+        for desc in self.attrDescriptors:
+            if desc.default is None:
+                value = node.attrs[desc.name]
+            else:
+                value = node.attrs.get(desc.name, desc.getDefault(node))
+            try:
+                node.attrs[desc.name] = desc.unpack(value)
+            except Exception as e:
+                raise ValueError(f"[ERROR OP {node.op}] Error unpacking the attribute {desc.name}. {e}") from e
+        return True
+
+    def parseTensors(self, ctxt: NetworkContext, tensors: Sequence[gs.Tensor],
+                     ioDesc: IoDesc) -> OperatorRepresentation:
+        opRepr = {}
+        for i, tensor in enumerate(tensors):
+            symName = ioDesc.symbolicName(i)
+            buffer = ctxt.lookup(tensor.name)
+            assert isinstance(buffer, VariableBuffer)
+            opRepr[symName] = buffer.name
+            opRepr[f"{symName}_shape"] = buffer.shape
+            opRepr[f"{symName}_size"] = math.prod(buffer.shape)
+            opRepr[f"{symName}_type"] = buffer._type
+        return opRepr
+
+    def parseAttrs(self, node: gs.Node) -> OperatorRepresentation:
+        return node.attrs.copy()
+
+    def parse(self, ctxt: NetworkContext, node: gs.Node) -> OperatorRepresentation:
+        opReprs = {
+            "input tensors": self.parseTensors(ctxt, node.inputs, self.inputDescriptor),
+            "output tensors": self.parseTensors(ctxt, node.outputs, self.outputDescriptor),
+            "attributes": self.parseAttrs(node),
+        }
+
+        for (firstName, firstOpRepr), (secondName, secondOpRepr) in itertools.combinations(opReprs.items(), 2):
+            firstKeySet = set(firstOpRepr.keys())
+            secondKeySet = set(secondOpRepr.keys())
+            assert firstKeySet.isdisjoint(secondKeySet), \
+                f"[PARSE ERROR] (Node: {node.name}, Op: {node.op}) " \
+                f"Keys from parsing {firstName} clash with the keys from parsing {secondName}. "\
+                f"Overlapping keys: {firstKeySet & secondKeySet}"
+
+        resultOpRepr = {}
+        for opRepr in opReprs.values():
+            resultOpRepr.update(opRepr)
+
+        return resultOpRepr
+
+
 class NodeParser():
     """Deeploy's core Parser class. Analyzes network nodes and evaluates whether they can be mapped by it.
 
@@ -1177,7 +1326,9 @@ def _unpack_const(attr) -> Union[int, float]:
         The attributes can either be a numpy scalar value or a Constant tensor.
         This expects the numpy value to be of size 1.
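+
+        Example (illustrative sketch; concrete values assumed)::
+
+            >>> _unpack_const(gs.Constant("c", np.array([4])))
+            4
+            >>> _unpack_const(3.0)
+            3.0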
""" - if isinstance(attr, gs.Constant): + if isinstance(attr, (int, float, bool, str)): + return attr + elif isinstance(attr, gs.Constant): value = attr.values elif isinstance(attr, np.ndarray): value = attr @@ -1898,44 +2049,38 @@ def broadcast(self, ctxt: NetworkContext, default_channels_first: bool = True) - inputShapes = [ctxt.lookup(node.name).shape for node in self.node.inputs] outputShapes = [ctxt.lookup(node.name).shape for node in self.node.outputs] - if not "channels_first" in self.mapper.parser.operatorRepresentation: - channels_first = default_channels_first - else: - channels_first = self.mapper.parser.operatorRepresentation['channels_first'] + opRepr = self.mapper.parser.operatorRepresentation + channels_first = opRepr.get("channels_first", default_channels_first) + newInputShapes, newOutputShapes = self.computeShapes(inputShapes, outputShapes, opRepr, channels_first) - newInputShapes, newOutputShapes = self.computeShapes(inputShapes, outputShapes, - self.mapper.parser.operatorRepresentation, channels_first) + for tensor, shape in zip(self.node.inputs + self.node.outputs, newInputShapes + newOutputShapes): + buffer = ctxt.lookup(tensor.name) + assert isinstance(buffer, VariableBuffer) - for node, newShape in zip(self.node.inputs + self.node.outputs, newInputShapes + newOutputShapes): - if ctxt.is_local(node.name): - ctxt.localObjects[node.name].shape = newShape + if ctxt.is_local(tensor.name): + buffer.shape = shape # Update shape of tensors in onnx graph - node.shape = newShape + tensor.shape = shape # WIESEP: It is possible that the type was not yet set, so we assume some default type # At this state, we assume that all local buffers are float32 type inference is not yet done. - if node.dtype is None: - node.dtype = np.float32 + if tensor.dtype is None: + tensor.dtype = np.float32 - elif ctxt.is_global(node.name): - ctxt.globalObjects[node.name].shape = newShape - if isinstance(ctxt.globalObjects[node.name], ConstantBuffer): + elif ctxt.is_global(tensor.name): + buffer.shape = shape + if isinstance(buffer, ConstantBuffer): # If the number of elements is equal, reshape - if np.prod(ctxt.globalObjects[node.name].values.shape) == np.prod(newShape): - ctxt.globalObjects[node.name].values.reshape(newShape) + if np.prod(buffer.values.shape) == np.prod(shape): + buffer.values.reshape(shape) # The number of elements SHOULD be lower, and we broadcast else: try: - ctxt.globalObjects[node.name].values = np.broadcast_to(ctxt.globalObjects[node.name].values, - newShape) - except: - raise RuntimeError( - f"Could not broadcast {node.name} from {ctxt.globalObjects[node.name].values.shape} to {newShape}!" 
-                            )
-
-        else:
-            raise KeyError(f'Expected node {node.name} to be in context!')
+                            buffer.values = np.broadcast_to(buffer.values, shape)
+                        except ValueError as e:
+                            raise ValueError(
+                                f"Could not broadcast tensor {tensor.name} of node {self.node.name}.") from e
 
         return ctxt
 
@@ -2409,6 +2554,7 @@ def __init__(self,
                  graph: gs.Graph,
                  platform: DeploymentPlatform,
                  inputTypes: Dict[str, Type[Pointer]],
+                 operatorDescriptors: Dict[str, OperatorDescriptor],
                  scheduler: Callable[[gs.Graph], Schedule] = lambda graph: list(graph.nodes),
                  name: str = 'DeeployNetwork',
                  deeployStateDir: str = "DeeployState"):
@@ -2433,6 +2579,7 @@ def __init__(self,
         """
 
         self.graph = graph
+        self.operatorDescriptors = operatorDescriptors
         self.scheduler = scheduler
         self.layerBinding: 'OrderedDict[str, ONNXLayer]' = OrderedDict()
         self.parsed = False
@@ -2604,19 +2751,34 @@ def parse(self, default_channels_first: bool = True) -> bool:
                                    constantBuffer = self.Platform.ConstantBuffer,
                                    structBuffer = self.Platform.StructBuffer,
                                    transientBuffer = self.Platform.TransientBuffer)
 
+        schedule = self.scheduler(self.graph)
+        flatSchedule = []
+        for subGraph in schedule:
+            if isinstance(subGraph, gs.Node):
+                flatSchedule.append(subGraph)
+            else:
+                flatSchedule += subGraph
 
-        self.ctxt = self._createIOBindings(self.ctxt, self.graph)
-
-        self._bindLayers()
+        self.layerBinding: 'OrderedDict[str, ONNXLayer]' = OrderedDict()
+        for node in flatSchedule:
+            assert node.op in self.operatorDescriptors, \
+                f"[ERROR] Error parsing node {node.name}. There is no descriptor for operator {node.op}."
+            desc = self.operatorDescriptors[node.op]
+            assert desc.canonicalize(node, self.graph.opset), \
+                f"[ERROR] Node {node.name} could not be canonicalized as a {node.op} operator."
+            assert desc.check(node), \
+                f"[ERROR] Node {node.name} is not a valid instance of the {node.op} operator."
 
-        ctxt = self.ctxt.copy()
+            layer = self._mapNode(node)
+            if isinstance(layer, ONNXLayer):
+                self.layerBinding[layer.node.name] = layer
 
-        ctxtStack = deque()
         scheduledLayerList = list(self.layerBinding.values())
 
-        idx: int = 0
-        deepestIdx = 0
+        self.ctxt = self._createIOBindings(self.ctxt, self.graph)
+        ctxt = self.ctxt.copy()
+        ctxtStack = deque()
+        idx, deepestIdx = 0, 0
 
         while (idx < len(scheduledLayerList)):
             currentLayer = scheduledLayerList[idx]
@@ -3164,6 +3326,7 @@ def __init__(self,
                  deploymentPlatform: DeploymentPlatform,
                  inputTypes: Dict[str, Type[Pointer]],
                  loweringOptimizer: TopologyOptimizer,
+                 operatorDescriptors: Dict[str, OperatorDescriptor],
                  scheduler: Callable[[gs.Graph], Schedule] = lambda graph: list(graph.nodes),
                  name: str = 'DeeployNetwork',
                  default_channels_first: bool = True,
@@ -3196,7 +3359,13 @@ def __init__(self,
 
         """
 
-        super().__init__(graph, deploymentPlatform, inputTypes, scheduler, name, deeployStateDir = deeployStateDir)
+        super().__init__(graph,
+                         deploymentPlatform,
+                         inputTypes,
+                         operatorDescriptors,
+                         scheduler,
+                         name,
+                         deeployStateDir = deeployStateDir)
 
         self.loweringOptimizer = loweringOptimizer
         self.default_channels_first = default_channels_first
diff --git a/Deeploy/EngineExtension/NetworkDeployers/EngineColoringDeployer.py b/Deeploy/EngineExtension/NetworkDeployers/EngineColoringDeployer.py
index 4b05ab5be..eb7175f61 100644
--- a/Deeploy/EngineExtension/NetworkDeployers/EngineColoringDeployer.py
+++ b/Deeploy/EngineExtension/NetworkDeployers/EngineColoringDeployer.py
@@ -8,7 +8,8 @@
 from Deeploy.AbstractDataTypes import Pointer
 from Deeploy.CommonExtensions.NetworkDeployers.NetworkDeployerWrapper import NetworkDeployerWrapper
-from Deeploy.DeeployTypes import DeploymentPlatform, NetworkDeployer, ONNXLayer, Schedule,
TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, NetworkDeployer, ONNXLayer, OperatorDescriptor, Schedule, \ + TopologyOptimizer from Deeploy.EngineExtension.OptimizationPasses.TopologyOptimizationPasses.EngineColoringPasses import \ EngineColoringPass, EngineMapper @@ -20,12 +21,13 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable[[gs.Graph], Schedule] = lambda graph: list(graph.nodes), name: str = 'DeeployNetwork', default_channels_first: bool = True, deeployStateDir: str = "DeeployState", engineMapperCls: Type[EngineMapper] = EngineMapper): - super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, scheduler, name, + super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, operatorDescriptors, scheduler, name, default_channels_first, deeployStateDir) self._initEngineColoringDeployer(engineMapperCls) diff --git a/Deeploy/MemoryLevelExtension/NetworkDeployers/MemoryLevelDeployer.py b/Deeploy/MemoryLevelExtension/NetworkDeployers/MemoryLevelDeployer.py index 9baeff239..b0541f8fd 100644 --- a/Deeploy/MemoryLevelExtension/NetworkDeployers/MemoryLevelDeployer.py +++ b/Deeploy/MemoryLevelExtension/NetworkDeployers/MemoryLevelDeployer.py @@ -11,8 +11,8 @@ from Deeploy.CommonExtensions.NetworkDeployers.NetworkDeployerWrapper import NetworkDeployerWrapper from Deeploy.CommonExtensions.NetworkDeployers.SignPropDeployer import SignPropDeployer from Deeploy.DeeployTypes import CodeGenVerbosity, ConstantBuffer, DeploymentEngine, DeploymentPlatform, \ - NetworkContext, NetworkDeployer, NetworkOptimizationPass, NetworkOptimizer, ONNXLayer, Schedule, StructBuffer, \ - TopologyOptimizer, TransientBuffer, VariableBuffer, _NoVerbosity + NetworkContext, NetworkDeployer, NetworkOptimizationPass, NetworkOptimizer, ONNXLayer, OperatorDescriptor, \ + Schedule, StructBuffer, TopologyOptimizer, TransientBuffer, VariableBuffer, _NoVerbosity from Deeploy.MemoryLevelExtension.MemoryLevels import MemoryHierarchy, MemoryLevel from Deeploy.MemoryLevelExtension.OptimizationPasses.MemoryLevelAnnotationPasses import AnnotateDefaultMemoryLevel @@ -81,12 +81,13 @@ def __init__(self, deploymentPlatform: Union[MemoryPlatform, MemoryPlatformWrapper], inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable[[gs.Graph], Schedule] = lambda graph: list(graph.nodes), name: str = 'DeeployNetwork', default_channels_first: bool = True, deeployStateDir: str = "DeeployState", memoryLevelAnnotationPasses: List[NetworkOptimizationPass] = []): - super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, scheduler, name, + super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, operatorDescriptors, scheduler, name, default_channels_first, deeployStateDir) if len(memoryLevelAnnotationPasses) == 0: memoryLevelAnnotationPasses.append(AnnotateDefaultMemoryLevel(self.Platform.memoryHierarchy)) @@ -136,13 +137,14 @@ def __init__(self, deploymentPlatform: Union[MemoryPlatform, MemoryPlatformWrapper], inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first: bool = True, deeployStateDir: str = "DeeployState", inputOffsets: Dict[str, int] = {}, 
                 memoryLevelAnnotationPasses: List[NetworkOptimizationPass] = []):
-        super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, scheduler, name,
+        super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, operatorDescriptors, scheduler, name,
                          default_channels_first, deeployStateDir, inputOffsets)
         if len(memoryLevelAnnotationPasses) == 0:
             memoryLevelAnnotationPasses.append(AnnotateDefaultMemoryLevel(self.Platform.memoryHierarchy))
diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py
new file mode 100644
index 000000000..a68706a2c
--- /dev/null
+++ b/Deeploy/OperatorDescriptor.py
@@ -0,0 +1,772 @@
+# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from enum import Enum, IntEnum
+from typing import Any, Dict, Tuple, Union
+
+import numpy as np
+import onnx_graphsurgeon as gs
+
+from Deeploy.DeeployTypes import AttrDesc, IoDesc, OperatorDescriptor, VariadicIoDesc
+
+
+def IntUnpack(value: Any) -> int:
+    if isinstance(value, (list, tuple)) and len(value) == 1:
+        value = value[0]
+
+    if isinstance(value, int):
+        return value
+    elif isinstance(value, float):
+        assert value.is_integer(), f"Received a non-integer value {value}"
+        return int(value)
+    raise ValueError(f"Unsupported value type {type(value)}")
+
+
+def BoolUnpack(value: Any) -> bool:
+    value = IntUnpack(value)
+    assert value in [0, 1], f"Casting to bool only supported from 0, 1. Received {value}"
+    return bool(value)
+
+
+def FloatUnpack(value: Any) -> float:
+    if isinstance(value, (list, tuple)) and len(value) == 1:
+        value = value[0]
+
+    assert isinstance(value, (int, float)), f"Unsupported value type {type(value)}"
+    return float(value)
+
+
+def IntTupleUnpack(value: Any) -> Tuple[int, ...]:
+    try:
+        return tuple(IntUnpack(item) for item in value)
+    except TypeError:
+        return (IntUnpack(value),)
+
+
+def FloatTupleUnpack(value: Any) -> Tuple[float, ...]:
+    try:
+        return tuple(FloatUnpack(item) for item in value)
+    except TypeError:
+        return (FloatUnpack(value),)
+
+
+def IntTupleIfNotSingleItemUnpack(value: Any) -> Union[int, Tuple[int, ...]]:
+    try:
+        return IntUnpack(value)
+    except (ValueError, AssertionError):
+        return IntTupleUnpack(value)
+
+
+def attrToTensor(node: gs.Node, attr: str) -> None:
+    values = node.attrs[attr]
+    if isinstance(values, (int, float)):
+        values = np.array([values])
+    elif isinstance(values, (list, tuple)):
+        values = np.array(values)
+    assert isinstance(values, np.ndarray), f"Unsupported values type {type(values)}"
+    tensor = gs.Constant(f"{node.name}_{attr}", values)
+    node.inputs.append(tensor)
+    node.attrs.pop(attr)
+
+
+concatDesc = OperatorDescriptor(
+    inputDescriptor = VariadicIoDesc("data_in", minNumTensors = 2),
+    outputDescriptor = IoDesc("data_out"),
+    attrDescriptors = [],
+)
+
+iRMSNormDesc = OperatorDescriptor(
+    inputDescriptor = IoDesc(["data_in", "weight"]),
+    outputDescriptor = IoDesc("data_out"),
+    attrDescriptors = [
+        AttrDesc("D", IntUnpack),
+        AttrDesc("n_levels", IntUnpack),
+    ],
+)
+
+
+class SliceDescriptor(OperatorDescriptor):
+
+    def canonicalize(self, node: gs.Node, opset: int) -> bool:
+        if opset < 10:
+            attrToTensor(node, "starts")
+            attrToTensor(node, "ends")
+            if "axes" in node.attrs:
+                attrToTensor(node, "axes")
+
+        return super().canonicalize(node, opset)
+
+
+# Opset: 13
+sliceDesc = SliceDescriptor(
+    inputDescriptor = IoDesc(["data_in", "starts", "ends"], ["axes", "steps"]),
+    outputDescriptor = IoDesc("data_out"),
+    attrDescriptors = [],
+)
+
+# Opset: 1
+sliceDescOld =
OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("axes", IntTupleUnpack, lambda n: range(len(n.attrs["starts"]))), + AttrDesc("ends", IntTupleUnpack), + AttrDesc("starts", IntTupleUnpack), + ], +) + +transposeDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [AttrDesc("perm", IntTupleUnpack)], +) + + +class CeilMode(IntEnum): + floor = 0 + ceil = 1 + + +maxPoolDesc = OperatorDescriptor(inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("ceil_mode", unpacker = CeilMode, default = CeilMode.floor), + AttrDesc("kernel_shape", IntTupleUnpack), + AttrDesc("pads", IntTupleUnpack), + AttrDesc("strides", IntTupleUnpack), + ]) + + +class PadMode(str, Enum): + constant = "constant" + reflect = "reflect" + edge = "edge" + wrap = "wrap" + + +# Opset 24 +padDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "pads"], ["constant_value", "axes"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc('mode', unpacker = PadMode, default = PadMode.constant), + ], +) + + +class PadModeOld(str, Enum): + constant = "constant" + reflect = "reflect" + edge = "edge" + + +padDescOld = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("mode", unpacker = PadModeOld, default = PadModeOld.constant), + AttrDesc("pads", IntTupleUnpack), + AttrDesc("value", FloatUnpack), + ], +) + +addDesc = OperatorDescriptor( + inputDescriptor = VariadicIoDesc("data_in", minNumTensors = 2), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [], +) + + +class ReduceMeanDescriptor(OperatorDescriptor): + + def canonicalize(self, node: gs.Node, opset: int) -> bool: + if opset < 18: + if "axes" in node.attrs: + attrToTensor(node, "axes") + return super().canonicalize(node, opset) + + +# Opset 18 +reduceMeanDesc = ReduceMeanDescriptor( + inputDescriptor = IoDesc("data_in", optional = "axes"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("keepdims", unpacker = BoolUnpack, default = True), + AttrDesc("noop_with_empty_axes", unpacker = BoolUnpack, default = False), + ], +) + +reduceSumDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in", optional = "axes"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("keepdims", unpacker = BoolUnpack, default = True), + AttrDesc("noop_with_empty_axes", unpacker = BoolUnpack, default = False), + ], +) + +softmaxDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [AttrDesc("axis", IntUnpack, default = -1)], +) + +softmaxGradDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["upstream_grad", "softmax_output"]), + outputDescriptor = IoDesc("softmax_grad"), + attrDescriptors = [AttrDesc("axis", IntUnpack, default = -1)], +) + +iSoftmaxDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("axis", IntUnpack, default = -1), + AttrDesc("coeffA", IntUnpack), + AttrDesc("coeffB", IntUnpack), + AttrDesc("coeffC", IntUnpack), + AttrDesc("log2", IntUnpack), + AttrDesc("n_levels", IntUnpack), + ], +) + +itaMaxDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("axis", 
IntUnpack, default = -1), + AttrDesc("n_levels", IntUnpack), + ], +) + +itaPartialMaxDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("axis", IntUnpack, default = -1), + AttrDesc("n_levels", IntUnpack), + AttrDesc("group_width", IntUnpack), + ], +) + + +class GeluApprox(str, Enum): + tanh = "tanh" + none = "none" + + +geluDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("approximate", GeluApprox, default = GeluApprox.none), + ], +) + +iGeluDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("b", IntUnpack), + AttrDesc("one", IntUnpack), + ], +) + +requantizedIGeluDesc = OperatorDescriptor(inputDescriptor = IoDesc(["data_in", "mul", "add", "shift"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("b", IntUnpack), + AttrDesc("one", IntUnpack), + ]) + +iHardswishDesc = OperatorDescriptor(inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("one_over_six", IntUnpack), + AttrDesc("six", IntUnpack), + AttrDesc("three", IntUnpack), + ]) + +requantizedIHardswishDesc = OperatorDescriptor(inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("one_over_six", IntUnpack), + AttrDesc("six", IntUnpack), + AttrDesc("three", IntUnpack), + AttrDesc("mul", IntUnpack), + AttrDesc("add", IntUnpack), + AttrDesc("shift", IntUnpack), + ]) + +iNoNormDesc = OperatorDescriptor(inputDescriptor = IoDesc(["data_in", "weights", "bias"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("D", IntUnpack), + AttrDesc("mul", IntUnpack), + AttrDesc("n_levels", IntUnpack), + ]) + +quantDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("scale", FloatUnpack), + AttrDesc("zero_point", FloatUnpack), + AttrDesc("bit_width", IntUnpack), + AttrDesc("signed", BoolUnpack, default = True), + AttrDesc("min_val", + IntUnpack, + default = lambda node: -(2**(node.attrs["bit_width"] - 1)) if node.attrs["signed"] else 0), + AttrDesc("max_val", + IntUnpack, + default = lambda node: 2**(node.attrs["bit_width"] - 1) - 1 + if node.attrs["signed"] else 2**node.attrs["bit_width"] - 1), + ], +) + + +class AutoPad(str, Enum): + NOTSET = "NOTSET" + SAME_UPPER = "SAME_UPPER" + SAME_LOWER = "SAME_LOWER" + VALID = "VALID" + + +def _dilationsDefault(node: gs.Node) -> Tuple[int, ...]: + # Remove 2 dims for input and output channels + nSpatialDims = len(node.inputs[1].shape) - 2 + return tuple([1] * nSpatialDims) + + +def _kernelShapeDefault(node: gs.Node) -> Tuple[int, ...]: + # Remove 2 dims for input and output channels + nSpatialDims = len(node.inputs[1].shape) - 2 + return node.inputs[1].shape[-nSpatialDims:] + + +def _stridesDefault(node: gs.Node) -> Tuple[int, ...]: + # Remove 2 dims for input and output channels + nSpatialDims = len(node.inputs[1].shape) - 2 + return tuple([1] * nSpatialDims) + + +def _padsDefault(node: gs.Node) -> Tuple[int, ...]: + # Remove 2 dims for input and output channels + nSpatialDims = len(node.inputs[1].shape) - 2 + # Two 0's per dimension for begin and end + return tuple([0] * (2 * nSpatialDims)) + + +convDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "weight"], optional = "bias"), + 
outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("auto_pad", AutoPad, default = AutoPad.NOTSET), + AttrDesc("dilations", IntTupleUnpack, default = _dilationsDefault), + AttrDesc("group", IntUnpack, default = 1), + AttrDesc("kernel_shape", IntTupleUnpack, default = _kernelShapeDefault), + AttrDesc("pads", IntTupleUnpack, default = _padsDefault), + AttrDesc("strides", IntTupleUnpack, default = _stridesDefault), + ], +) + + +class RequantizedOperatorDescriptor(OperatorDescriptor): + + def canonicalize(self, node: gs.Node, opset: int) -> bool: + if "n_levels_out" in node.attrs and "n_levels" in node.attrs: + # TODO: Change to log + print("[WARNING] Requantized operator cannot have n_levels_out and n_levels in its attributes") + return False + + if "n_levels_out" in node.attrs: + node.attrs["n_levels"] = node.attrs["n_levels_out"] + node.attrs.pop("n_levels_out") + + return super().canonicalize(node, opset) + + +requantizedConvDesc = RequantizedOperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "weight", "mul", "add"], optional = ["shift"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + # Conv attrs + AttrDesc("auto_pad", AutoPad, default = AutoPad.NOTSET), + AttrDesc("dilations", IntTupleUnpack, default = _dilationsDefault), + AttrDesc("group", IntUnpack, default = 1), + AttrDesc("kernel_shape", IntTupleUnpack, default = _kernelShapeDefault), + AttrDesc("pads", IntTupleUnpack, default = _padsDefault), + AttrDesc("strides", IntTupleUnpack, default = _stridesDefault), + # RequantizedShift attrs + AttrDesc("n_levels", IntUnpack), + AttrDesc("signed", BoolUnpack), + AttrDesc("div", IntUnpack), + ], +) + +dequantDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("scale", FloatUnpack), + AttrDesc("zero_point", FloatUnpack), + AttrDesc("bit_width", IntUnpack), + AttrDesc("signed", BoolUnpack), + ], +) + +divDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["input1", "input2"]), + outputDescriptor = IoDesc("output"), + attrDescriptors = [], +) + +integerDivDescriptor = OperatorDescriptor( + inputDescriptor = IoDesc(["A", "B"]), + outputDescriptor = IoDesc("C"), + attrDescriptors = [ + AttrDesc("Delta", IntUnpack), + AttrDesc("eps", IntUnpack), + AttrDesc("eta", IntUnpack), + ], +) + +requantizedIntegerDivDescriptor = RequantizedOperatorDescriptor( + inputDescriptor = IoDesc(["A", "B", "requant_mul", "requant_add", "requant_div"]), + outputDescriptor = IoDesc("C"), + attrDescriptors = [ + # IntegerDiv attrs + AttrDesc("Delta", IntUnpack), + AttrDesc("eps", IntUnpack), + AttrDesc("eta", IntUnpack), + # RequantizedShift attrs + AttrDesc("n_levels", IntUnpack), + AttrDesc("signed", BoolUnpack), + AttrDesc("div", IntUnpack), + ]) + +debugPrintDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [], +) + +layerNormalizationDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "weight", "bias"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [AttrDesc("epsilon", FloatUnpack)], +) + +iLayerNormDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "weight", "bias"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [AttrDesc("D", IntUnpack), AttrDesc("n_levels", IntUnpack)], +) + +flattenDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [AttrDesc("axis", IntUnpack, 
default = 1)], +) + +gatherDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "indices"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [AttrDesc("axis", IntUnpack, default = 0)], +) + +# Opset <= 11 +unsqueezeDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [AttrDesc("axes", IntTupleUnpack)], +) + +# Opset <= 11 +squeezeDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [AttrDesc("axes", IntTupleUnpack)], +) + +mulDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["A", "B"]), + outputDescriptor = IoDesc("C"), + attrDescriptors = [], +) + +matMulDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["A", "B"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [], +) + +rqMatMulDesc = RequantizedOperatorDescriptor( + inputDescriptor = IoDesc(["A", "B", "add", "mul"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + # RequantizedShift attrs + AttrDesc("n_levels", IntUnpack), + AttrDesc("signed", BoolUnpack), + AttrDesc("div", IntUnpack), + ], +) + +gemmDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["A", "B"], optional = ["C"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("alpha", FloatUnpack, default = 1.0), + AttrDesc("beta", FloatUnpack, default = 1.0), + AttrDesc("transA", BoolUnpack, default = False), + AttrDesc("transB", BoolUnpack, default = False), + ], +) + +rqGemmDesc = RequantizedOperatorDescriptor( + inputDescriptor = IoDesc(["A", "B", "C", "add", "mul"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("alpha", FloatUnpack, default = 1.0), + AttrDesc("beta", FloatUnpack, default = 1.0), + AttrDesc("transA", BoolUnpack, default = False), + AttrDesc("transB", BoolUnpack, default = False), + # RequantizedShift attrs + AttrDesc("n_levels", IntUnpack), + AttrDesc("signed", BoolUnpack), + AttrDesc("div", IntUnpack), + ]) + +requantizedGemmDesc = RequantizedOperatorDescriptor( + inputDescriptor = IoDesc(["A", "B", "add", "mul"]), # Important diff to RQGemm + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("alpha", FloatUnpack, default = 1.0), + AttrDesc("beta", FloatUnpack, default = 1.0), + AttrDesc("transA", BoolUnpack, default = False), + AttrDesc("transB", BoolUnpack, default = False), + # RequantizedShift attrs + AttrDesc("n_levels", IntUnpack), + AttrDesc("signed", BoolUnpack), + AttrDesc("div", IntUnpack), + ]) + +linearAttentionDesc = OperatorDescriptor( + inputDescriptor = IoDesc( + ["q", "k", "v", "wq_weight", "wq_bias", "wk_weight", "wk_bias", "wv_weight", "wv_bias", "wo_weight", + "wo_bias"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("preattn_requant_mul", IntTupleUnpack), + AttrDesc("preattn_requant_div", IntTupleUnpack), + AttrDesc("normalizer_requant_mul", IntTupleUnpack), + AttrDesc("normalizer_requant_shift", IntTupleUnpack), + AttrDesc("normalizer_requant_div", IntTupleUnpack), + AttrDesc("postattn_requant_mul", IntTupleUnpack), + AttrDesc("postattn_requant_shift", IntTupleUnpack), + AttrDesc("postattn_requant_div", IntTupleUnpack), + AttrDesc("wo_requant_mul", IntTupleUnpack), + AttrDesc("wo_requant_shift", IntTupleUnpack), + AttrDesc("wo_requant_div", IntTupleUnpack), + AttrDesc("wq_requant_mul", IntTupleUnpack), + AttrDesc("wq_requant_shift", IntTupleUnpack), + AttrDesc("wq_requant_div", IntTupleUnpack), + AttrDesc("wk_requant_mul", 
IntTupleUnpack), + AttrDesc("wk_requant_shift", IntTupleUnpack), + AttrDesc("wk_requant_div", IntTupleUnpack), + AttrDesc("wv_requant_mul", IntTupleUnpack), + AttrDesc("wv_requant_shift", IntTupleUnpack), + AttrDesc("wv_requant_div", IntTupleUnpack), + AttrDesc("Delta", IntUnpack), + AttrDesc("eps", IntUnpack), + AttrDesc("act_type", IntUnpack), + AttrDesc("n_levels", IntUnpack), + AttrDesc("dim", IntUnpack), + AttrDesc("dim_head", IntUnpack), + AttrDesc("heads", IntUnpack), + ], +) + +clcaDesc = OperatorDescriptor( + inputDescriptor = IoDesc([ + "q", "k", "wq_weight", "wq_bias", "wk_weight", "wk_bias", "wo_weight", "wo_bias", "wq_requant_mul", + "wq_requant_add", "wq_requant_div", "wk_requant_mul", "wk_requant_add", "wk_requant_div", "wv_requant_mul", + "wv_requant_add", "wv_requant_div", "kdiv_requant_mul", "kdiv_requant_add", "kdiv_requant_div", + "preattn_requant_mul", "preattn_requant_add", "preattn_requant_div", "postattn_requant_mul", + "postattn_requant_add", "postattn_requant_div", "wo_requant_mul", "wo_requant_add", "wo_requant_div" + ]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("Delta", IntUnpack), + AttrDesc("eps", IntUnpack), + AttrDesc("eta", IntUnpack), + AttrDesc("act_type", IntUnpack), + AttrDesc("n_levels", IntUnpack), + AttrDesc("dim", IntUnpack), + AttrDesc("dim_head", IntUnpack), + AttrDesc("out_dim", IntUnpack), + AttrDesc("heads", IntUnpack), + ], +) + +mhsaDesc = OperatorDescriptor( + inputDescriptor = IoDesc( + ["q", "k", "v", "wq_weight", "wq_bias", "wk_weight", "wk_bias", "wv_weight", "wv_bias", "wo_weight", + "wo_bias"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("preattn_requant_mul", IntTupleIfNotSingleItemUnpack), + AttrDesc("preattn_requant_div", IntTupleIfNotSingleItemUnpack), + AttrDesc("postattn_requant_mul", IntTupleIfNotSingleItemUnpack), + AttrDesc("postattn_requant_div", IntTupleIfNotSingleItemUnpack), + AttrDesc("wo_requant_mul", IntTupleIfNotSingleItemUnpack), + AttrDesc("wo_requant_div", IntTupleIfNotSingleItemUnpack), + AttrDesc("wq_requant_mul", IntTupleIfNotSingleItemUnpack), + AttrDesc("wq_requant_div", IntTupleIfNotSingleItemUnpack), + AttrDesc("wk_requant_mul", IntTupleIfNotSingleItemUnpack), + AttrDesc("wk_requant_div", IntTupleIfNotSingleItemUnpack), + AttrDesc("wv_requant_mul", IntTupleIfNotSingleItemUnpack), + AttrDesc("wv_requant_div", IntTupleIfNotSingleItemUnpack), + AttrDesc("n_levels", IntUnpack), + AttrDesc("dim", IntUnpack), + AttrDesc("dim_head", IntUnpack), + AttrDesc("heads", IntUnpack), + AttrDesc("signed", BoolUnpack), + ], +) + +reluDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [], +) + +reshapeDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "shape"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [], +) + +requantShiftDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "mul", "add"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [], +) + + +class RequantizedAddDescriptor(OperatorDescriptor): + + def canonicalize(self, node: gs.Node, opset: int) -> bool: + for tensor in ["rqs1", "rqs2", "rqsOut"]: + n_levels = f"{tensor}_n_levels" + n_levels_out = f"{tensor}_n_levels_out" + if n_levels_out in node.attrs and n_levels in node.attrs: + # TODO: Change to log + print( + f"[WARNING] RequantizedAdd tensor {tensor} cannot have {n_levels_out} and {n_levels} in its attributes" + ) + return False + + if n_levels_out in node.attrs: 
+                node.attrs[n_levels] = node.attrs[n_levels_out]
+                node.attrs.pop(n_levels_out)
+
+        return super().canonicalize(node, opset)
+
+
+requantizedAddDesc = RequantizedAddDescriptor(
+    inputDescriptor = IoDesc(["data_in_0", "data_in_1"]),
+    outputDescriptor = IoDesc("data_out"),
+    attrDescriptors = [
+        AttrDesc("rqs1_mul", IntUnpack),
+        AttrDesc("rqs1_add", IntUnpack),
+        AttrDesc("rqs1_div", IntUnpack),
+        AttrDesc("rqs1_signed", BoolUnpack),
+        AttrDesc("rqs1_n_levels", IntUnpack),
+        AttrDesc("rqs2_mul", IntUnpack),
+        AttrDesc("rqs2_add", IntUnpack),
+        AttrDesc("rqs2_div", IntUnpack),
+        AttrDesc("rqs2_signed", BoolUnpack),
+        AttrDesc("rqs2_n_levels", IntUnpack),
+        AttrDesc("rqsOut_mul", IntUnpack),
+        AttrDesc("rqsOut_add", IntUnpack),
+        AttrDesc("rqsOut_div", IntUnpack),
+        AttrDesc("rqsOut_signed", BoolUnpack),
+        AttrDesc("rqsOut_n_levels", IntUnpack),
+    ],
+)
+
+sgdDesc = OperatorDescriptor(
+    inputDescriptor = IoDesc(["weight", "grad"]),
+    outputDescriptor = IoDesc("weight_updated"),
+    attrDescriptors = [AttrDesc("lr", FloatUnpack)],
+)
+
+softmaxCrossEntropyLossDesc = OperatorDescriptor(
+    inputDescriptor = IoDesc(["logits", "labels"]),
+    outputDescriptor = IoDesc("log_prob"),
+    attrDescriptors = [],
+)
+
+softmaxCrossEntropyLossGradDesc = OperatorDescriptor(
+    inputDescriptor = IoDesc(["log_prob", "labels"]),
+    outputDescriptor = IoDesc("grad"),
+    attrDescriptors = [],
+)
+
+defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = {
+    "Add": addDesc,
+    "CLCA": clcaDesc,
+    "Concat": concatDesc,
+    "Conv": convDesc,
+    "DebugPrint": debugPrintDesc,
+    "Dequant": dequantDesc,
+    "Div": divDesc,
+    "Flatten": flattenDesc,
+    "Gather": gatherDesc,
+    "Gelu": geluDesc,
+    "Gemm": gemmDesc,
+    "ITAMax": itaMaxDesc,
+    "ITAPartialMax": itaPartialMaxDesc,
+    "IntegerDiv": integerDivDescriptor,
+    "IntegerMean": reduceMeanDesc,
+    "LayerNormalization": layerNormalizationDesc,
+    "LinearAttention": linearAttentionDesc,
+    "MHSA": mhsaDesc,
+    "MatMul": matMulDesc,
+    "MatMulInteger": matMulDesc,
+    "MaxPool": maxPoolDesc,
+    "Mul": mulDesc,
+    "Pad": padDescOld,
+    "Quant": quantDesc,
+    "RQGemm": rqGemmDesc,
+    "RQIntegerDiv": requantizedIntegerDivDescriptor,
+    "RQMatMul": rqMatMulDesc,
+    "ReduceMean": reduceMeanDesc,
+    "ReduceSum": reduceSumDesc,
+    "Relu": reluDesc,
+    "RequantizedAdd": requantizedAddDesc,
+    "RequantizedConv": requantizedConvDesc,
+    "RequantizedGemm": requantizedGemmDesc,
+    "RequantizediGELU": requantizedIGeluDesc,
+    "RequantizediHardswish": requantizedIHardswishDesc,
+    "RequantShift": requantShiftDesc,
+    "Reshape": reshapeDesc,
+    "SGD": sgdDesc,
+    "Slice": sliceDesc,
+    "Softmax": softmaxDesc,
+    "SoftmaxCrossEntropyLoss": softmaxCrossEntropyLossDesc,
+    "SoftmaxCrossEntropyLossGrad": softmaxCrossEntropyLossGradDesc,
+    "SoftmaxGrad": softmaxGradDesc,
+    "Squeeze": squeezeDesc,
+    "Transpose": transposeDesc,
+    "Unsqueeze": unsqueezeDesc,
+    "iGELU": iGeluDesc,
+    "iHardswish": iHardswishDesc,
+    "iLayerNorm": iLayerNormDesc,
+    "iNoNorm": iNoNormDesc,
+    "iRMSNorm": iRMSNormDesc,
+    "iSoftmax": iSoftmaxDesc,
+}
diff --git a/Deeploy/Targets/Chimera/Deployer.py b/Deeploy/Targets/Chimera/Deployer.py
index ba28279b6..85b0496e3 100644
--- a/Deeploy/Targets/Chimera/Deployer.py
+++ b/Deeploy/Targets/Chimera/Deployer.py
@@ -8,7 +8,7 @@
 from Deeploy.AbstractDataTypes import Pointer
 from Deeploy.CommonExtensions.NetworkDeployers.SignPropDeployer import SignPropDeployer
-from Deeploy.DeeployTypes import DeploymentPlatform, TopologyOptimizer
+from Deeploy.DeeployTypes import
DeploymentPlatform, OperatorDescriptor, TopologyOptimizer class ChimeraDeployer(SignPropDeployer): @@ -18,6 +18,7 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first = False, @@ -27,6 +28,7 @@ def __init__(self, deploymentPlatform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name, default_channels_first = default_channels_first, diff --git a/Deeploy/Targets/CortexM/Deployer.py b/Deeploy/Targets/CortexM/Deployer.py index bef8fdcf3..9a4f27b06 100644 --- a/Deeploy/Targets/CortexM/Deployer.py +++ b/Deeploy/Targets/CortexM/Deployer.py @@ -11,7 +11,7 @@ from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.DebugPasses import DebugPrintMergePass from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.LoweringOptimizationPasses import \ NCHWtoNHWCPass, TransposeMatmulInputsPass -from Deeploy.DeeployTypes import DeploymentPlatform, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, OperatorDescriptor, TopologyOptimizer from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import TransposeConstOptPass, TransposeMergePass @@ -22,6 +22,7 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first = False, @@ -32,6 +33,7 @@ def __init__(self, deploymentPlatform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name, default_channels_first = default_channels_first, diff --git a/Deeploy/Targets/Generic/Bindings.py b/Deeploy/Targets/Generic/Bindings.py index 24fc8c0d2..c03bcd02f 100644 --- a/Deeploy/Targets/Generic/Bindings.py +++ b/Deeploy/Targets/Generic/Bindings.py @@ -8,7 +8,7 @@ from Deeploy.CommonExtensions.CodeTransformationPasses.MemoryAllocation import ArgumentStructGeneration, \ MemoryManagementGeneration, MemoryPassthroughGeneration from Deeploy.CommonExtensions.DataTypes import FloatDataTypes, IntegerDataTypes, SignedIntegerDataTypes, float32_t, \ - int8_t, int32_t, uint8_t + int8_t, int32_t, int64_t, uint8_t from Deeploy.DeeployTypes import CodeTransformation, NodeBinding from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration from Deeploy.Targets.Generic.Templates import AddTemplate, ConcatTemplate, ConvTemplate, DebugPrintTemplate, \ @@ -179,13 +179,11 @@ ] BasicReduceMeanBindings = [ - NodeBinding(ReduceMeanChecker([PointerClass(type)], [PointerClass(type)]), ReduceMeanTemplate.referenceTemplate, - BasicTransformer) for type in SignedIntegerDataTypes + NodeBinding(ReduceMeanChecker([PointerClass(ty), PointerClass(int64_t)], [PointerClass(ty)]), + ReduceMeanTemplate.referenceTemplate, BasicTransformer) for ty in SignedIntegerDataTypes ] + [ - NodeBinding(ReduceMeanChecker([PointerClass(float_type), PointerClass(integer_type)], [PointerClass(float_type)]), - FloatReduceMeanTemplate.referenceTemplate, BasicTransformer) - for integer_type in SignedIntegerDataTypes - for float_type in FloatDataTypes + NodeBinding(ReduceMeanChecker([PointerClass(ty), PointerClass(int64_t)], [PointerClass(ty)]), + FloatReduceMeanTemplate.referenceTemplate, BasicTransformer) for ty in FloatDataTypes ] 
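 
+# For instance, the int8_t instantiation of the integer bindings above expands to:
+#   NodeBinding(ReduceMeanChecker([PointerClass(int8_t), PointerClass(int64_t)], [PointerClass(int8_t)]),
+#               ReduceMeanTemplate.referenceTemplate, BasicTransformer)
+# i.e. ReduceMean now receives its `axes` as an int64 input tensor (as in ONNX
+# opset >= 18) instead of reading them from the node attributes.
+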
BasicReduceSumBindings = [ diff --git a/Deeploy/Targets/Generic/Deployer.py b/Deeploy/Targets/Generic/Deployer.py index 3cef57a2e..9bf89a8a0 100644 --- a/Deeploy/Targets/Generic/Deployer.py +++ b/Deeploy/Targets/Generic/Deployer.py @@ -11,7 +11,7 @@ from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.DebugPasses import DebugPrintMergePass from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.LoweringOptimizationPasses import \ NCHWtoNHWCPass, TransposeMatmulInputsPass -from Deeploy.DeeployTypes import DeploymentPlatform, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, OperatorDescriptor, TopologyOptimizer from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import TransposeConstOptPass, TransposeMergePass @@ -22,6 +22,7 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first = False, @@ -32,6 +33,7 @@ def __init__(self, deploymentPlatform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name, default_channels_first = default_channels_first, diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index 39372c514..88ec68e9e 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -52,7 +52,7 @@ def parseNode(self, node: gs.Node) -> (bool): if ret: - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels']) + self.operatorRepresentation['n_levels'] = node.attrs['n_levels'] self.operatorRepresentation['log2D'] = int(math.log2(node.attrs['D'])) return ret @@ -622,11 +622,11 @@ def parseNode(self, node: gs.Node) -> bool: ]) if wellFormed: - self.operatorRepresentation['coeffA'] = int(node.attrs['coeffA'].values) - self.operatorRepresentation['coeffB'] = int(node.attrs['coeffB'].values) - self.operatorRepresentation['coeffC'] = int(node.attrs['coeffC'].values) - self.operatorRepresentation['log2'] = int(node.attrs['log2'].values) - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values) + self.operatorRepresentation['coeffA'] = node.attrs['coeffA'] + self.operatorRepresentation['coeffB'] = node.attrs['coeffB'] + self.operatorRepresentation['coeffC'] = node.attrs['coeffC'] + self.operatorRepresentation['log2'] = node.attrs['log2'] + self.operatorRepresentation['n_levels'] = node.attrs['n_levels'] return wellFormed @@ -651,7 +651,7 @@ def parseNode(self, node: gs.Node) -> bool: ret = all(['n_levels' in node.attrs]) if ret and wellFormed: - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values) + self.operatorRepresentation['n_levels'] = node.attrs['n_levels'] return True return False @@ -678,8 +678,8 @@ def parseNode(self, node: gs.Node) -> bool: ret = all(['group_width' in node.attrs, 'n_levels' in node.attrs]) if ret and wellFormed: - self.operatorRepresentation['group_width'] = int(node.attrs['group_width']) - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values) + self.operatorRepresentation['group_width'] = node.attrs['group_width'] + self.operatorRepresentation['n_levels'] = node.attrs['n_levels'] return True return False @@ -801,8 +801,8 @@ def parseNode(self, node: gs.Node) -> bool: if ret: self.operatorRepresentation['D'] = node.attrs['D'] - self.operatorRepresentation['log2D'] = int(np.log2(node.attrs['D'].values).tolist()[0]) - 
self.operatorRepresentation['mul'] = int(node.attrs['mul'].values.tolist()[0]) + self.operatorRepresentation['log2D'] = int(math.log2(node.attrs['D'])) + self.operatorRepresentation['mul'] = node.attrs['mul'] self.operatorRepresentation['n_levels'] = node.attrs['n_levels'] return ret @@ -1358,23 +1358,7 @@ def parseNode(self, node: gs.Node) -> (bool): ]) if ret: - self.operatorRepresentation['preattn_requant_mul'] = node.attrs['preattn_requant_mul'] - self.operatorRepresentation['preattn_requant_div'] = node.attrs['preattn_requant_div'] - self.operatorRepresentation['postattn_requant_mul'] = node.attrs['postattn_requant_mul'] - self.operatorRepresentation['postattn_requant_div'] = node.attrs['postattn_requant_div'] - self.operatorRepresentation['wo_requant_mul'] = node.attrs['wo_requant_mul'] - self.operatorRepresentation['wo_requant_div'] = node.attrs['wo_requant_div'] - self.operatorRepresentation['wq_requant_mul'] = node.attrs['wq_requant_mul'] - self.operatorRepresentation['wq_requant_div'] = node.attrs['wq_requant_div'] - self.operatorRepresentation['wk_requant_mul'] = node.attrs['wk_requant_mul'] - self.operatorRepresentation['wk_requant_div'] = node.attrs['wk_requant_div'] - self.operatorRepresentation['wv_requant_mul'] = node.attrs['wv_requant_mul'] - self.operatorRepresentation['wv_requant_div'] = node.attrs['wv_requant_div'] - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels']) - self.operatorRepresentation['dim'] = int(node.attrs['dim']) # Sequence Length - self.operatorRepresentation['dim_head'] = int(node.attrs['dim_head']) # Projection Size - self.operatorRepresentation['heads'] = int(node.attrs['heads']) - self.operatorRepresentation['signed'] = int(node.attrs['signed']) + self.operatorRepresentation.update(node.attrs) return ret @@ -1422,37 +1406,26 @@ def parseNode(self, node: gs.Node) -> (bool): ]) if ret: - self.operatorRepresentation['preattn_requant_mul'] = int(node.attrs['preattn_requant_mul'].values) - self.operatorRepresentation['preattn_requant_shift'] = int(node.attrs['preattn_requant_shift'].values) - self.operatorRepresentation['preattn_requant_div'] = int( - math.log2(int(node.attrs['preattn_requant_div'].values))) - self.operatorRepresentation['normalizer_requant_mul'] = int(node.attrs['normalizer_requant_mul'].values) - self.operatorRepresentation['normalizer_requant_shift'] = int(node.attrs['normalizer_requant_shift'].values) - self.operatorRepresentation['normalizer_requant_div'] = int( - math.log2(int(node.attrs['normalizer_requant_div'].values))) - self.operatorRepresentation['postattn_requant_mul'] = int(node.attrs['postattn_requant_mul'].values) - self.operatorRepresentation['postattn_requant_shift'] = int(node.attrs['postattn_requant_shift'].values) - self.operatorRepresentation['postattn_requant_div'] = int( - math.log2(int(node.attrs['postattn_requant_div'].values))) - self.operatorRepresentation['wo_requant_mul'] = int(node.attrs['wo_requant_mul'].values) - self.operatorRepresentation['wo_requant_shift'] = int(node.attrs['wo_requant_shift'].values) - self.operatorRepresentation['wo_requant_div'] = int(math.log2(int(node.attrs['wo_requant_div'].values))) - self.operatorRepresentation['wq_requant_mul'] = int(node.attrs['wq_requant_mul'].values) - self.operatorRepresentation['wq_requant_shift'] = int(node.attrs['wq_requant_shift'].values) - self.operatorRepresentation['wq_requant_div'] = int(math.log2(int(node.attrs['wq_requant_div'].values))) - self.operatorRepresentation['wk_requant_mul'] = int(node.attrs['wk_requant_mul'].values) - 
self.operatorRepresentation['wk_requant_shift'] = int(node.attrs['wk_requant_shift'].values)
-            self.operatorRepresentation['wk_requant_div'] = int(math.log2(int(node.attrs['wk_requant_div'].values)))
-            self.operatorRepresentation['wv_requant_mul'] = int(node.attrs['wv_requant_mul'].values)
-            self.operatorRepresentation['wv_requant_shift'] = int(node.attrs['wv_requant_shift'].values)
-            self.operatorRepresentation['wv_requant_div'] = int(math.log2(int(node.attrs['wv_requant_div'].values)))
-            self.operatorRepresentation['Delta'] = int(node.attrs['Delta'])
-            self.operatorRepresentation['eps'] = int(node.attrs['eps'])
-            self.operatorRepresentation['act_type'] = int(node.attrs['act_type'])
-            self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values)
-            self.operatorRepresentation['dim'] = int(node.attrs['dim'].values)
-            self.operatorRepresentation['dim_head'] = int(node.attrs['dim_head'].values)
-            self.operatorRepresentation['heads'] = int(node.attrs['heads'].values)
+            self.operatorRepresentation.update(node.attrs)
+
+            # All *_div attrs are converted to their log2 values
+            log2Attrs = [
+                "preattn_requant_div",
+                "normalizer_requant_div",
+                "postattn_requant_div",
+                "wo_requant_div",
+                "wq_requant_div",
+                "wk_requant_div",
+                "wv_requant_div",
+            ]
+
+            for attr in log2Attrs:
+                value = self.operatorRepresentation[attr]
+                assert isinstance(value, int)
+                self.operatorRepresentation[attr] = int(math.log2(value))
 
         return ret
 
@@ -1494,15 +1467,7 @@ def parseNode(self, node: gs.Node) -> (bool):
         ])
 
         if ret:
-            self.operatorRepresentation['Delta'] = int(node.attrs['Delta'])
-            self.operatorRepresentation['eps'] = int(node.attrs['eps'])
-            self.operatorRepresentation['eta'] = int(node.attrs['eta'])
-            self.operatorRepresentation['act_type'] = int(node.attrs['act_type'])
-            self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values)
-            self.operatorRepresentation['dim'] = int(node.attrs['dim'].values)
-            self.operatorRepresentation['dim_head'] = int(node.attrs['dim_head'].values)
-            self.operatorRepresentation['out_dim'] = int(node.attrs['out_dim'].values)
-            self.operatorRepresentation['heads'] = int(node.attrs['heads'].values)
+            self.operatorRepresentation.update(node.attrs)
 
         return ret
 
@@ -1640,27 +1605,40 @@ def parseNodeCtxt(self,
             node.inputs.append(zeroTensor)
             self.operatorRepresentation['C'] = f'{node.name}_C_Tensor'
 
+        buffA = ctxt.lookup(node.inputs[0].name)
+        assert isinstance(buffA, VariableBuffer)
+        buffB = ctxt.lookup(node.inputs[1].name)
+        assert isinstance(buffB, VariableBuffer)
+        buffOut = ctxt.lookup(node.outputs[0].name)
+        assert isinstance(buffOut, VariableBuffer)
+
         # Store the input and output shapes in the operator representation
-        self.operatorRepresentation['size'] = np.prod(ctxt.lookup(node.inputs[0].name).shape)
-        self.operatorRepresentation['A_shape'] = ctxt.lookup(node.inputs[0].name).shape
-        self.operatorRepresentation['B_shape'] = ctxt.lookup(node.inputs[1].name).shape
-        self.operatorRepresentation['data_out_shape'] = ctxt.lookup(node.outputs[0].name).shape
+        self.operatorRepresentation['size'] = np.prod(buffA.shape)
+        self.operatorRepresentation['A_shape'] = buffA.shape
+        self.operatorRepresentation['B_shape'] = buffB.shape
+        self.operatorRepresentation['data_out_shape'] = buffOut.shape
+
+        if self.operatorRepresentation['transA']:
+            N_A, M = buffA.shape[-2:]
+        else:
+            M, N_A = buffA.shape[-2:]
+
+        if self.operatorRepresentation['transB']:
+            O, N_B = buffB.shape[-2:]
+        else:
+            N_B, O =
buffB.shape[-2:] # Store the matrix dimensions in the operator representation - self.operatorRepresentation['M'] = ctxt.lookup( - node.inputs[0].name).shape[(-2 + self.operatorRepresentation['transA'])] - self.operatorRepresentation['N'] = ctxt.lookup( - node.inputs[0].name).shape[(-1 - self.operatorRepresentation['transA'])] - self.operatorRepresentation['O'] = ctxt.lookup( - node.inputs[1].name).shape[(-1 - self.operatorRepresentation['transB'])] + self.operatorRepresentation['M'] = M + self.operatorRepresentation['N'] = N_A + self.operatorRepresentation['O'] = O # SCHEREMO: Assert that reduction dimension is the same on both matrices - ret = ret and (self.operatorRepresentation['N'] == ctxt.lookup( - node.inputs[1].name).shape[-2 + self.operatorRepresentation['transB']]) + ret = ret and N_A == N_B # Check if the batch dimensions are compatible - self.operatorRepresentation['batch_A'] = np.prod(ctxt.lookup(node.inputs[0].name).shape[:-2]) - self.operatorRepresentation['batch_B'] = np.prod(ctxt.lookup(node.inputs[1].name).shape[:-2]) + self.operatorRepresentation['batch_A'] = np.prod(buffA.shape[:-2]) + self.operatorRepresentation['batch_B'] = np.prod(buffB.shape[:-2]) self.operatorRepresentation['batch'] = max(self.operatorRepresentation['batch_A'], self.operatorRepresentation['batch_B']) @@ -1672,10 +1650,10 @@ def parseNodeCtxt(self, ), "Incompatible dimensions for input matrices. Broadcasting not yet supported for dimensions larger than 1 on one of the inputs, or equal dimensions between the 2." # Create flags for same dimension between each input matrix and the final batch dimension - self.operatorRepresentation['A_batched'] = (self.operatorRepresentation['batch'] == np.prod( - ctxt.lookup(node.inputs[0].name).shape[:-2])) + self.operatorRepresentation['A_batched'] = ( + self.operatorRepresentation['batch'] == self.operatorRepresentation['batch_A']) self.operatorRepresentation['W_batched'] = self.operatorRepresentation['B_batched'] = ( - self.operatorRepresentation['batch'] == np.prod(ctxt.lookup(node.inputs[1].name).shape[:-2])) + self.operatorRepresentation['batch'] == self.operatorRepresentation['batch_B']) return ctxt, ret @@ -2332,32 +2310,12 @@ def parseNode(self, node: gs.Node) -> bool: ]) if ret: - if 'rqs1_n_levels' in node.attrs: - self.operatorRepresentation['rqs1_n_levels'] = int(node.attrs['rqs1_n_levels'].values) - else: - self.operatorRepresentation['rqs1_n_levels'] = int(node.attrs['rqs1_n_levels_out'].values) - self.operatorRepresentation['rqs1_mul'] = int(node.attrs['rqs1_mul']) - self.operatorRepresentation['rqs1_add'] = int(node.attrs['rqs1_add']) - self.operatorRepresentation['rqs1_signed'] = int(node.attrs['rqs1_signed'].values) - self.operatorRepresentation['rqs1_log2D'] = int(math.log2(node.attrs['rqs1_div'].values)) - - if 'rqs2_n_levels' in node.attrs: - self.operatorRepresentation['rqs2_n_levels'] = int(node.attrs['rqs2_n_levels'].values) - else: - self.operatorRepresentation['rqs2_n_levels'] = int(node.attrs['rqs2_n_levels_out'].values) - self.operatorRepresentation['rqs2_mul'] = int(node.attrs['rqs2_mul']) - self.operatorRepresentation['rqs2_add'] = int(node.attrs['rqs2_add']) - self.operatorRepresentation['rqs2_signed'] = int(node.attrs['rqs2_signed'].values) - self.operatorRepresentation['rqs2_log2D'] = int(math.log2(node.attrs['rqs2_div'].values)) - - if 'rqsOut_n_levels' in node.attrs: - self.operatorRepresentation['rqsOut_n_levels'] = int(node.attrs['rqsOut_n_levels'].values) - else: - self.operatorRepresentation['rqsOut_n_levels'] = 
int(node.attrs['rqsOut_n_levels_out'].values) - self.operatorRepresentation['rqsOut_mul'] = int(node.attrs['rqsOut_mul']) - self.operatorRepresentation['rqsOut_add'] = int(node.attrs['rqsOut_add']) - self.operatorRepresentation['rqsOut_signed'] = int(node.attrs['rqsOut_signed'].values) - self.operatorRepresentation['rqsOut_log2D'] = int(math.log2(node.attrs['rqsOut_div'].values)) + self.operatorRepresentation.update(node.attrs) + + for tensor in ["rqs1", "rqs2", "rqsOut"]: + value = self.operatorRepresentation[f"{tensor}_div"] + assert isinstance(value, int) + self.operatorRepresentation[f"{tensor}_log2D"] = int(math.log2(value)) return ret @@ -2425,12 +2383,10 @@ def parseNode(self, node: gs.Node) -> bool: ]) if ret: - self.operatorRepresentation['scale'] = float(node.attrs['scale']) - self.operatorRepresentation['zero_point'] = float(node.attrs['zero_point']) - self.operatorRepresentation['bit_width'] = int(node.attrs['bit_width']) - - self.operatorRepresentation['signed'] = bool(node.attrs['signed']) - + self.operatorRepresentation['scale'] = node.attrs['scale'] + self.operatorRepresentation['zero_point'] = node.attrs['zero_point'] + self.operatorRepresentation['bit_width'] = node.attrs['bit_width'] + self.operatorRepresentation['signed'] = node.attrs['signed'] return ret def parseNodeCtxt(self, diff --git a/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py b/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py index 69bea8484..ab78e742d 100644 --- a/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py +++ b/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py @@ -21,8 +21,8 @@ ${M}, ${N}, ${O}, - ${transA}, - ${transB} + ${int(transA)}, + ${int(transB)} ); % if A_batched: diff --git a/Deeploy/Targets/Generic/Templates/GemmTemplate.py b/Deeploy/Targets/Generic/Templates/GemmTemplate.py index 62d760d15..371004a8e 100644 --- a/Deeploy/Targets/Generic/Templates/GemmTemplate.py +++ b/Deeploy/Targets/Generic/Templates/GemmTemplate.py @@ -56,8 +56,8 @@ def alignToContext(self, ctxt: NetworkContext, ${O}, ${alpha}, ${beta}, - ${transA}, - ${transB}, + ${int(transA)}, + ${int(transB)}, ${A_offset}, ${B_offset}, ${C_offset}, diff --git a/Deeploy/Targets/Generic/TopologyOptimizationPasses/Passes.py b/Deeploy/Targets/Generic/TopologyOptimizationPasses/Passes.py index b881529f7..09ed0b6c7 100644 --- a/Deeploy/Targets/Generic/TopologyOptimizationPasses/Passes.py +++ b/Deeploy/Targets/Generic/TopologyOptimizationPasses/Passes.py @@ -353,44 +353,49 @@ def __init__(self): super().__init__(graph, _split_add_fun, name) -def _extract_padding_fun_conv(graph: gs.Graph, match: Match, name: str, value = 0): +def _extract_padding_fun_conv(graph: gs.Graph, match: Match, name: str, value = 0) -> gs.Graph: + conv = list(match.nodes_map.values())[0] - matched_nodes = [m for k, m in match.nodes_map.items()] - conv = matched_nodes[0] - if 'pads' in conv.attrs and np.sum(conv.attrs['pads']) > 1: - pads = copy.deepcopy(conv.attrs['pads']) - shape = copy.deepcopy(conv.inputs[0].shape) - newPads = np.zeros(2 * len(shape)) - assert len(shape) - 2 == len(pads) / 2, "Conv padding dims do not match!" 
- newShape = shape + if 'pads' not in conv.attrs: + return graph - beginPads = pads[0:len(pads) // 2] - endPads = pads[len(pads) // 2:] - for idx, i in enumerate(beginPads): - newShape[2 + idx] = newShape[2 + idx] + i - newPads[2 + idx] = i + convPads = conv.attrs['pads'] - for idx, i in enumerate(endPads): - newShape[2 + idx] = newShape[2 + idx] + i - newPads[len(newPads) // 2 + 2 + idx] = i + if all(p == 0 for p in convPads): + return graph - newConvInput = gs.Variable(name + '_padded_input', dtype = np.float32, shape = newShape) - #valConst = gs.Constant('value', np.array(0)) - conv.attrs['pads'] = [0 for pad in conv.attrs['pads']] - newPad = gs.Node(op = 'Pad', - name = name + '_pad', - attrs = { - 'pads': newPads, - 'mode': 'constant', - 'value': value - }, - inputs = [conv.inputs[0]], - outputs = [newConvInput]) + inTensor = conv.inputs[0] + assert isinstance(inTensor, gs.Variable) + convShape = inTensor.shape - conv.inputs[0] = newConvInput - graph.nodes.append(newPad) - graph.cleanup().toposort() + beginConvPads = convPads[0:len(convPads) // 2] + endConvPads = convPads[len(convPads) // 2:] + + nonSpatialDimCount = len(convShape) - (len(convPads) // 2) + pads = [0] * nonSpatialDimCount + beginConvPads + [0] * nonSpatialDimCount + endConvPads + shape = [] + for dim, begin, end in zip(convShape, pads[:len(pads) // 2], pads[len(pads) // 2:]): + shape.append(begin + dim + end) + + paddedInput = gs.Variable(f"{name}_{inTensor.name}", dtype = np.float32, shape = shape) + + newPad = gs.Node(op = 'Pad', + name = name + '_pad', + attrs = { + 'pads': pads, + 'mode': 'constant', + 'value': value + }, + inputs = [conv.inputs[0]], + outputs = [paddedInput]) + + graph.nodes.append(newPad) + + conv.attrs['pads'] = [0] * len(convPads) + conv.inputs[0] = paddedInput + + graph.cleanup().toposort() return graph diff --git a/Deeploy/Targets/Generic/TypeCheckers.py b/Deeploy/Targets/Generic/TypeCheckers.py index 8f3a12ec8..d107bcc33 100644 --- a/Deeploy/Targets/Generic/TypeCheckers.py +++ b/Deeploy/Targets/Generic/TypeCheckers.py @@ -185,10 +185,8 @@ def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[ def _inferNumLevels(self, inputs: List[VariableBuffer], operatorRepresentation: OperatorRepresentation) -> List[int]: - return [ - 2**((self.input_types[0].referencedType.typeWidth) * 2) * - inputs[0].shape[-1 - operatorRepresentation['transA']] - ] + O = inputs[0].shape[-1] if not operatorRepresentation['transA'] else inputs[0].shape[-2] + return [2**((self.input_types[0].referencedType.typeWidth) * 2) * O] def _inferSignedness(self, inputs: List[VariableBuffer], operatorRepresentation: OperatorRepresentation) -> List[bool]: diff --git a/Deeploy/Targets/MemPool/Deployer.py b/Deeploy/Targets/MemPool/Deployer.py index 543132097..968787972 100644 --- a/Deeploy/Targets/MemPool/Deployer.py +++ b/Deeploy/Targets/MemPool/Deployer.py @@ -11,7 +11,7 @@ from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.DebugPasses import DebugPrintMergePass from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.LoweringOptimizationPasses import \ NCHWtoNHWCPass, TransposeMatmulInputsPass -from Deeploy.DeeployTypes import DeploymentPlatform, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, OperatorDescriptor, TopologyOptimizer from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import TransposeConstOptPass, TransposeMergePass @@ -22,12 +22,13 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, 
Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first: bool = True, deeployStateDir: str = "DeeployState", inputOffsets: Dict[str, int] = {}): - super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, scheduler, name, + super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, operatorDescriptors, scheduler, name, default_channels_first, deeployStateDir) self.inputOffsets = inputOffsets diff --git a/Deeploy/Targets/MemPool/Templates/GemmTemplate.py b/Deeploy/Targets/MemPool/Templates/GemmTemplate.py index e5d53bd25..54cc86f6a 100644 --- a/Deeploy/Targets/MemPool/Templates/GemmTemplate.py +++ b/Deeploy/Targets/MemPool/Templates/GemmTemplate.py @@ -127,8 +127,8 @@ def hoistTransientBuffers(self, ctxt: NetworkContext, ${O}, ${alpha}, ${beta}, - ${transA}, - ${transB}, + ${int(transA)}, + ${int(transB)}, ${A_offset}, ${B_offset}, ${C_offset}, diff --git a/Deeploy/Targets/MemPool/Templates/RQGemmTemplate.py b/Deeploy/Targets/MemPool/Templates/RQGemmTemplate.py index e6a42768e..f544841ac 100644 --- a/Deeploy/Targets/MemPool/Templates/RQGemmTemplate.py +++ b/Deeploy/Targets/MemPool/Templates/RQGemmTemplate.py @@ -145,8 +145,8 @@ def hoistTransientBuffers(self, ctxt: NetworkContext, ${O}, ${alpha}, ${beta}, - ${transA}, - ${transB}, + ${int(transA)}, + ${int(transB)}, ${mul}, ${add}, ${log2Dstring}, @@ -170,8 +170,8 @@ def hoistTransientBuffers(self, ctxt: NetworkContext, ${O}, ${alpha}, ${beta}, - ${transA}, - ${transB}, + ${int(transA)}, + ${int(transB)}, ${mul}, ${add}, ${log2Dstring}, diff --git a/Deeploy/Targets/MemPool/TopologyOptimizationPasses/Passes.py b/Deeploy/Targets/MemPool/TopologyOptimizationPasses/Passes.py index 49f317caa..46bad04ce 100644 --- a/Deeploy/Targets/MemPool/TopologyOptimizationPasses/Passes.py +++ b/Deeploy/Targets/MemPool/TopologyOptimizationPasses/Passes.py @@ -289,7 +289,7 @@ def get_constant_input_or_zeros(n: gs.Node, shape): name = name + "_sum", attrs = { 'axes': [1], - "keepdims": "0" + "keepdims": 0 }) mhsa_out[0].shape = [_output.shape[0]] + [int(H)] + _output.shape[1:] diff --git a/Deeploy/Targets/Neureka/Deployer.py b/Deeploy/Targets/Neureka/Deployer.py index e9b966569..0dfc07d3e 100644 --- a/Deeploy/Targets/Neureka/Deployer.py +++ b/Deeploy/Targets/Neureka/Deployer.py @@ -9,7 +9,7 @@ from Deeploy.AbstractDataTypes import Pointer from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.LoweringOptimizationPasses import \ NeurekaNCHWtoNHWCPass, PULPNCHWtoNHWCPass -from Deeploy.DeeployTypes import DeploymentPlatform, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, OperatorDescriptor, TopologyOptimizer from Deeploy.Targets.Neureka.TopologyOptimizationPasses.Passes import ConvEngineDiscolorationPass, \ NeurekaOptimizationPass from Deeploy.Targets.PULPOpen.Deployer import PULPDeployer @@ -22,12 +22,13 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda graph: list(graph.nodes), name: str = 'DeeployNetwork', default_channels_first = False, deeployStateDir: str = "DeeployStateDir", inputOffsets = {}): - super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, scheduler, name, + super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, 
operatorDescriptors, scheduler, name, default_channels_first, deeployStateDir, inputOffsets) if self.Platform.engines[0].enable3x3: diff --git a/Deeploy/Targets/Neureka/Parsers.py b/Deeploy/Targets/Neureka/Parsers.py index 3c564c10b..1d3db0d88 100644 --- a/Deeploy/Targets/Neureka/Parsers.py +++ b/Deeploy/Targets/Neureka/Parsers.py @@ -18,7 +18,7 @@ def parseNode(self, node: gs.Node) -> bool: if not all([ # No dilation support - self.operatorRepresentation['dilations'] == [1, 1], + self.operatorRepresentation['dilations'] == (1, 1), # Channels have to be last 'channels_first' in self.operatorRepresentation and not self.operatorRepresentation['channels_first'], # Expect "weight_offset" attribute in the node @@ -129,7 +129,7 @@ def parseNode(self, node: gs.Node) -> bool: return False if not all([ - self.operatorRepresentation['kernel_shape'] == [1, 1], + self.operatorRepresentation['kernel_shape'] == (1, 1), self.operatorRepresentation['group'] == 1, ]): return False diff --git a/Deeploy/Targets/PULPOpen/Deployer.py b/Deeploy/Targets/PULPOpen/Deployer.py index 86bf02e57..17412c8da 100644 --- a/Deeploy/Targets/PULPOpen/Deployer.py +++ b/Deeploy/Targets/PULPOpen/Deployer.py @@ -12,7 +12,8 @@ from Deeploy.CommonExtensions.OptimizationPasses.BindingsOptimizationPasses.AutoTranspose import AutoTransposeMergePass from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.LoweringOptimizationPasses import \ PULPNCHWtoNHWCPass, RemoveGlobalOutputReshapePass, TransposeMatmulInputsPass -from Deeploy.DeeployTypes import ConstantBuffer, DeploymentPlatform, NodeTemplate, TopologyOptimizer, VariableBuffer +from Deeploy.DeeployTypes import ConstantBuffer, DeploymentPlatform, NodeTemplate, OperatorDescriptor, \ + TopologyOptimizer, VariableBuffer from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import ReshapeConstOptPass, TransposeConstOptPass, \ TransposeMergePass, TransposeNoPermOptPass, TransposeSplitPass from Deeploy.Targets.PULPOpen.TopologyOptimizationPasses.Passes import RQAddTransposeSquashPass @@ -33,6 +34,7 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first = False, @@ -42,6 +44,7 @@ def __init__(self, deploymentPlatform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name, default_channels_first = default_channels_first, diff --git a/Deeploy/Targets/PULPOpen/Parsers.py b/Deeploy/Targets/PULPOpen/Parsers.py index e94af6e42..51b26ae54 100644 --- a/Deeploy/Targets/PULPOpen/Parsers.py +++ b/Deeploy/Targets/PULPOpen/Parsers.py @@ -133,13 +133,9 @@ def parseNode(self, node: gs.Node) -> (bool): self.operatorRepresentation['padding_y_bottom'] = int(self.operatorRepresentation['pads'][1]) self.operatorRepresentation['stride_y'] = int(self.operatorRepresentation['strides'][0]) - if 'n_levels' in node.attrs: - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values) - else: - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels_out'].values) - - self.operatorRepresentation['signed'] = int(node.attrs['signed'].values) - self.operatorRepresentation['log2D'] = int(math.log2(node.attrs['div'].values)) + self.operatorRepresentation['n_levels'] = node.attrs['n_levels'] + self.operatorRepresentation['signed'] = node.attrs['signed'] + self.operatorRepresentation['log2D'] = int(math.log2(node.attrs['div'])) 
return ret def parseNodeCtxt(self, @@ -206,12 +202,9 @@ def parseNode(self, node: gs.Node) -> (bool): self.operatorRepresentation['stride_x'] = int(self.operatorRepresentation['strides'][0]) self.operatorRepresentation['stride_y'] = int(self.operatorRepresentation['strides'][1]) - if 'n_levels' in node.attrs: - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values) - else: - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels_out'].values) - self.operatorRepresentation['signed'] = int(node.attrs['signed'].values) - self.operatorRepresentation['log2D'] = int(math.log2(node.attrs['div'].values)) + self.operatorRepresentation['n_levels'] = node.attrs['n_levels'] + self.operatorRepresentation['signed'] = node.attrs['signed'] + self.operatorRepresentation['log2D'] = int(math.log2(node.attrs['div'])) return ret return False diff --git a/Deeploy/Targets/PULPOpen/Templates/FloatGemmTemplate.py b/Deeploy/Targets/PULPOpen/Templates/FloatGemmTemplate.py index f4c22b2c2..21044a5ec 100644 --- a/Deeploy/Targets/PULPOpen/Templates/FloatGemmTemplate.py +++ b/Deeploy/Targets/PULPOpen/Templates/FloatGemmTemplate.py @@ -20,8 +20,8 @@ ${M}, ${N}, ${O}, - ${transA}, - ${transB} + ${int(transA)}, + ${int(transB)} ); ref_${data_out}_${A} += ${M} * ${N}; diff --git a/Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py b/Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py index 8b795be88..a9259a15c 100644 --- a/Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py +++ b/Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py @@ -32,13 +32,13 @@ def addGeometricalConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: Netw tensorsShapeLen = len(bufferA.shape) AFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, - dimIdx = (tensorsShapeLen - 2) + parseDict['transA']) + dimIdx = (tensorsShapeLen - 2) + int(parseDict['transA'])) ASecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, - dimIdx = (tensorsShapeLen - 1) - parseDict['transA']) + dimIdx = (tensorsShapeLen - 1) - int(parseDict['transA'])) BFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, - dimIdx = (tensorsShapeLen - 2) + parseDict['transB']) + dimIdx = (tensorsShapeLen - 2) + int(parseDict['transB'])) BSecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, - dimIdx = (tensorsShapeLen - 1) - parseDict['transB']) + dimIdx = (tensorsShapeLen - 1) - int(parseDict['transB'])) outputFirstDimVar = tilerModel.getTensorDimVar(tensorName = outputBuffer.name, dimIdx = (tensorsShapeLen - 2)) outputSecondDimVar = tilerModel.getTensorDimVar(tensorName = outputBuffer.name, dimIdx = (tensorsShapeLen - 1)) diff --git a/Deeploy/Targets/Snitch/Deployer.py b/Deeploy/Targets/Snitch/Deployer.py index 7c3922a6b..4daab3b9f 100644 --- a/Deeploy/Targets/Snitch/Deployer.py +++ b/Deeploy/Targets/Snitch/Deployer.py @@ -10,7 +10,7 @@ from Deeploy.CommonExtensions.NetworkDeployers.SignPropDeployer import SignPropDeployer from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.LoweringOptimizationPasses import \ NCHWtoNHWCPass, RemoveGlobalOutputReshapePass, TransposeMatmulInputsPass -from Deeploy.DeeployTypes import DeploymentPlatform, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, OperatorDescriptor, TopologyOptimizer from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import ReshapeConstOptPass, TransposeConstOptPass, \ TransposeMergePass, TransposeSplitPass @@ -22,6 +22,7 @@ def 
__init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first = False, @@ -31,6 +32,7 @@ def __init__(self, deploymentPlatform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name, default_channels_first = default_channels_first, diff --git a/Deeploy/Targets/Snitch/Parsers.py b/Deeploy/Targets/Snitch/Parsers.py index 005199468..51b32db21 100644 --- a/Deeploy/Targets/Snitch/Parsers.py +++ b/Deeploy/Targets/Snitch/Parsers.py @@ -18,9 +18,7 @@ def parseNode(self, node: gs.Node) -> bool: if not ret: return False - if not all([ - self.operatorRepresentation['transA'] == 0, - ]): + if self.operatorRepresentation['transA']: return False return True @@ -50,9 +48,7 @@ def parseNode(self, node: gs.Node) -> bool: if not ret: return False - if not all([ - self.operatorRepresentation['transA'] == 0, - ]): + if self.operatorRepresentation['transA']: return False return True diff --git a/Deeploy/Targets/SoftHier/Deployer.py b/Deeploy/Targets/SoftHier/Deployer.py index e4ab37f29..4827ba83b 100644 --- a/Deeploy/Targets/SoftHier/Deployer.py +++ b/Deeploy/Targets/SoftHier/Deployer.py @@ -8,7 +8,7 @@ from Deeploy.AbstractDataTypes import Pointer from Deeploy.CommonExtensions.NetworkDeployers.SignPropDeployer import SignPropDeployer -from Deeploy.DeeployTypes import DeploymentPlatform, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, OperatorDescriptor, TopologyOptimizer class SoftHierDeployer(SignPropDeployer): @@ -18,12 +18,13 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first: bool = True, deeployStateDir: str = "DeeployState", inputOffsets: Dict[str, int] = {}): - super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, scheduler, name, + super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, operatorDescriptors, scheduler, name, default_channels_first, deeployStateDir) self.inputOffsets = inputOffsets diff --git a/DeeployTest/testMemoryLevelExtension.py b/DeeployTest/testMemoryLevelExtension.py index 0e1ed6cc4..a6a1cf37d 100644 --- a/DeeployTest/testMemoryLevelExtension.py +++ b/DeeployTest/testMemoryLevelExtension.py @@ -18,6 +18,7 @@ from Deeploy.MemoryLevelExtension.MemoryLevels import MemoryHierarchy, MemoryLevel from Deeploy.MemoryLevelExtension.NetworkDeployers.MemoryLevelDeployer import MemoryDeployerWrapper, \ MemoryLevelAwareSignPropDeployer +from Deeploy.OperatorDescriptor import defaultOperatorDescriptors from Deeploy.Targets.CortexM.Platform import CMSISEngine, CMSISMapping, CMSISOptimizer, CMSISPlatform from Deeploy.Targets.Generic.Platform import GenericEngine, GenericMapping, GenericOptimizer, GenericPlatform from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import TransposeConstOptPass, TransposeMergePass @@ -83,6 +84,7 @@ MockPlatform, inputTypes, CMSISOptimizer, + defaultOperatorDescriptors, defaultScheduler, name = "DeeployNetwork", deeployStateDir = _DEEPLOYSTATEDIR, @@ -106,6 +108,7 @@ MockPlatform, inputTypes, MemPoolOptimizer, + defaultOperatorDescriptors, defaultScheduler, name = "DeeployNetwork", deeployStateDir = _DEEPLOYSTATEDIR, @@ -121,6 +124,7 @@ 
MockPlatform, inputTypes, GenericOptimizer, + defaultOperatorDescriptors, defaultScheduler, name = "DeeployNetworkMock", deeployStateDir = _DEEPLOYSTATEDIRMOCK, @@ -136,6 +140,7 @@ MockPlatform, inputTypes, PULPOptimizer, + defaultOperatorDescriptors, defaultScheduler, name = "DeeployNetworkMock", deeployStateDir = _DEEPLOYSTATEDIRMOCK, diff --git a/DeeployTest/testUtils/dmaUtils.py b/DeeployTest/testUtils/dmaUtils.py index 3266ce512..ba2f6e176 100644 --- a/DeeployTest/testUtils/dmaUtils.py +++ b/DeeployTest/testUtils/dmaUtils.py @@ -10,8 +10,8 @@ from Deeploy.AbstractDataTypes import BaseType, Pointer, PointerClass from Deeploy.CommonExtensions.DataTypes import minimalIntegerType -from Deeploy.DeeployTypes import NetworkContext, NetworkDeployer, NodeParser, NodeTemplate, NodeTypeChecker, \ - ONNXLayer, OperatorRepresentation, VariableBuffer +from Deeploy.DeeployTypes import IoDesc, NetworkContext, NetworkDeployer, NodeParser, NodeTemplate, NodeTypeChecker, \ + ONNXLayer, OperatorDescriptor, OperatorRepresentation, VariableBuffer from Deeploy.MemoryLevelExtension.MemoryLevels import MemoryHierarchy, MemoryLevel from Deeploy.MemoryLevelExtension.NetworkDeployers.MemoryLevelDeployer import MemoryDeployerWrapper, \ MemoryPlatformWrapper @@ -279,6 +279,17 @@ def defaultScheduler(graph: gs.Graph) -> List[List[gs.Node]]: return [[node] for node in graph.nodes] +memcpyDesc = OperatorDescriptor( + inputDescriptor = IoDesc("src"), + outputDescriptor = IoDesc("dest"), + attrDescriptors = [], +) + +dmaTestOperatorDescriptors = { + "Memcpy": memcpyDesc, +} + + def setup_pulp_deployer(defaultMemory: str, targetMemory: str, graph: gs.Graph, inputTypes: Dict[str, Type[Pointer]], doublebuffer: bool, deeployStateDir: str) -> NetworkDeployer: L3 = MemoryLevel(name = "L3", neighbourNames = ["L2"], size = 64000000) @@ -299,6 +310,7 @@ def setup_pulp_deployer(defaultMemory: str, targetMemory: str, graph: gs.Graph, platform, inputTypes, PULPOptimizer, + dmaTestOperatorDescriptors, defaultScheduler, default_channels_first = True, deeployStateDir = deeployStateDir) @@ -340,6 +352,7 @@ def setup_snitch_deployer(defaultMemory: str, targetMemory: str, graph: gs.Graph platform, inputTypes, SnitchOptimizer, + dmaTestOperatorDescriptors, defaultScheduler, deeployStateDir = deeployStateDir) memoryLevelAnnotationPasses = [AnnotateIOMemoryLevel(defaultMemory), AnnotateDefaultMemoryLevel(memoryHierarchy)] diff --git a/DeeployTest/testUtils/platformMapping.py b/DeeployTest/testUtils/platformMapping.py index 48c577790..d02c3da64 100644 --- a/DeeployTest/testUtils/platformMapping.py +++ b/DeeployTest/testUtils/platformMapping.py @@ -7,9 +7,10 @@ import onnx_graphsurgeon as gs from Deeploy.AbstractDataTypes import Pointer -from Deeploy.DeeployTypes import DeploymentPlatform, NetworkDeployer, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, NetworkDeployer, OperatorDescriptor, TopologyOptimizer from Deeploy.MemoryLevelExtension.MemoryLevels import MemoryHierarchy, MemoryLevel from Deeploy.MemoryLevelExtension.NetworkDeployers.MemoryLevelDeployer import MemoryPlatform, MemoryPlatformWrapper +from Deeploy.OperatorDescriptor import defaultOperatorDescriptors from Deeploy.Targets.Chimera.Deployer import ChimeraDeployer from Deeploy.Targets.Chimera.Platform import ChimeraOptimizer, ChimeraPlatform from Deeploy.Targets.CortexM.Deployer import CMSISDeployer @@ -93,6 +94,7 @@ def mapDeployer(platform: DeploymentPlatform, graph: gs.Graph, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: 
Optional[TopologyOptimizer] = None, + operatorDescriptors: Optional[Dict[str, OperatorDescriptor]] = None, scheduler: Optional[Callable] = None, name: Optional[str] = None, default_channels_first: Optional[bool] = None, @@ -108,6 +110,9 @@ def mapDeployer(platform: DeploymentPlatform, if name is None: name = "DeeployNetwork" + if operatorDescriptors is None: + operatorDescriptors = defaultOperatorDescriptors + if isinstance(platform, CMSISPlatform): if loweringOptimizer is None: @@ -120,6 +125,7 @@ def mapDeployer(platform: DeploymentPlatform, platform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name = name, default_channels_first = default_channels_first, @@ -138,6 +144,7 @@ def mapDeployer(platform: DeploymentPlatform, platform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name = name, default_channels_first = default_channels_first, @@ -156,6 +163,7 @@ def mapDeployer(platform: DeploymentPlatform, platform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name = name, default_channels_first = default_channels_first, @@ -177,6 +185,7 @@ def mapDeployer(platform: DeploymentPlatform, platform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name = name, default_channels_first = default_channels_first, @@ -195,6 +204,7 @@ def mapDeployer(platform: DeploymentPlatform, platform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name = name, default_channels_first = default_channels_first, @@ -212,6 +222,7 @@ def mapDeployer(platform: DeploymentPlatform, platform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name = name, default_channels_first = default_channels_first, @@ -228,6 +239,7 @@ def mapDeployer(platform: DeploymentPlatform, platform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name = name, default_channels_first = default_channels_first, @@ -244,6 +256,7 @@ def mapDeployer(platform: DeploymentPlatform, platform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name = name, default_channels_first = default_channels_first,
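
Reviewer note: every deployer in this patch now threads an operatorDescriptors mapping down to NetworkDeployer, and mapDeployer falls back to defaultOperatorDescriptors (from Deeploy/OperatorDescriptor.py) when none is supplied. For a platform-specific or out-of-tree operator, a descriptor can be declared next to the platform code and merged over the defaults, mirroring the Memcpy descriptor added in dmaUtils.py. Below is a minimal sketch; the "RQGelu" op name and its attribute set are hypothetical, and it assumes AttrDesc is exported from Deeploy.DeeployTypes alongside IoDesc and OperatorDescriptor:

    from Deeploy.DeeployTypes import AttrDesc, IoDesc, OperatorDescriptor
    from Deeploy.OperatorDescriptor import defaultOperatorDescriptors

    # Hypothetical descriptor for an "RQGelu" operator: one required input and
    # output tensor, two required requantization attributes, one defaulted one.
    rqGeluDesc = OperatorDescriptor(
        inputDescriptor = IoDesc("data_in"),
        outputDescriptor = IoDesc("data_out"),
        attrDescriptors = [
            AttrDesc("mul", int),  # no default -> check() flags the node if missing
            AttrDesc("div", int),  # unpacker (int) is applied during canonicalize()
            AttrDesc("n_levels", int, default = 256),
        ],
    )

    # Merge over the defaults; pass the result as the deployers' (or
    # mapDeployer's) operatorDescriptors argument instead of the defaults.
    operatorDescriptors = {**defaultOperatorDescriptors, "RQGelu": rqGeluDesc}

Because canonicalize() unpacks Constant-wrapped attributes once, up front, parsers can rely on plain Python values, which is what lets the per-attribute int(...values) conversions be dropped throughout this patch.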