Commit 03ab4c5

[optimizer] Replace value.nbytes with value.size (#2399)
To unify the size limits so they are expressed on the same scale: IR passes use tensor size (element count), see https://github.com/onnx/ir-py/blob/a833ab1e178c70046a414b96c1aafbf78a9b4e17/src/onnx_ir/passes/common/constant_manipulation.py#L124, while the optimizer used nbytes, which could confuse users.
1 parent 038cac7 commit 03ab4c5
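
For scale, the two attributes differ only in units; a minimal numpy illustration (numpy arrays expose the same `size`/`nbytes` pair as the IR tensor values involved here):

import numpy as np

# `size` counts elements regardless of dtype; `nbytes` scales with element width.
w = np.zeros((256, 256), dtype=np.float32)
print(w.size)    # 65536 elements
print(w.nbytes)  # 262144 bytes (65536 elements * 4 bytes per float32)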

3 files changed: +13 -16 lines changed


onnxscript/optimizer/_constant_folding.py

Lines changed: 10 additions & 11 deletions
@@ -19,9 +19,9 @@
 import onnxscript.utils.utils as utils
 from onnxscript.ir import _tape
 
-DEFAULT_CONSTANT_FOLD_INPUT_SIZE_LIMIT = 1024
+DEFAULT_CONSTANT_FOLD_INPUT_SIZE_LIMIT = 512
 
-DEFAULT_CONSTANT_FOLD_OUTPUT_SIZE_LIMIT = 1024 * 1024
+DEFAULT_CONSTANT_FOLD_OUTPUT_SIZE_LIMIT = 512 * 512
 
 
 _NON_DETERMINISTIC_OPS = frozenset(
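
Since the limits now count elements rather than bytes, the effective byte budget depends on dtype; a quick sketch of what the new input default admits (assuming numpy-backed tensors, as used throughout this file):

import numpy as np

INPUT_SIZE_LIMIT = 512  # elements, matching the new default above

# A tensor exactly at the limit has a dtype-dependent byte footprint.
for dtype in (np.float16, np.float32, np.int64):
    at_limit = np.zeros(INPUT_SIZE_LIMIT, dtype=dtype)
    print(np.dtype(dtype).name, at_limit.nbytes)  # 1024, 2048, 4096 bytes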
@@ -944,21 +944,21 @@ def new_constant(self, node: ir.Node, value) -> ir.Node | None:
         tensor.name = irvalue.name
         irvalue.const_value = tensor
 
-        if value.nbytes > self.output_size_limit:
+        if value.size > self.output_size_limit:
             # Handle examples like Transpose(weight) to be folded even if the size is large,
             # as long as weight has no other uses. This won't increase model size.
             removed_input_size = 0
             for input in node.inputs:
                 if (input is not None) and (len(input.uses()) == 1):
                     array = _get_numpy_value(input)
                     if array is not None:
-                        removed_input_size += array.nbytes
-            increased_size = value.nbytes - removed_input_size
+                        removed_input_size += array.size
+            increased_size = value.size - removed_input_size
             if increased_size > 0:
                 logger.info(
                     "Skip storing constant folded nvalue %s due to large size %s.",
                     irvalue.name,
-                    value.nbytes,
+                    value.size,
                 )
                 return None
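
The escape hatch above can be read as a single comparison; a hypothetical standalone condensation (not the actual helper, which lives inline in new_constant):

def would_increase_model_size(output_size: int, single_use_input_sizes: list[int]) -> bool:
    # Mirrors the accounting in new_constant: weigh the folded output's
    # element count against the single-use inputs that folding removes.
    removed_input_size = sum(single_use_input_sizes)
    return output_size - removed_input_size > 0

# Transpose(weight) produces exactly as many elements as it consumes, so
# folding it never grows the model even when the output is over the limit.
assert not would_increase_model_size(512 * 512 + 1, [512 * 512 + 1])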

@@ -1029,9 +1029,8 @@ def process_node(self, node: ir.Node) -> Replacement | None:
             return None
 
         input_tensors = [x.const_value if x is not None else None for x in node.inputs]
-
         if any(
-            tensor.nbytes > self.input_size_limit
+            tensor.size > self.input_size_limit
             for tensor in input_tensors
             if tensor is not None
         ):
@@ -1048,7 +1047,7 @@ def process_node(self, node: ir.Node) -> Replacement | None:
         # Skip folding large tensors
         if logger.isEnabledFor(logging.DEBUG):
             input_sizes = [
-                tensor.nbytes for tensor in input_tensors if tensor is not None
+                tensor.size for tensor in input_tensors if tensor is not None
             ]
             logger.debug(
                 "Skipping constant folding for node %s due to large input size: %s",
@@ -1190,10 +1189,10 @@ def fold_constants(
         model: The ONNX model to optimize.
         onnx_shape_inference: Whether to enable ONNX shape inference during
             constant folding. Defaults to False.
-        input_size_limit: The maximum size (in bytes) of input tensors
+        input_size_limit: The maximum size of input tensors
            that can be considered for constant folding. Defaults to
             `DEFAULT_CONSTANT_FOLD_INPUT_SIZE_LIMIT`.
-        output_size_limit: The maximum size (in bytes) of output tensors
+        output_size_limit: The maximum size of output tensors
            that can be stored after constant folding. Defaults to
             `DEFAULT_CONSTANT_FOLD_OUTPUT_SIZE_LIMIT`.
         always_fold_ops: A collection of op types that should always be folded,

onnxscript/optimizer/_constant_folding_test.py

Lines changed: 2 additions & 4 deletions
@@ -552,15 +552,13 @@ def test_input_size_limit(self):
         w.const_value = ir.tensor(np.random.random((256, 256)).astype(np.float32))
 
         # Input size limit will prevent folding of Mul op
-        optimized = self._fold(model, input_size_limit=3 * 256 * 256)
+        optimized = self._fold(model, onnx_shape_inference=False, input_size_limit=128 * 128)
         ops = [node.op_type for node in optimized.graph]
         self.assertEqual(ops, ["Mul", "Add"])
 
         # Input size limit will allow folding of Mul op
         # Since there is no increase in model-size, output-size is not a concern.
-        optimized = self._fold(
-            model, input_size_limit=4 * 256 * 256, output_size_limit=4 * 256 * 256
-        )
+        optimized = self._fold(model, input_size_limit=256 * 256, output_size_limit=256 * 256)
         ops = [node.op_type for node in optimized.graph]
         self.assertEqual(ops, ["Constant", "Add"])
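
The arithmetic behind the updated thresholds: w has 256 * 256 = 65536 elements, so an element-based limit of 128 * 128 = 16384 blocks folding of the Mul, while a limit of 256 * 256 permits it:

w_elements = 256 * 256          # 65536 elements in the test tensor
assert w_elements > 128 * 128   # over the first limit -> Mul is not folded
assert w_elements <= 256 * 256  # within the second limit -> Mul is folded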

onnxscript/version_converter/_c_api_utils.py

Lines changed: 1 addition & 1 deletion
@@ -51,7 +51,7 @@ def call_onnx_api(func: Callable[[onnx.ModelProto], _R], model: ir.Model) -> _R:
             initializer.dtype = initializer.const_value.dtype
         if initializer not in model.graph.inputs:
             model.graph.inputs.append(initializer)
-        if initializer.const_value.nbytes > _BIG_TENSOR_SIZE_LIMIT:
+        if initializer.const_value.size > _BIG_TENSOR_SIZE_LIMIT:
             # Temporarily remove the initializer value to reduce model size
             # for onnx.shape_inference
             initializer.const_value = None
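
For context, the surrounding code follows a strip-and-restore pattern around the C API call; a hypothetical self-contained sketch (the names and limit value are illustrative, not the module's actual code):

from contextlib import contextmanager

@contextmanager
def big_tensors_removed(initializers, limit):
    # Temporarily clear const_value on initializers whose element count
    # exceeds `limit`, restoring them once the C API call returns.
    saved = {}
    for init in initializers:
        value = init.const_value
        if value is not None and value.size > limit:
            saved[init] = value
            init.const_value = None  # keeps the serialized proto small
    try:
        yield
    finally:
        for init, value in saved.items():
            init.const_value = value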
