Commit 03ab4c5

[optimizer] Replace value.nbytes with value.size (#2399)
To unify the size limits so they are expressed on the same scale: IR passes use tensor size (element count), see https://github.com/onnx/ir-py/blob/a833ab1e178c70046a414b96c1aafbf78a9b4e17/src/onnx_ir/passes/common/constant_manipulation.py#L124, while the optimizer used nbytes, which could confuse users.
1 parent 038cac7 commit 03ab4c5
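
For scale, the two attributes differ only in units; a minimal numpy illustration (numpy arrays expose the same `size`/`nbytes` pair as the IR tensor values involved here):

import numpy as np

# `size` counts elements regardless of dtype; `nbytes` scales with element width.
w = np.zeros((256, 256), dtype=np.float32)
print(w.size)    # 65536 elements
print(w.nbytes)  # 262144 bytes (65536 elements * 4 bytes per float32)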

3 files changed: +13 -16 lines changed


onnxscript/optimizer/_constant_folding.py

Lines changed: 10 additions & 11 deletions
@@ -19,9 +19,9 @@
 import onnxscript.utils.utils as utils
 from onnxscript.ir import _tape
 
-DEFAULT_CONSTANT_FOLD_INPUT_SIZE_LIMIT = 1024
+DEFAULT_CONSTANT_FOLD_INPUT_SIZE_LIMIT = 512
 
-DEFAULT_CONSTANT_FOLD_OUTPUT_SIZE_LIMIT = 1024 * 1024
+DEFAULT_CONSTANT_FOLD_OUTPUT_SIZE_LIMIT = 512 * 512
 
 
 _NON_DETERMINISTIC_OPS = frozenset(
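
Since the limits now count elements rather than bytes, the effective byte budget depends on dtype; a quick sketch of what the new input default admits (assuming numpy-backed tensors, as used throughout this file):

import numpy as np

INPUT_SIZE_LIMIT = 512  # elements, matching the new default above

# A tensor exactly at the limit has a dtype-dependent byte footprint.
for dtype in (np.float16, np.float32, np.int64):
    at_limit = np.zeros(INPUT_SIZE_LIMIT, dtype=dtype)
    print(np.dtype(dtype).name, at_limit.nbytes)  # 1024, 2048, 4096 bytes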
@@ -944,21 +944,21 @@ def new_constant(self, node: ir.Node, value) -> ir.Node | None:
         tensor.name = irvalue.name
         irvalue.const_value = tensor
 
-        if value.nbytes > self.output_size_limit:
+        if value.size > self.output_size_limit:
             # Handle examples like Transpose(weight) to be folded even if the size is large,
             # as long as weight has no other uses. This won't increase model size.
             removed_input_size = 0
             for input in node.inputs:
                 if (input is not None) and (len(input.uses()) == 1):
                     array = _get_numpy_value(input)
                     if array is not None:
-                        removed_input_size += array.nbytes
-            increased_size = value.nbytes - removed_input_size
+                        removed_input_size += array.size
+            increased_size = value.size - removed_input_size
             if increased_size > 0:
                 logger.info(
                     "Skip storing constant folded nvalue %s due to large size %s.",
                     irvalue.name,
-                    value.nbytes,
+                    value.size,
                 )
                 return None
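
The escape hatch above can be read as a single comparison; a hypothetical standalone condensation (not the actual helper, which lives inline in new_constant):

def would_increase_model_size(output_size: int, single_use_input_sizes: list[int]) -> bool:
    # Mirrors the accounting in new_constant: weigh the folded output's
    # element count against the single-use inputs that folding removes.
    removed_input_size = sum(single_use_input_sizes)
    return output_size - removed_input_size > 0

# Transpose(weight) produces exactly as many elements as it consumes, so
# folding it never grows the model even when the output is over the limit.
assert not would_increase_model_size(512 * 512 + 1, [512 * 512 + 1])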

@@ -1029,9 +1029,8 @@ def process_node(self, node: ir.Node) -> Replacement | None:
             return None
 
         input_tensors = [x.const_value if x is not None else None for x in node.inputs]
-
         if any(
-            tensor.nbytes > self.input_size_limit
+            tensor.size > self.input_size_limit
             for tensor in input_tensors
             if tensor is not None
         ):
@@ -1048,7 +1047,7 @@ def process_node(self, node: ir.Node) -> Replacement | None:
         # Skip folding large tensors
         if logger.isEnabledFor(logging.DEBUG):
             input_sizes = [
-                tensor.nbytes for tensor in input_tensors if tensor is not None
+                tensor.size for tensor in input_tensors if tensor is not None
             ]
             logger.debug(
                 "Skipping constant folding for node %s due to large input size: %s",
@@ -1190,10 +1189,10 @@ def fold_constants(
         model: The ONNX model to optimize.
         onnx_shape_inference: Whether to enable ONNX shape inference during
             constant folding. Defaults to False.
-        input_size_limit: The maximum size (in bytes) of input tensors
+        input_size_limit: The maximum size of input tensors
            that can be considered for constant folding. Defaults to
             `DEFAULT_CONSTANT_FOLD_INPUT_SIZE_LIMIT`.
-        output_size_limit: The maximum size (in bytes) of output tensors
+        output_size_limit: The maximum size of output tensors
            that can be stored after constant folding. Defaults to
             `DEFAULT_CONSTANT_FOLD_OUTPUT_SIZE_LIMIT`.
         always_fold_ops: A collection of op types that should always be folded,

onnxscript/optimizer/_constant_folding_test.py

Lines changed: 2 additions & 4 deletions
@@ -552,15 +552,13 @@ def test_input_size_limit(self):
         w.const_value = ir.tensor(np.random.random((256, 256)).astype(np.float32))
 
         # Input size limit will prevent folding of Mul op
-        optimized = self._fold(model, input_size_limit=3 * 256 * 256)
+        optimized = self._fold(model, onnx_shape_inference=False, input_size_limit=128 * 128)
         ops = [node.op_type for node in optimized.graph]
         self.assertEqual(ops, ["Mul", "Add"])
 
         # Input size limit will allow folding of Mul op
         # Since there is no increase in model-size, output-size is not a concern.
-        optimized = self._fold(
-            model, input_size_limit=4 * 256 * 256, output_size_limit=4 * 256 * 256
-        )
+        optimized = self._fold(model, input_size_limit=256 * 256, output_size_limit=256 * 256)
         ops = [node.op_type for node in optimized.graph]
         self.assertEqual(ops, ["Constant", "Add"])
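
The arithmetic behind the updated thresholds: w has 256 * 256 = 65536 elements, so an element-based limit of 128 * 128 = 16384 blocks folding of the Mul, while a limit of 256 * 256 permits it:

w_elements = 256 * 256          # 65536 elements in the test tensor
assert w_elements > 128 * 128   # over the first limit -> Mul is not folded
assert w_elements <= 256 * 256  # within the second limit -> Mul is folded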

onnxscript/version_converter/_c_api_utils.py

Lines changed: 1 addition & 1 deletion
@@ -51,7 +51,7 @@ def call_onnx_api(func: Callable[[onnx.ModelProto], _R], model: ir.Model) -> _R:
             initializer.dtype = initializer.const_value.dtype
         if initializer not in model.graph.inputs:
             model.graph.inputs.append(initializer)
-        if initializer.const_value.nbytes > _BIG_TENSOR_SIZE_LIMIT:
+        if initializer.const_value.size > _BIG_TENSOR_SIZE_LIMIT:
             # Temporarily remove the initializer value to reduce model size
             # for onnx.shape_inference
             initializer.const_value = None
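
For context, the surrounding code follows a strip-and-restore pattern around the C API call; a hypothetical self-contained sketch (the names and limit value are illustrative, not the module's actual code):

from contextlib import contextmanager

@contextmanager
def big_tensors_removed(initializers, limit):
    # Temporarily clear const_value on initializers whose element count
    # exceeds `limit`, restoring them once the C API call returns.
    saved = {}
    for init in initializers:
        value = init.const_value
        if value is not None and value.size > limit:
            saved[init] = value
            init.const_value = None  # keeps the serialized proto small
    try:
        yield
    finally:
        for init, value in saved.items():
            init.const_value = value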
