Update whisper transformer module to 4.48.0 #24382

Open · wants to merge 71 commits into base: main
Commits (71)
5453405
first draft to migrate to newer version of transformers
xadupre Mar 28, 2025
31e82a9
add patches
xadupre Mar 28, 2025
299f116
Merge branch 'main' of https://github.com/microsoft/onnxruntime into …
xadupre Mar 31, 2025
cdec2d0
fix import
xadupre Mar 31, 2025
827d3bd
fix build and import
xadupre Mar 31, 2025
18b649e
build
xadupre Mar 31, 2025
0e77ed4
fix lint
xadupre Mar 31, 2025
4633a3e
lint
xadupre Mar 31, 2025
b12287a
lint
xadupre Mar 31, 2025
1b926cb
rename
xadupre Mar 31, 2025
6646e61
lint
xadupre Mar 31, 2025
a14b8b3
lint
xadupre Mar 31, 2025
9f3a816
remove args.dynamo
xadupre Apr 1, 2025
0c88e42
fix issues
xadupre Apr 1, 2025
8b60535
copy inputs
xadupre Apr 1, 2025
741285b
fix shape
xadupre Apr 1, 2025
f8490a5
fix validation
xadupre Apr 2, 2025
dbe202c
Merge branch 'main' of https://github.com/microsoft/onnxruntime into …
xadupre Apr 2, 2025
ca43041
add use_dynamo_export
xadupre Apr 2, 2025
19d4dfb
lint
xadupre Apr 2, 2025
49fb806
Merge branch 'llama2' of https://github.com/xadupre/onnxruntime into …
xadupre Apr 3, 2025
8d3b0ba
fix requirements
xadupre Apr 3, 2025
835b76e
fix requitmeents
xadupre Apr 3, 2025
a0a8c21
fix dynamic shapes
xadupre Apr 3, 2025
f61c27b
2.6
xadupre Apr 3, 2025
902c6af
remove duplicated section
xadupre Apr 3, 2025
e3188ad
lint
xadupre Apr 3, 2025
ca8233f
Update whisper transformer module to 4.48.0
jchen351 Apr 10, 2025
6378cc0
Merge remote-tracking branch 'origin/xadupre/llama' into Cjian/whisper
jchen351 Apr 10, 2025
d5eedbc
Merge remote-tracking branch 'origin/main' into Cjian/whisper
jchen351 Apr 12, 2025
1d1c650
Install ninja in requirements.txt
jchen351 Apr 14, 2025
531d1cf
Add check if ninjia is installed
jchen351 Apr 15, 2025
a3c1bc1
merge conflicts
xadupre Apr 18, 2025
bd78ca0
update for whisper
xadupre Apr 18, 2025
2538793
remove wrong import
xadupre Apr 18, 2025
255f393
remove unusd import
xadupre Apr 18, 2025
6044947
lint
xadupre Apr 18, 2025
40aed64
almost there
xadupre Apr 18, 2025
70374db
Update onnxruntime/python/tools/transformers/models/torch_export_patc…
xadupre Apr 18, 2025
89e18b7
Update onnxruntime/python/tools/transformers/models/torch_export_patc…
xadupre Apr 18, 2025
9b1d9bb
Update onnxruntime/python/tools/transformers/models/torch_export_patc…
xadupre Apr 18, 2025
ad03686
Update onnxruntime/python/tools/transformers/models/whisper/common_on…
xadupre Apr 18, 2025
f8a5910
Update onnxruntime/python/tools/transformers/models/torch_export_patc…
xadupre Apr 18, 2025
31640bb
other fixes
xadupre Apr 18, 2025
5cda3f4
decoder ok
xadupre Apr 18, 2025
e5e032b
fix encoder
xadupre Apr 18, 2025
0fc8d15
+copyright
xadupre Apr 18, 2025
4446827
Update onnxruntime/python/tools/transformers/models/torch_export_patc…
xadupre Apr 18, 2025
c890afd
Update onnxruntime/python/tools/transformers/models/whisper/whisper_i…
xadupre Apr 18, 2025
6e8b37f
Update onnxruntime/python/tools/transformers/models/whisper/common_on…
xadupre Apr 18, 2025
bf364b6
Update onnxruntime/python/tools/transformers/models/whisper/common_on…
xadupre Apr 18, 2025
6ff2ccb
Update onnxruntime/python/tools/transformers/models/whisper/convert_t…
xadupre Apr 18, 2025
2b1ea1f
Update onnxruntime/python/tools/transformers/models/torch_export_patc…
xadupre Apr 18, 2025
670e504
Update onnxruntime/python/tools/transformers/models/whisper/common_on…
xadupre Apr 18, 2025
61ff3ad
Update onnxruntime/python/tools/transformers/models/whisper/common_on…
xadupre Apr 18, 2025
7d73351
Format
justinchuby Apr 18, 2025
a987583
Adding optree to the pip
jchen351 Apr 24, 2025
ccecdac
Adding optree to the pip
jchen351 Apr 25, 2025
aea3af0
Merge branch 'main' into Cjian/whisper
jchen351 Apr 27, 2025
708bf66
Update onnxruntime/python/tools/transformers/models/whisper/common_on…
jchen351 Apr 27, 2025
b900016
Update onnxruntime/python/tools/transformers/models/whisper/common_on…
jchen351 Apr 27, 2025
e6c9e55
Merge with main
jchen351 Apr 27, 2025
145dc5d
Merge remote-tracking branch 'origin/Cjian/whisper' into Cjian/whisper
jchen351 Apr 27, 2025
1df40d6
Merge with main
jchen351 Apr 27, 2025
16b62d6
Merge branch 'main' of https://github.com/microsoft/onnxruntime into …
xadupre Apr 28, 2025
160cec4
Merge branch 'main' into Cjian/whisper
jchen351 Apr 28, 2025
75fb629
Merge with main
jchen351 Apr 29, 2025
d0edc48
Merge branch 'Cjian/wishper2' into Cjian/whisper
jchen351 Apr 29, 2025
75d841e
Merge with main
jchen351 Apr 29, 2025
79621a6
Update onnxruntime/python/tools/transformers/models/torch_export_patc…
jchen351 Apr 30, 2025
dabb0d0
Update onnxruntime/python/tools/transformers/models/torch_export_patc…
jchen351 Apr 30, 2025
@@ -1,12 +1,21 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------

from dataclasses import fields, is_dataclass
from typing import Any

import numpy as np
import packaging.version as pv
import torch
from onnx import TensorProto
from onnx.helper import np_dtype_to_tensor_dtype
from transformers import __version__ as transformers_version
from transformers.cache_utils import DynamicCache, EncoderDecoderCache

from .cache_helper import make_dynamic_cache, make_encoder_decoder_cache
from .onnx_export_errors import (
    bypass_export_some_errors,
    register_additional_serialization_functions,
@@ -32,6 +41,52 @@ def is_torchdynamo_exporting() -> bool:
        return False


def torch_dtype_to_onnx_dtype(to: torch.dtype) -> int:
    """
    Converts a torch dtype into an ONNX element type.

    :param to: torch dtype
    :return: onnx type
    """
    if to == torch.float32:
        return TensorProto.FLOAT
    if to == torch.float16:
        return TensorProto.FLOAT16
    if to == torch.bfloat16:
        return TensorProto.BFLOAT16
    if to == torch.float64:
        return TensorProto.DOUBLE
    if to == torch.int64:
        return TensorProto.INT64
    if to == torch.int32:
        return TensorProto.INT32
    if to == torch.uint64:
        return TensorProto.UINT64
    if to == torch.uint32:
        return TensorProto.UINT32
    if to == torch.bool:
        return TensorProto.BOOL
    if to == torch.SymInt:
        return TensorProto.INT64
    if to == torch.int16:
        return TensorProto.INT16
    if to == torch.uint16:
        return TensorProto.UINT16
    if to == torch.int8:
        return TensorProto.INT8
    if to == torch.uint8:
        return TensorProto.UINT8
    if to == torch.SymFloat:
        return TensorProto.FLOAT
    if to == torch.complex64:
        return TensorProto.COMPLEX64
    if to == torch.complex128:
        return TensorProto.COMPLEX128
    raise NotImplementedError(f"Unable to convert torch dtype {to!r} to onnx dtype.")
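The chain of comparisons above can also be expressed as a table lookup. The sketch below is a hypothetical stand-alone variant, not code from this PR: the names `_DTYPE_TO_ONNX` and `dtype_name_to_onnx` are invented, it keys on dtype name strings rather than `torch.dtype` objects, and it hard-codes the `onnx.TensorProto` enum integers so it runs without `torch` or `onnx` installed.

```python
# Hypothetical table-driven variant of torch_dtype_to_onnx_dtype.
# Keys are torch dtype names (str(dtype) without the "torch." prefix);
# values are onnx.TensorProto enum integers.
_DTYPE_TO_ONNX = {
    "float32": 1,    # TensorProto.FLOAT
    "float16": 10,   # TensorProto.FLOAT16
    "bfloat16": 16,  # TensorProto.BFLOAT16
    "float64": 11,   # TensorProto.DOUBLE
    "int64": 7,      # TensorProto.INT64
    "int32": 6,      # TensorProto.INT32
    "bool": 9,       # TensorProto.BOOL
    "uint8": 2,      # TensorProto.UINT8
}


def dtype_name_to_onnx(name: str) -> int:
    """Looks up the ONNX element type for a torch dtype name."""
    try:
        return _DTYPE_TO_ONNX[name]
    except KeyError:
        # Mirror the original function's behavior for unknown dtypes.
        raise NotImplementedError(f"Unable to convert dtype {name!r} to onnx dtype.")
```

A dict keeps the mapping declarative and makes the unsupported-dtype case a single `KeyError` path instead of falling off the end of an if-chain.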


def string_type(
    obj: Any,
    with_shape: bool = False,
@@ -178,8 +233,6 @@ def string_type(
return f"dict({s})"
# array
if isinstance(obj, np.ndarray):
from .onnx_helper import np_dtype_to_tensor_dtype

if with_min_max:
s = string_type(obj, with_shape=with_shape)
if len(obj.shape) == 0:
@@ -257,16 +310,12 @@ def string_type(

# Tensors
if isinstance(obj, torch._subclasses.fake_tensor.FakeTensor):
from .onnx_helper import torch_dtype_to_onnx_dtype

i = torch_dtype_to_onnx_dtype(obj.dtype)
prefix = ("G" if obj.get_device() >= 0 else "C") if with_device else ""
if not with_shape:
return f"{prefix}F{i}r{len(obj.shape)}"
return f"{prefix}F{i}s{'x'.join(map(str, obj.shape))}"
if isinstance(obj, torch.Tensor):
from .onnx_helper import torch_dtype_to_onnx_dtype

if with_min_max:
s = string_type(obj, with_shape=with_shape, with_device=with_device)
if len(obj.shape) == 0:
@@ -307,6 +356,57 @@ def string_type(
return f"OV{dt}s{'x'.join(map(str, shape))}"
return f"OV{dt}r{len(shape)}"

    if obj.__class__.__name__ == "MambaCache":
        c = string_type(
            obj.conv_states,
            with_shape=with_shape,
            with_min_max=with_min_max,
            with_device=with_device,
            limit=limit,
        )
        d = string_type(
            obj.ssm_states,
            with_shape=with_shape,
            with_min_max=with_min_max,
            with_device=with_device,
            limit=limit,
        )
        return f"MambaCache(conv_states={c}, ssm_states={d})"

    if obj.__class__.__name__ == "DynamicCache":
        kc = string_type(
            obj.key_cache,
            with_shape=with_shape,
            with_min_max=with_min_max,
            with_device=with_device,
            limit=limit,
        )
        vc = string_type(
            obj.value_cache,
            with_shape=with_shape,
            with_min_max=with_min_max,
            with_device=with_device,
            limit=limit,
        )
        return f"{obj.__class__.__name__}(key_cache={kc}, value_cache={vc})"

    if obj.__class__.__name__ == "EncoderDecoderCache":
        att = string_type(
            obj.self_attention_cache,
            with_shape=with_shape,
            with_min_max=with_min_max,
            with_device=with_device,
            limit=limit,
        )
        cross = string_type(
            obj.cross_attention_cache,
            with_shape=with_shape,
            with_min_max=with_min_max,
            with_device=with_device,
            limit=limit,
        )
        return f"{obj.__class__.__name__}(self_attention_cache={att}, cross_attention_cache={cross})"

    # other classes

    if obj.__class__ in torch.utils._pytree.SUPPORTED_NODES:
@@ -367,59 +467,6 @@ def string_type(
    ):
        return repr(obj).replace(" ", "").replace("\n", " ")

    # to avoid failures

    if obj.__class__.__name__ == "MambaCache":
        c = string_type(
            obj.conv_states,
            with_shape=with_shape,
            with_min_max=with_min_max,
            with_device=with_device,
            limit=limit,
        )
        d = string_type(
            obj.ssm_states,
            with_shape=with_shape,
            with_min_max=with_min_max,
            with_device=with_device,
            limit=limit,
        )
        return f"MambaCache(conv_states={c}, ssm_states={d})"

    if obj.__class__.__name__ == "DynamicCache":
        kc = string_type(
            obj.key_cache,
            with_shape=with_shape,
            with_min_max=with_min_max,
            with_device=with_device,
            limit=limit,
        )
        vc = string_type(
            obj.value_cache,
            with_shape=with_shape,
            with_min_max=with_min_max,
            with_device=with_device,
            limit=limit,
        )
        return f"{obj.__class__.__name__}(key_cache={kc}, value_cache={vc})"

    if obj.__class__.__name__ == "EncoderDecoderCache":
        att = string_type(
            obj.self_attention_cache,
            with_shape=with_shape,
            with_min_max=with_min_max,
            with_device=with_device,
            limit=limit,
        )
        cross = string_type(
            obj.cross_attention_cache,
            with_shape=with_shape,
            with_min_max=with_min_max,
            with_device=with_device,
            limit=limit,
        )
        return f"{obj.__class__.__name__}(self_attention_cache={att}, cross_attention_cache={cross})"

    raise AssertionError(f"Unsupported type {type(obj).__name__!r} - {type(obj)}")
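`string_type` builds compact, human-readable descriptors for arbitrarily nested inputs by recursing over containers and special-casing known classes. As a rough stand-alone illustration of that pattern (not code from this PR; `describe` is a hypothetical name and covers only builtin containers, with none of the shape/min-max/device options):

```python
# Minimal sketch of the recursive descriptor idea behind string_type,
# restricted to builtin Python types so it runs without torch.
def describe(obj) -> str:
    if isinstance(obj, (int, float, str, bool)):
        return type(obj).__name__  # scalars: just the type name
    if isinstance(obj, list):
        # lists carry their length, like string_type's "#N[...]" form
        return f"#{len(obj)}[{','.join(describe(o) for o in obj)}]"
    if isinstance(obj, tuple):
        return f"({','.join(describe(o) for o in obj)})"
    if isinstance(obj, dict):
        return f"dict({','.join(f'{k}:{describe(v)}' for k, v in obj.items())})"
    raise AssertionError(f"Unsupported type {type(obj).__name__!r}")
```

The same skeleton extends naturally with extra branches per class, which is how the cache classes above are handled.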


@@ -461,3 +508,39 @@ def make_encoder_decoder_cache(
) -> EncoderDecoderCache:
    "Creates an EncoderDecoderCache."
    return EncoderDecoderCache(self_attention_cache=self_attention_cache, cross_attention_cache=cross_attention_cache)


def torch_deepcopy(value: Any) -> Any:
    """Makes a deep copy."""
    if isinstance(value, (int, float, str)):
        return value
    if isinstance(value, tuple):
        return tuple(torch_deepcopy(v) for v in value)
    if isinstance(value, list):
        return [torch_deepcopy(v) for v in value]
    if isinstance(value, set):
        return {torch_deepcopy(v) for v in value}
    if isinstance(value, dict):
        if type(value) is dict:
            return {k: torch_deepcopy(v) for k, v in value.items()}
        # for BaseModelOutput
        return value.__class__(**{k: torch_deepcopy(v) for k, v in value.items()})
    if isinstance(value, np.ndarray):
        return value.copy()
    if hasattr(value, "clone"):
        return value.clone()
    if value.__class__.__name__ == "DynamicCache":
        return make_dynamic_cache(torch_deepcopy(list(zip(value.key_cache, value.value_cache, strict=False))))
    if value.__class__.__name__ == "EncoderDecoderCache":
        return make_encoder_decoder_cache(
            torch_deepcopy(value.self_attention_cache),
            torch_deepcopy(value.cross_attention_cache),
        )
    if value.__class__ in torch.utils._pytree.SUPPORTED_NODES:
        args, spec = torch.utils._pytree.tree_flatten(value)
        new_args = torch_deepcopy(args)
        return torch.utils._pytree.tree_unflatten(new_args, spec)

    # A fallback based on serialization/deserialization could go here,
    # assuming a model cannot be exported without those functions.
    raise NotImplementedError(f"torch_deepcopy not implemented for type {type(value)}")
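`torch_deepcopy` dispatches on type and recurses, instead of relying on `copy.deepcopy`, so that tensors go through `.clone()` and cache classes are rebuilt through their helper constructors. A minimal pure-Python sketch of the same dispatch pattern, restricted to builtin containers so it runs without torch (`simple_deepcopy` is a hypothetical name, not part of this PR):

```python
# Type-dispatched recursive deep copy over builtin containers,
# mirroring the structure of torch_deepcopy above.
def simple_deepcopy(value):
    if isinstance(value, (int, float, str, bytes, type(None))):
        return value  # immutables can be returned as-is
    if isinstance(value, tuple):
        return tuple(simple_deepcopy(v) for v in value)
    if isinstance(value, list):
        return [simple_deepcopy(v) for v in value]
    if isinstance(value, set):
        return {simple_deepcopy(v) for v in value}
    if isinstance(value, dict):
        return {k: simple_deepcopy(v) for k, v in value.items()}
    # In torch_deepcopy, tensors, caches, and pytree-registered classes
    # get dedicated branches here.
    raise NotImplementedError(f"not implemented for type {type(value)}")
```

The explicit type ladder makes it obvious which classes are copyable, and raising on anything unknown surfaces unsupported inputs early rather than silently sharing state.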
@@ -1,3 +1,9 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------

import packaging.version as pv
import torch
from transformers import __version__ as transformers_version
@@ -30,6 +36,25 @@ def is_cache_dynamic_registered(fast: bool = False) -> bool:
    return len(cache2.key_cache) == len(cache.value_cache)


def flatten_unflatten_for_dynamic_shapes(obj):
    """
    Returns the object in a different structure similar to what
    the definition of the dynamic shapes should use.

    :param obj: object from a custom class
    :return: the serialized object
    """
    flat, spec = torch.utils._pytree.tree_flatten(obj)
    start = 0
    end = 0
    subtrees = []
    for subspec in spec.children_specs:
        end += subspec.num_leaves
        subtrees.append(subspec.unflatten(flat[start:end]))
        start = end
    return subtrees
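The function walks `spec.children_specs`, slicing the flat leaf list by each child's `num_leaves` and unflattening one slice per top-level child. A stand-alone sketch of that regrouping over plain nested lists (`flatten` and `regroup_per_child` are hypothetical names standing in for the pytree calls):

```python
# Flatten a nested list structure into its leaves.
def flatten(obj):
    if isinstance(obj, (list, tuple)):
        out = []
        for o in obj:
            out.extend(flatten(o))
        return out
    return [obj]


# Regroup the flat leaves into one slice per top-level child,
# mirroring the start/end bookkeeping in the function above.
def regroup_per_child(obj):
    flat = flatten(obj)
    subtrees, start = [], 0
    for child in obj:
        n = len(flatten(child))  # this child's leaf count (num_leaves)
        subtrees.append(flat[start:start + n])
        start += n
    return subtrees
```

Each returned slice corresponds to one immediate child of the input, which is the granularity at which dynamic shapes are declared.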


if pv.Version(transformers_version) > pv.Version("4.49.99999"):

    def make_dynamic_cache(