add tests and rebase

zewenli98 · zewenli98 · commit bdec72055e5a · 2025-06-17T12:54:06.000-07:00
diff --git a/examples/dynamo/hierarchical_partitioner_example.py b/examples/dynamo/hierarchical_partitioner_example.py
@@ -73,7 +73,6 @@ def main():
     # 1. Partition the model into blocks that can be executed by different backends
     partitioned_model, op_support = hierarchical_adjacency_partition(
         gm,
-        verbose=True,
         min_block_size=1,
         backend_priority=["inductor", "tensorrt"],
         backend_support_map={
diff --git a/py/torch_tensorrt/dynamo/partitioning/_adjacency_partitioner.py b/py/torch_tensorrt/dynamo/partitioning/_adjacency_partitioner.py
@@ -261,7 +261,6 @@ def partition(
 
     Args:
         gm: FX GraphModule to partition
-        verbose: Bool representing whether to print operator support
         min_block_size: Minimum number of operators per TRT-Engine Block
         torch_executed_ops: Collection of operations to run in Torch, regardless of converter coverage
         require_full_compilation: Require that all computational operators be run in TRT
diff --git a/py/torch_tensorrt/dynamo/partitioning/_global_partitioner.py b/py/torch_tensorrt/dynamo/partitioning/_global_partitioner.py
@@ -210,7 +210,6 @@ def partition(
 
     Args:
         gm: FX GraphModule to partition
-        verbose: Bool representing whether to print operator support
         min_block_size: Minimum number of operators per TRT-Engine Block
         torch_executed_ops: Collection of operations to run in Torch, regardless of converter coverage
         require_full_compilation: Whether to require that all operators be run in TRT
diff --git a/py/torch_tensorrt/dynamo/partitioning/_hierarchical_partitioner.py b/py/torch_tensorrt/dynamo/partitioning/_hierarchical_partitioner.py
@@ -18,7 +18,6 @@
     is_node_output_tensor,
 )
 from torch_tensorrt.dynamo._defaults import (
-    DEBUG,
     MIN_BLOCK_SIZE,
     REQUIRE_FULL_COMPILATION,
 )
@@ -527,7 +526,6 @@ class FxNetSplitterInternalError(Exception):
 
 def hierarchical_adjacency_partition(
     gm: torch.fx.GraphModule,
-    verbose: bool = DEBUG,
     min_block_size: int = MIN_BLOCK_SIZE,
     torch_executed_ops: Collection[Target] = set(),
     backend_support_map: Optional[Dict[str, Collection[Target]]] = None,
@@ -540,7 +538,6 @@ def hierarchical_adjacency_partition(
 
     Args:
         gm: FX GraphModule to partition
-        verbose: Bool representing whether to print operator support
         min_block_size: Minimum number of operators per TRT-Engine Block
         backend_support_map: Dictionary mapping backend names to sets of supported operators
         backend_priority: Ordered list of backend names, from highest to lowest priority
@@ -583,7 +580,6 @@ def hierarchical_adjacency_partition(
 
     partitioned_graph = partitioner.partition_graph()
 
-    if verbose:
-        supported_ops.print_support_overview(partitioner.num_accelerated_subgraphs)
+    supported_ops.print_support_overview(partitioner.num_accelerated_subgraphs)
 
     return partitioned_graph, supported_ops
diff --git a/tests/py/dynamo/partitioning/test_hierarchical_partitioning.py b/tests/py/dynamo/partitioning/test_hierarchical_partitioning.py
@@ -0,0 +1,262 @@
+import unittest
+from copy import deepcopy
+
+import numpy as np
+import torch
+from torch.testing._internal.common_utils import TestCase, run_tests
+from torch_tensorrt.dynamo import partitioning
+
+# Substrings these tests look for in the names of the partitioned graph's children.
+ACCELERATED_MARKER = "_run_on_acc"
+INDUCTOR_MARKER = "_run_on_acc_inductor"
+TENSORRT_MARKER = "_run_on_acc_tensorrt"
+TORCH_GPU_MARKER = "_run_on_gpu"
+
+
+def _count_children(partitioned_graph, marker):
+    """Return how many direct child modules have ``marker`` in their name."""
+    return sum(1 for name, _ in partitioned_graph.named_children() if marker in name)
+
+
+def _count_children_by_marker(partitioned_graph, markers):
+    """Tally direct child modules by the first entry of ``markers`` in their name.
+
+    Raises:
+        ValueError: If a child's name contains none of ``markers``.
+    """
+    counts = dict.fromkeys(markers, 0)
+    for name, _ in partitioned_graph.named_children():
+        matched = next((marker for marker in markers if marker in name), None)
+        if matched is None:
+            raise ValueError(f"Unknown backend: {name}")
+        counts[matched] += 1
+    return counts
+
+
+class TestHierarchicalAdjacencyPartitioning(TestCase):
+    """Tests for ``hierarchical_adjacency_partition`` on small graphs."""
+
+    def test_hierarchical_adjacency_partition_fully_supported_one_op(self):
+        class FullySupportedOneOp(torch.nn.Module):
+            def forward(self, x, y):
+                return torch.ops.aten.add.Tensor(x, y)
+
+        fx_graph = torch.fx.symbolic_trace(FullySupportedOneOp())
+        partitioned_graph, _ = partitioning.hierarchical_adjacency_partition(
+            deepcopy(fx_graph),
+        )
+        self.assertEqual(
+            _count_children(partitioned_graph, ACCELERATED_MARKER),
+            0,
+            "Single operators should not be segmented",
+        )
+
+    def test_hierarchical_adjacency_partition_fully_supported_one_op_require_full_compilation(
+        self,
+    ):
+        class FullySupportedOneOp(torch.nn.Module):
+            def forward(self, x, y):
+                return torch.ops.aten.add.Tensor(x, y)
+
+        fx_graph = torch.fx.symbolic_trace(FullySupportedOneOp())
+        partitioned_graph, _ = partitioning.hierarchical_adjacency_partition(
+            deepcopy(fx_graph), require_full_compilation=True
+        )
+        self.assertEqual(
+            _count_children(partitioned_graph, ACCELERATED_MARKER),
+            1,
+            "Single operators can be segmented if full compilation is required",
+        )
+
+    def test_hierarchical_adjacency_partition_fully_supported_multi_op(self):
+        class FullySupportedMultiOp(torch.nn.Module):
+            def forward(self, x, y):
+                sum_ = torch.ops.aten.sub.Tensor(x, y)
+                # aten.cat takes the tensors as one sequence; its second
+                # positional argument is the dimension, not another tensor.
+                concat_ = torch.ops.aten.cat.default((x, sum_))
+                relu_ = torch.ops.aten.relu.default(concat_)
+                pow_ = torch.ops.aten.pow.Tensor_Scalar(relu_, 2)
+                return pow_
+
+        fx_graph = torch.fx.symbolic_trace(FullySupportedMultiOp())
+        partitioned_graph, _ = partitioning.hierarchical_adjacency_partition(
+            deepcopy(fx_graph), min_block_size=2
+        )
+        self.assertEqual(
+            _count_children(partitioned_graph, ACCELERATED_MARKER),
+            1,
+            "All operators are supported, there should be one segment",
+        )
+
+    def test_hierarchical_adjacency_partition_partially_supported_multi_op(self):
+        class PartiallySupportedMultiOp(torch.nn.Module):
+            def forward(self, x, y):
+                sum_1 = torch.ops.aten.add.Tensor(x, y)
+                sum_2 = torch.ops.aten.add.Tensor(x, sum_1)
+                sum_ = np.sum(sum_1) + np.sum(sum_2)
+                relu_ = torch.ops.aten.relu.default(sum_)
+                pow_ = torch.ops.aten.pow.Tensor_Scalar(relu_, 2)
+                return pow_
+
+        fx_graph = torch.fx.symbolic_trace(PartiallySupportedMultiOp())
+        partitioned_graph, _ = partitioning.hierarchical_adjacency_partition(
+            deepcopy(fx_graph), min_block_size=2
+        )
+        self.assertEqual(
+            _count_children(partitioned_graph, ACCELERATED_MARKER),
+            2,
+            "Unsupported operators interleave supported ones, expected 2 segments",
+        )
+
+    def test_hierarchical_adjacency_partition_partially_supported_with_torch_executed_ops(
+        self,
+    ):
+        class PartiallySupportedMultiOp(torch.nn.Module):
+            def forward(self, x, y):
+                sum_1 = torch.ops.aten.add.Tensor(x, y)
+                sum_2 = torch.ops.aten.add.Tensor(x, sum_1)
+                sum_ = torch.ops.aten.add.Tensor(sum_1, sum_2)
+                relu_ = torch.ops.aten.relu.default(sum_)
+                pow_ = torch.ops.aten.pow.Tensor_Scalar(relu_, 2)
+                return pow_
+
+        torch_executed_ops = {torch.ops.aten.add.Tensor}
+        expected_ops = {torch.ops.aten.relu.default, torch.ops.aten.pow.Tensor_Scalar}
+
+        fx_graph = torch.fx.symbolic_trace(PartiallySupportedMultiOp())
+        partitioned_graph, _ = partitioning.hierarchical_adjacency_partition(
+            deepcopy(fx_graph),
+            min_block_size=1,
+            torch_executed_ops=torch_executed_ops,
+        )
+
+        # Collect every call_function target placed inside an accelerated submodule.
+        accelerated_targets = set()
+        for name, submodule in partitioned_graph.named_children():
+            if ACCELERATED_MARKER not in name:
+                continue
+            accelerated_targets.update(
+                node.target
+                for node in submodule.graph.nodes
+                if node.op == "call_function"
+            )
+
+        unexpected_ops_seen = accelerated_targets & torch_executed_ops
+        expected_ops_unseen = expected_ops - accelerated_targets
+
+        self.assertEqual(
+            len(unexpected_ops_seen),
+            0,
+            f"The following unexpected ops were encountered: {unexpected_ops_seen}",
+        )
+        self.assertEqual(
+            len(expected_ops_unseen),
+            0,
+            f"The following expected ops were not encountered: {expected_ops_unseen}",
+        )
+
+    class SimpleModel(torch.nn.Module):
+        """Two conv -> batch norm -> ReLU stages used by the two-backend tests."""
+
+        def __init__(self):
+            super().__init__()
+            self.conv1 = torch.nn.Conv2d(3, 64, kernel_size=3, padding=1)
+            self.conv2 = torch.nn.Conv2d(64, 128, kernel_size=3, padding=1)
+            self.bn1 = torch.nn.BatchNorm2d(64)
+            self.bn2 = torch.nn.BatchNorm2d(128)
+
+        def forward(self, x):
+            x = self.conv1(x)
+            x = self.bn1(x)
+            x = torch.relu(x)
+            x = self.conv2(x)
+            x = self.bn2(x)
+            x = torch.relu(x)
+            return x
+
+    def _partition_simple_model(self, **partition_kwargs):
+        """Export ``SimpleModel`` on CUDA, lower it, and partition it for two backends.
+
+        ``partition_kwargs`` are forwarded to ``hierarchical_adjacency_partition``
+        on top of the backend configuration shared by the two-backend tests.
+        """
+        # Imported lazily, only when a two-backend test actually runs.
+        from torch_tensorrt.dynamo.conversion._ConverterRegistry import (
+            DYNAMO_CONVERTERS as CONVERTERS,
+        )
+        from torch_tensorrt.dynamo.lowering import (
+            get_decompositions,
+            pre_export_lowering,
+        )
+
+        model = self.SimpleModel().cuda().eval()
+        example_input = torch.randn(1, 3, 224, 224).cuda()
+
+        exported_program = torch.export.export(model, (example_input,))
+        exported_program = pre_export_lowering(exported_program)
+        exported_program = exported_program.run_decompositions(get_decompositions())
+
+        partitioned_graph, _ = partitioning.hierarchical_adjacency_partition(
+            exported_program.module(),
+            min_block_size=1,
+            backend_priority=["inductor", "tensorrt"],
+            backend_support_map={
+                "inductor": {
+                    "torch.ops.aten.convolution.default",
+                },
+                "tensorrt": CONVERTERS.keys(),
+            },
+            **partition_kwargs,
+        )
+        return partitioned_graph
+
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA is required to run SimpleModel")
+    def test_hierarchical_adjacency_partition_with_two_backends(self):
+        counts = _count_children_by_marker(
+            self._partition_simple_model(), (INDUCTOR_MARKER, TENSORRT_MARKER)
+        )
+
+        self.assertEqual(
+            counts[INDUCTOR_MARKER],
+            2,
+            "There should be 2 subgraphs running on inductor backend",
+        )
+        self.assertEqual(
+            counts[TENSORRT_MARKER],
+            2,
+            "There should be 2 subgraphs running on tensorrt backend",
+        )
+
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA is required to run SimpleModel")
+    def test_hierarchical_adjacency_partition_with_two_backends_with_torch_executed_ops(
+        self,
+    ):
+        partitioned_graph = self._partition_simple_model(
+            torch_executed_ops={
+                "torch.ops.aten._native_batch_norm_legit_no_training.default"
+            },
+        )
+        counts = _count_children_by_marker(
+            partitioned_graph, (INDUCTOR_MARKER, TENSORRT_MARKER, TORCH_GPU_MARKER)
+        )
+
+        self.assertEqual(
+            counts[TORCH_GPU_MARKER],
+            2,
+            "There should be 2 subgraphs running on torch gpu backend",
+        )
+        self.assertEqual(
+            counts[INDUCTOR_MARKER],
+            2,
+            "There should be 2 subgraphs running on inductor backend",
+        )
+        self.assertEqual(
+            counts[TENSORRT_MARKER],
+            2,
+            "There should be 2 subgraphs running on tensorrt backend",
+        )
+
+
+if __name__ == "__main__":
+    run_tests()