Skip to content

Commit 9f75c42

Browse files
committed
Fixed a memory overflow bug
1 parent caf1968 commit 9f75c42

File tree

3 files changed

+11
-15
lines changed

3 files changed

+11
-15
lines changed

examples/apps/flux_demo.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ def forward_loop(mod):
8282
pipe.transformer = mod
8383
do_calibrate(
8484
pipe=pipe,
85-
prompt="test",
85+
prompt="a dog running in a park",
8686
)
8787

8888
if args.dtype != "fp16":

py/torch_tensorrt/dynamo/lowering/passes/constant_folding.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -98,20 +98,21 @@ def replace_node_with_constant(
9898
class _TorchTensorRTConstantFolder(ConstantFolder): # type: ignore[misc]
9999
def __init__(self, *args: Any, **kwargs: Any) -> None:
100100
super().__init__(*args, **kwargs)
101-
102-
def is_impure(self, node: torch.fx.node.Node) -> bool:
103101
# Set of known quantization ops to be excluded from constant folding.
104102
# Currently, we exclude all quantization ops coming from modelopt library.
105-
quantization_ops: Set[torch._ops.OpOverload] = set()
103+
self.quantization_ops = set()
106104
try:
107105
# modelopt import ensures torch.ops.tensorrt.quantize_op.default is registered
108-
import modelopt.torch.quantization as mtq # noqa: F401
106+
import modelopt.torch.quantization as mtq
109107

110108
assert torch.ops.tensorrt.quantize_op.default
111-
quantization_ops.add(torch.ops.tensorrt.quantize_op.default)
112-
quantization_ops.add(torch.ops.tensorrt.dynamic_block_quantize_op.default)
109+
self.quantization_ops.add(torch.ops.tensorrt.quantize_op.default)
113110
except Exception as e:
114111
pass
115-
if quantization_ops and node.target in quantization_ops:
112+
113+
# TODO: Update this function when quantization is added
114+
def is_impure(self, node: torch.fx.node.Node) -> bool:
115+
116+
if node.target in self.quantization_ops:
116117
return True
117118
return False

tools/perf/Flux/flux_perf.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from time import time
55

66
sys.path.append(os.path.join(os.path.dirname(__file__), "../../../examples/apps"))
7-
from flux_demo import compile_model
7+
from flux_demo import compile_model, parse_args
88

99

1010
def benchmark(pipe, prompt, inference_step, batch_size=1, iterations=1):
@@ -56,16 +56,11 @@ def main(args):
5656
action="store_true",
5757
help="Use dynamic shapes",
5858
)
59-
parser.add_argument(
60-
"--max_batch_size",
61-
type=int,
62-
default=1,
63-
help="Maximum batch size to use",
64-
)
6559
parser.add_argument(
6660
"--debug",
6761
action="store_true",
6862
help="Use debug mode",
6963
)
64+
parser.add_argument("--max_batch_size", type=int, default=1)
7065
args = parser.parse_args()
7166
main(args)

0 commit comments

Comments (0)