diff --git a/.github/workflows/nightly_tests.yml b/.github/workflows/nightly_tests.yml
index 88343a128bb1..7696852ecd44 100644
--- a/.github/workflows/nightly_tests.yml
+++ b/.github/workflows/nightly_tests.yml
@@ -180,6 +180,55 @@ jobs:
           pip install slack_sdk tabulate
           python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
 
+  run_torch_compile_tests:
+    name: PyTorch Compile CUDA tests
+
+    runs-on:
+      group: aws-g4dn-2xlarge
+
+    container:
+      image: diffusers/diffusers-pytorch-compile-cuda
+      options: --gpus 0 --shm-size "16gb" --ipc host
+
+    steps:
+      - name: Checkout diffusers
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 2
+
+      - name: NVIDIA-SMI
+        run: |
+          nvidia-smi
+      - name: Install dependencies  # NOTE(review): "-e [quality,test,training]" is unquoted and has no "." path - confirm uv resolves it to this checkout
+        run: |
+          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+          python -m uv pip install -e [quality,test,training]
+      - name: Environment
+        run: |
+          python utils/print_env.py
+      - name: Run torch compile tests on GPU  # -k "compile" keeps only tests whose name matches "compile"
+        env:
+          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
+          RUN_COMPILE: yes  # presumably what enables tests marked with is_torch_compile - verify in testing_utils
+        run: |
+          python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v -k "compile" --make-reports=tests_torch_compile_cuda tests/
+      - name: Failure short reports  # presumably the file written by --make-reports=tests_torch_compile_cuda above - confirm
+        if: ${{ failure() }}
+        run: cat reports/tests_torch_compile_cuda_failures_short.txt
+
+      - name: Test suite reports artifacts
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: torch_compile_test_reports
+          path: reports
+
+      - name: Generate Report and Notify Channel
+        if: always()
+        run: |
+          pip install slack_sdk tabulate
+          python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
+
   run_big_gpu_torch_tests:
     name: Torch tests on big GPU
     strategy:
diff --git a/.github/workflows/release_tests_fast.yml b/.github/workflows/release_tests_fast.yml
index 27bd9bd9bb42..9d65db2f0dee 100644
--- a/.github/workflows/release_tests_fast.yml
+++ b/.github/workflows/release_tests_fast.yml
@@ -335,7 +335,7 @@ jobs:
       - name: Environment
         run: |
           python utils/print_env.py
-      - name: Run example tests on GPU
+      - name: Run torch compile tests on GPU
         env:
           HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
           RUN_COMPILE: yes
diff --git a/tests/models/test_modeling_common.py b/tests/models/test_modeling_common.py
index f82a2407f333..a7a42368a84d 100644
--- a/tests/models/test_modeling_common.py
+++ b/tests/models/test_modeling_common.py
@@ -1714,6 +1714,44 @@ def test_push_to_hub_library_name(self):
         delete_repo(self.repo_id, token=TOKEN)
 
 
+class TorchCompileTesterMixin:
+    """Add a `torch.compile` regression test to a model test case.
+
+    The host class must provide `model_class` and `prepare_init_args_and_inputs_for_common()`.
+    """
+
+    def setUp(self):
+        # clean up the VRAM and reset the dynamo state before each test
+        super().setUp()
+        torch._dynamo.reset()
+        gc.collect()
+        backend_empty_cache(torch_device)
+
+    def tearDown(self):
+        # clean up the VRAM and reset the dynamo state after each test in case of CUDA runtime errors
+        super().tearDown()
+        torch._dynamo.reset()
+        gc.collect()
+        backend_empty_cache(torch_device)
+
+    @require_torch_gpu
+    @require_torch_2
+    @is_torch_compile
+    @slow
+    def test_torch_compile_recompilation_and_graph_break(self):
+        """Compile with `fullgraph=True` and run twice; a graph break or a recompilation raises."""
+        torch._dynamo.reset()
+        init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
+
+        model = self.model_class(**init_dict).to(torch_device)
+        model = torch.compile(model, fullgraph=True)
+
+        # error_on_recompile makes the second, identical call fail if it triggers a recompilation
+        with torch._dynamo.config.patch(error_on_recompile=True), torch.no_grad():
+            _ = model(**inputs_dict)
+            _ = model(**inputs_dict)
+
+
 @slow
 @require_torch_2
 @require_torch_accelerator
diff --git a/tests/models/transformers/test_models_transformer_flux.py b/tests/models/transformers/test_models_transformer_flux.py
index c88b3dac8216..f767d2196e7c 100644
--- a/tests/models/transformers/test_models_transformer_flux.py
+++ b/tests/models/transformers/test_models_transformer_flux.py
@@ -22,7 +22,7 @@
 from diffusers.models.embeddings import ImageProjection
 from diffusers.utils.testing_utils import enable_full_determinism, torch_device
 
-from ..test_modeling_common import ModelTesterMixin
+from ..test_modeling_common import ModelTesterMixin, TorchCompileTesterMixin
 
 
 enable_full_determinism()
@@ -78,7 +78,7 @@ def create_flux_ip_adapter_state_dict(model):
     return ip_state_dict
 
 
-class FluxTransformerTests(ModelTesterMixin, unittest.TestCase):
+class FluxTransformerTests(ModelTesterMixin, TorchCompileTesterMixin, unittest.TestCase):
     model_class = FluxTransformer2DModel
     main_input_name = "hidden_states"
     # We override the items here because the transformer under consideration is small.
diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py
index a950de142740..eb420d1d2f12 100644
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -1111,12 +1111,14 @@ def callback_cfg_params(self) -> frozenset:
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
+        torch._dynamo.reset()  # clear compile caches left over from earlier tests
         gc.collect()
         backend_empty_cache(torch_device)
 
     def tearDown(self):
         # clean up the VRAM after each test in case of CUDA runtime errors
         super().tearDown()
+        torch._dynamo.reset()  # clear compile caches so they do not carry into the next test
         gc.collect()
         backend_empty_cache(torch_device)
 