Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 0 additions & 10 deletions .github/workflows/test-wheel-linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -261,16 +261,6 @@ jobs:
LOCAL_CTK: ${{ matrix.LOCAL_CTK }}
run: run-tests bindings

- name: Run cuda.bindings examples
if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }}
env:
CUDA_VER: ${{ matrix.CUDA_VER }}
LOCAL_CTK: ${{ matrix.LOCAL_CTK }}
run: |
pushd cuda_bindings
${SANITIZER_CMD} pytest -ra -s -vv examples/
popd

- name: Run cuda.core tests
env:
CUDA_VER: ${{ matrix.CUDA_VER }}
Expand Down
11 changes: 0 additions & 11 deletions .github/workflows/test-wheel-windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -245,17 +245,6 @@ jobs:
shell: bash --noprofile --norc -xeuo pipefail {0}
run: run-tests bindings

- name: Run cuda.bindings examples
if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }}
env:
CUDA_VER: ${{ matrix.CUDA_VER }}
LOCAL_CTK: ${{ matrix.LOCAL_CTK }}
shell: bash --noprofile --norc -xeuo pipefail {0}
run: |
pushd cuda_bindings
${SANITIZER_CMD} pytest -ra -s -vv examples/
popd

- name: Run cuda.core tests
env:
CUDA_VER: ${{ matrix.CUDA_VER }}
Expand Down
17 changes: 17 additions & 0 deletions cuda_bindings/cuda/bindings/_example_helpers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

from .common import KernelHelper, check_compute_capability_too_low, requirement_not_met
from .helper_cuda import check_cuda_errors, find_cuda_device, find_cuda_device_drv
from .helper_string import check_cmd_line_flag, get_cmd_line_argument_int

__all__ = [
"KernelHelper",
"check_cmd_line_flag",
"check_compute_capability_too_low",
"check_cuda_errors",
"find_cuda_device",
"find_cuda_device_drv",
"get_cmd_line_argument_int",
"requirement_not_met",
]
Original file line number Diff line number Diff line change
@@ -1,19 +1,27 @@
# Copyright 2021-2025 NVIDIA Corporation. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE


import os
import sys

import numpy as np
from common.helper_cuda import check_cuda_errors

from cuda import pathfinder
from cuda.bindings import driver as cuda
from cuda.bindings import nvrtc
from cuda.bindings import runtime as cudart

from .helper_cuda import check_cuda_errors


def requirement_not_met(message):
    """Report an unmet example requirement on stderr and exit.

    The exit status defaults to 1 and can be overridden through the
    ``CUDA_BINDINGS_SKIP_EXAMPLE`` environment variable.
    """
    print(message, file=sys.stderr)  # noqa: T201
    # sys.exit raises SystemExit, so nothing after this line ever runs.
    sys.exit(int(os.environ.get("CUDA_BINDINGS_SKIP_EXAMPLE", "1")))

def pytest_skipif_compute_capability_too_low(dev_id, required_cc_major_minor):
import pytest

def check_compute_capability_too_low(dev_id, required_cc_major_minor):
cc_major = check_cuda_errors(
cudart.cudaDeviceGetAttribute(cudart.cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor, dev_id)
)
Expand All @@ -22,7 +30,9 @@ def pytest_skipif_compute_capability_too_low(dev_id, required_cc_major_minor):
)
have_cc_major_minor = (cc_major, cc_minor)
if have_cc_major_minor < required_cc_major_minor:
pytest.skip(f"cudaDevAttrComputeCapability too low: {have_cc_major_minor=!r}, {required_cc_major_minor=!r}")
requirement_not_met(
f"CUDA device compute capability too low: {have_cc_major_minor=!r}, {required_cc_major_minor=!r}"
)


class KernelHelper:
Expand All @@ -31,9 +41,7 @@ def __init__(self, code, dev_id):
for libname in ("cudart", "cccl"):
hdr_dir = pathfinder.find_nvidia_header_directory(libname)
if hdr_dir is None:
import pytest

pytest.skip(f'pathfinder.find_nvidia_header_directory("{libname}") returned None')
requirement_not_met(f'pathfinder.find_nvidia_header_directory("{libname}") returned None')
include_dirs.append(hdr_dir)

prog = check_cuda_errors(nvrtc.nvrtcCreateProgram(str.encode(code), b"sourceCode.cu", 0, None, None))
Expand Down Expand Up @@ -69,8 +77,8 @@ def __init__(self, code, dev_id):
check_cuda_errors(nvrtc.nvrtcGetProgramLog(prog, log))
import sys

print(log.decode(), file=sys.stderr)
print(err, file=sys.stderr)
print(log.decode(), file=sys.stderr) # noqa: T201
print(err, file=sys.stderr) # noqa: T201
sys.exit(1)

if use_cubin:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# Copyright 2021-2025 NVIDIA Corporation. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

from common.helper_string import check_cmd_line_flag, get_cmd_line_argument_int

from cuda.bindings import driver as cuda
from cuda.bindings import nvrtc
from cuda.bindings import runtime as cudart

from .helper_string import check_cmd_line_flag, get_cmd_line_argument_int


def _cuda_get_error_enum(error):
if isinstance(error, cuda.CUresult):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2021-2025 NVIDIA Corporation. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

import sys
Expand Down
46 changes: 46 additions & 0 deletions cuda_bindings/cuda/bindings/_test_helpers/pep723.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE


import importlib.metadata
import os
import re

import pytest


def has_package_requirements_or_skip(example):
    """Skip the current test unless the example's PEP 723 dependencies are installed.

    Parses the inline script metadata block (PEP 723) of *example*, checks that
    each declared dependency resolves to an installed distribution via
    ``importlib.metadata``, and calls ``pytest.skip`` listing any that are
    missing.

    Args:
        example: Path to the example script to inspect.

    Raises:
        ValueError: If the PEP 723 metadata block or its ``dependencies`` key
            is absent, or if a dependency specifier has no parseable name.
    """
    import ast  # local: only needed to parse the dependency list safely

    example_name = os.path.basename(example)

    with open(example, encoding="utf-8") as f:
        content = f.read()

    # The canonical regex as defined in PEP 723
    pep723 = re.search(r"(?m)^# /// (?P<type>[a-zA-Z0-9-]+)$\s(?P<content>(^#(| .*)$\s)+)^# ///$", content)
    if not pep723:
        raise ValueError(f"PEP 723 metadata not found in {example_name}")

    metadata = {}
    for line in pep723.group("content").splitlines():
        line = line.lstrip("# ").rstrip()
        if not line:
            continue
        key, value = line.split("=", 1)
        metadata[key.strip()] = value.strip()

    if "dependencies" not in metadata:
        raise ValueError(f"PEP 723 dependencies not found in {example_name}")

    missing_dependencies = []
    # literal_eval (not eval) so example files cannot execute arbitrary code here.
    dependencies = ast.literal_eval(metadata["dependencies"])
    for dependency in dependencies:
        name = re.match("[a-zA-Z0-9_-]+", dependency)
        if name is None:
            raise ValueError(f"Cannot parse dependency name from {dependency!r} in {example_name}")
        try:
            importlib.metadata.distribution(name.group(0))
        except importlib.metadata.PackageNotFoundError:
            # Record the full specifier string so the skip message shows the
            # exact requirement (including any version constraint).
            missing_dependencies.append(name.string)

    if missing_dependencies:
        import pytest  # lazy: only needed when a skip must actually be issued

        pytest.skip(f"Skipping {example} due to missing package requirement: {', '.join(missing_dependencies)}")
19 changes: 12 additions & 7 deletions cuda_bindings/examples/0_Introduction/clock_nvrtc_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,16 @@
#
# ################################################################################

# /// script
# dependencies = ["cuda_bindings>13.2.1", "numpy"]
# ///

import platform

import numpy as np
from common import common
from common.helper_cuda import check_cuda_errors, find_cuda_device

from cuda.bindings import driver as cuda
from cuda.bindings._example_helpers import KernelHelper, check_cuda_errors, find_cuda_device, requirement_not_met

clock_nvrtc = """\
extern "C" __global__ void timedReduction(const float *hinput, float *output, clock_t *timer)
Expand Down Expand Up @@ -65,11 +68,13 @@ def elems_to_bytes(nelems, dt):
return nelems * np.dtype(dt).itemsize


def main():
import pytest

def check_requirements():
    """Bail out early when this example cannot run on the current platform."""
    unsupported = platform.machine() == "armv7l"
    if unsupported:
        requirement_not_met("clock_nvrtc is not supported on ARMv7")


def main():
check_requirements()

timer = np.empty(num_blocks * 2, dtype="int64")
hinput = np.empty(num_threads * 2, dtype="float32")
Expand All @@ -78,7 +83,7 @@ def main():
hinput[i] = i

dev_id = find_cuda_device()
kernel_helper = common.KernelHelper(clock_nvrtc, dev_id)
kernel_helper = KernelHelper(clock_nvrtc, dev_id)
kernel_addr = kernel_helper.get_function(b"timedReduction")

dinput = check_cuda_errors(cuda.cuMemAlloc(hinput.nbytes))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,21 @@
#
# ################################################################################


# /// script
# dependencies = ["cuda_bindings>13.2.1", "numpy"]
# ///


import ctypes
import sys
import time

import numpy as np
from common import common
from common.helper_cuda import check_cuda_errors, find_cuda_device

from cuda.bindings import driver as cuda
from cuda.bindings import runtime as cudart
from cuda.bindings._example_helpers import KernelHelper, check_cuda_errors, find_cuda_device, requirement_not_met

simple_cubemap_texture = """\
extern "C"
Expand Down Expand Up @@ -97,9 +102,7 @@ def main():
f"CUDA device [{device_props.name}] has {device_props.multiProcessorCount} Multi-Processors SM {device_props.major}.{device_props.minor}"
)
if device_props.major < 2:
import pytest

pytest.skip("Test requires SM 2.0 or higher for support of Texture Arrays.")
requirement_not_met("Test requires SM 2.0 or higher for support of Texture Arrays.")

# Generate input data for layered texture
width = 64
Expand Down Expand Up @@ -162,7 +165,7 @@ def main():
f"Covering Cubemap data array of {width}~3 x {num_layers}: Grid size is {dim_grid.x} x {dim_grid.y}, each block has 8 x 8 threads"
)

kernel_helper = common.KernelHelper(simple_cubemap_texture, dev_id)
kernel_helper = KernelHelper(simple_cubemap_texture, dev_id)
_transform_kernel = kernel_helper.get_function(b"transformKernel")
kernel_args = ((d_data, width, tex), (ctypes.c_void_p, ctypes.c_int, None))
check_cuda_errors(
Expand Down
25 changes: 13 additions & 12 deletions cuda_bindings/examples/0_Introduction/simpleP2P_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,19 @@
#
# ################################################################################

# /// script
# dependencies = ["cuda_bindings>13.2.1", "numpy"]
# ///

import ctypes
import platform
import sys

import numpy as np
from common import common
from common.helper_cuda import check_cuda_errors

from cuda.bindings import driver as cuda
from cuda.bindings import runtime as cudart
from cuda.bindings._example_helpers import KernelHelper, check_cuda_errors, requirement_not_met

simplep2p = """\
extern "C"
Expand All @@ -32,27 +35,25 @@


def main():
import pytest

if platform.system() == "Darwin":
pytest.skip("simpleP2P is not supported on Mac OSX")
requirement_not_met("simpleP2P is not supported on Mac OSX")

if platform.machine() == "armv7l":
pytest.skip("simpleP2P is not supported on ARMv7")
requirement_not_met("simpleP2P is not supported on ARMv7")

if platform.machine() == "aarch64":
pytest.skip("simpleP2P is not supported on aarch64")
requirement_not_met("simpleP2P is not supported on aarch64")

if platform.machine() == "sbsa":
pytest.skip("simpleP2P is not supported on sbsa")
requirement_not_met("simpleP2P is not supported on sbsa")

# Number of GPUs
print("Checking for multiple GPUs...")
gpu_n = check_cuda_errors(cudart.cudaGetDeviceCount())
print(f"CUDA-capable device count: {gpu_n}")

if gpu_n < 2:
pytest.skip("Two or more GPUs with Peer-to-Peer access capability are required")
requirement_not_met("Two or more GPUs with Peer-to-Peer access capability are required")

prop = [check_cuda_errors(cudart.cudaGetDeviceProperties(i)) for i in range(gpu_n)]
# Check possibility for peer access
Expand Down Expand Up @@ -83,7 +84,7 @@ def main():
break

if p2p_capable_gp_us[0] == -1 or p2p_capable_gp_us[1] == -1:
pytest.skip("Peer to Peer access is not available amongst GPUs in the system")
requirement_not_met("Peer to Peer access is not available amongst GPUs in the system")

# Use first pair of p2p capable GPUs detected
gpuid = [p2p_capable_gp_us[0], p2p_capable_gp_us[1]]
Expand Down Expand Up @@ -158,7 +159,7 @@ def main():
_simple_kernel = [None] * 2
kernel_args = [None] * 2

kernel_helper[1] = common.KernelHelper(simplep2p, gpuid[1])
kernel_helper[1] = KernelHelper(simplep2p, gpuid[1])
_simple_kernel[1] = kernel_helper[1].get_function(b"SimpleKernel")
kernel_args[1] = ((g0, g1), (ctypes.c_void_p, ctypes.c_void_p))
check_cuda_errors(
Expand All @@ -183,7 +184,7 @@ def main():
# output to the GPU 0 buffer
print(f"Run kernel on GPU{gpuid[0]}, taking source data from GPU{gpuid[1]} and writing to GPU{gpuid[0]}...")
check_cuda_errors(cudart.cudaSetDevice(gpuid[0]))
kernel_helper[0] = common.KernelHelper(simplep2p, gpuid[0])
kernel_helper[0] = KernelHelper(simplep2p, gpuid[0])
_simple_kernel[0] = kernel_helper[0].get_function(b"SimpleKernel")
kernel_args[0] = ((g1, g0), (ctypes.c_void_p, ctypes.c_void_p))
check_cuda_errors(
Expand Down
Loading
Loading