Skip to content

Commit 682182b

Browse files
authored
Improve cuda_bindings examples (#1842)
* Improve cuda_bindings examples * Fix skip test in KernelHelper
1 parent a81fd07 commit 682182b

26 files changed

+304
-195
lines changed

.github/workflows/test-wheel-linux.yml

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -261,16 +261,6 @@ jobs:
261261
LOCAL_CTK: ${{ matrix.LOCAL_CTK }}
262262
run: run-tests bindings
263263

264-
- name: Run cuda.bindings examples
265-
if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }}
266-
env:
267-
CUDA_VER: ${{ matrix.CUDA_VER }}
268-
LOCAL_CTK: ${{ matrix.LOCAL_CTK }}
269-
run: |
270-
pushd cuda_bindings
271-
${SANITIZER_CMD} pytest -ra -s -vv examples/
272-
popd
273-
274264
- name: Run cuda.core tests
275265
env:
276266
CUDA_VER: ${{ matrix.CUDA_VER }}

.github/workflows/test-wheel-windows.yml

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -245,17 +245,6 @@ jobs:
245245
shell: bash --noprofile --norc -xeuo pipefail {0}
246246
run: run-tests bindings
247247

248-
- name: Run cuda.bindings examples
249-
if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }}
250-
env:
251-
CUDA_VER: ${{ matrix.CUDA_VER }}
252-
LOCAL_CTK: ${{ matrix.LOCAL_CTK }}
253-
shell: bash --noprofile --norc -xeuo pipefail {0}
254-
run: |
255-
pushd cuda_bindings
256-
${SANITIZER_CMD} pytest -ra -s -vv examples/
257-
popd
258-
259248
- name: Run cuda.core tests
260249
env:
261250
CUDA_VER: ${{ matrix.CUDA_VER }}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
3+
4+
from .common import KernelHelper, check_compute_capability_too_low, requirement_not_met
5+
from .helper_cuda import check_cuda_errors, find_cuda_device, find_cuda_device_drv
6+
from .helper_string import check_cmd_line_flag, get_cmd_line_argument_int
7+
8+
__all__ = [
9+
"KernelHelper",
10+
"check_cmd_line_flag",
11+
"check_compute_capability_too_low",
12+
"check_cuda_errors",
13+
"find_cuda_device",
14+
"find_cuda_device_drv",
15+
"get_cmd_line_argument_int",
16+
"requirement_not_met",
17+
]

cuda_bindings/examples/common/common.py renamed to cuda_bindings/cuda/bindings/_example_helpers/common.py

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,27 @@
1-
# Copyright 2021-2025 NVIDIA Corporation. All rights reserved.
1+
# SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
33

44

5+
import os
6+
import sys
7+
58
import numpy as np
6-
from common.helper_cuda import check_cuda_errors
79

810
from cuda import pathfinder
911
from cuda.bindings import driver as cuda
1012
from cuda.bindings import nvrtc
1113
from cuda.bindings import runtime as cudart
1214

15+
from .helper_cuda import check_cuda_errors
16+
17+
18+
def requirement_not_met(message):
    """Report an unmet example requirement on stderr and terminate the process.

    The exit status is read from the ``CUDA_BINDINGS_SKIP_EXAMPLE`` environment
    variable and defaults to ``1`` when the variable is unset.
    NOTE(review): presumably a test harness sets this variable to a status it
    treats as "skipped" -- confirm against the runner.

    Args:
        message: Human-readable explanation of which requirement is missing.

    Raises:
        SystemExit: Always; this function never returns normally.
    """
    print(message, file=sys.stderr)  # noqa: T201
    raw_exitcode = os.environ.get("CUDA_BINDINGS_SKIP_EXAMPLE", "1")
    try:
        exitcode = int(raw_exitcode)
    except ValueError:
        # A malformed override must not turn a clean "requirement not met"
        # exit into an unrelated traceback; fall back to the default status.
        exitcode = 1
    sys.exit(exitcode)
1322

14-
def pytest_skipif_compute_capability_too_low(dev_id, required_cc_major_minor):
15-
import pytest
1623

24+
def check_compute_capability_too_low(dev_id, required_cc_major_minor):
1725
cc_major = check_cuda_errors(
1826
cudart.cudaDeviceGetAttribute(cudart.cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor, dev_id)
1927
)
@@ -22,7 +30,9 @@ def pytest_skipif_compute_capability_too_low(dev_id, required_cc_major_minor):
2230
)
2331
have_cc_major_minor = (cc_major, cc_minor)
2432
if have_cc_major_minor < required_cc_major_minor:
25-
pytest.skip(f"cudaDevAttrComputeCapability too low: {have_cc_major_minor=!r}, {required_cc_major_minor=!r}")
33+
requirement_not_met(
34+
f"CUDA device compute capability too low: {have_cc_major_minor=!r}, {required_cc_major_minor=!r}"
35+
)
2636

2737

2838
class KernelHelper:
@@ -31,9 +41,7 @@ def __init__(self, code, dev_id):
3141
for libname in ("cudart", "cccl"):
3242
hdr_dir = pathfinder.find_nvidia_header_directory(libname)
3343
if hdr_dir is None:
34-
import pytest
35-
36-
pytest.skip(f'pathfinder.find_nvidia_header_directory("{libname}") returned None')
44+
requirement_not_met(f'pathfinder.find_nvidia_header_directory("{libname}") returned None')
3745
include_dirs.append(hdr_dir)
3846

3947
prog = check_cuda_errors(nvrtc.nvrtcCreateProgram(str.encode(code), b"sourceCode.cu", 0, None, None))
@@ -69,8 +77,8 @@ def __init__(self, code, dev_id):
6977
check_cuda_errors(nvrtc.nvrtcGetProgramLog(prog, log))
7078
import sys
7179

72-
print(log.decode(), file=sys.stderr)
73-
print(err, file=sys.stderr)
80+
print(log.decode(), file=sys.stderr) # noqa: T201
81+
print(err, file=sys.stderr) # noqa: T201
7482
sys.exit(1)
7583

7684
if use_cubin:

cuda_bindings/examples/common/helper_cuda.py renamed to cuda_bindings/cuda/bindings/_example_helpers/helper_cuda.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
# Copyright 2021-2025 NVIDIA Corporation. All rights reserved.
1+
# SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
33

4-
from common.helper_string import check_cmd_line_flag, get_cmd_line_argument_int
5-
64
from cuda.bindings import driver as cuda
75
from cuda.bindings import nvrtc
86
from cuda.bindings import runtime as cudart
97

8+
from .helper_string import check_cmd_line_flag, get_cmd_line_argument_int
9+
1010

1111
def _cuda_get_error_enum(error):
1212
if isinstance(error, cuda.CUresult):

cuda_bindings/examples/common/helper_string.py renamed to cuda_bindings/cuda/bindings/_example_helpers/helper_string.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2021-2025 NVIDIA Corporation. All rights reserved.
1+
# SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
33

44
import sys
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
3+
4+
5+
import importlib.metadata
6+
import os
7+
import re
8+
9+
import pytest
10+
11+
12+
def _pep723_dependencies(toml_text, example_name):
    """Return the ``dependencies`` list declared in PEP 723 metadata text.

    Args:
        toml_text: The metadata block content with the leading ``#`` comment
            prefixes already removed.
        example_name: File name used in error messages.

    Raises:
        ValueError: If the metadata cannot be parsed, has no ``dependencies``
            entry, or that entry is not a list of strings.
    """
    try:
        import tomllib  # standard library from Python 3.11 onwards
    except ImportError:
        tomllib = None

    if tomllib is not None:
        # PEP 723 metadata is TOML; TOMLDecodeError is a ValueError subclass.
        metadata = tomllib.loads(toml_text)
    else:
        # Fallback for older interpreters: one ``key = value`` pair per line.
        metadata = {}
        for line in toml_text.splitlines():
            line = line.strip()
            if not line:
                continue
            key, sep, value = line.partition("=")
            if not sep:
                raise ValueError(f"Cannot parse PEP 723 metadata line {line!r} in {example_name}")
            metadata[key.strip()] = value.strip()

    if "dependencies" not in metadata:
        raise ValueError(f"PEP 723 dependencies not found in {example_name}")

    dependencies = metadata["dependencies"]
    if isinstance(dependencies, str):
        import ast

        # literal_eval only accepts Python literals, so text taken from the
        # example file can never execute code (unlike eval).
        try:
            dependencies = ast.literal_eval(dependencies)
        except (ValueError, SyntaxError) as err:
            raise ValueError(f"PEP 723 dependencies in {example_name} are not a valid list") from err

    if not isinstance(dependencies, (list, tuple)) or not all(isinstance(dep, str) for dep in dependencies):
        raise ValueError(f"PEP 723 dependencies in {example_name} must be a list of strings")
    return list(dependencies)


def has_package_requirements_or_skip(example):
    """Skip the current pytest test when an example's dependencies are missing.

    Reads the PEP 723 inline metadata block of the file at ``example`` and
    looks up every listed dependency by distribution name with
    ``importlib.metadata``. Version specifiers are not evaluated; only the
    presence of the distribution is checked.

    Args:
        example: Path to the example script to inspect.

    Raises:
        ValueError: If the file has no PEP 723 block, the block has no usable
            ``dependencies`` list, or a dependency has no recognizable name.
    """
    example_name = os.path.basename(example)

    with open(example, encoding="utf-8") as f:
        content = f.read()

    # The canonical regex as defined in PEP 723
    pep723 = re.search(r"(?m)^# /// (?P<type>[a-zA-Z0-9-]+)$\s(?P<content>(^#(| .*)$\s)+)^# ///$", content)
    if not pep723:
        raise ValueError(f"PEP 723 metadata not found in {example_name}")

    # Remove exactly the comment prefix ("# " or a bare "#") from each line, as
    # the PEP 723 reference implementation does, rather than stripping a
    # character set that could also eat leading content.
    toml_text = "\n".join(
        line[2:] if line.startswith("# ") else line[1:] for line in pep723.group("content").splitlines()
    )
    dependencies = _pep723_dependencies(toml_text, example_name)

    missing_dependencies = []
    for dependency in dependencies:
        # Distribution names may contain dots (e.g. "ruamel.yaml").
        name = re.match(r"[a-zA-Z0-9._-]+", dependency)
        if name is None:
            raise ValueError(f"Cannot determine package name from {dependency!r} in {example_name}")
        try:
            importlib.metadata.distribution(name.group(0))
        except importlib.metadata.PackageNotFoundError:
            missing_dependencies.append(dependency)

    if missing_dependencies:
        pytest.skip(f"Skipping {example} due to missing package requirement: {', '.join(missing_dependencies)}")

cuda_bindings/examples/0_Introduction/clock_nvrtc_test.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,16 @@
88
#
99
# ################################################################################
1010

11+
# /// script
12+
# dependencies = ["cuda_bindings>13.2.1", "numpy"]
13+
# ///
14+
1115
import platform
1216

1317
import numpy as np
14-
from common import common
15-
from common.helper_cuda import check_cuda_errors, find_cuda_device
1618

1719
from cuda.bindings import driver as cuda
20+
from cuda.bindings._example_helpers import KernelHelper, check_cuda_errors, find_cuda_device, requirement_not_met
1821

1922
clock_nvrtc = """\
2023
extern "C" __global__ void timedReduction(const float *hinput, float *output, clock_t *timer)
@@ -65,11 +68,13 @@ def elems_to_bytes(nelems, dt):
6568
return nelems * np.dtype(dt).itemsize
6669

6770

68-
def main():
69-
import pytest
70-
71+
def check_requirements():
7172
if platform.machine() == "armv7l":
72-
pytest.skip("clock_nvrtc is not supported on ARMv7")
73+
requirement_not_met("clock_nvrtc is not supported on ARMv7")
74+
75+
76+
def main():
77+
check_requirements()
7378

7479
timer = np.empty(num_blocks * 2, dtype="int64")
7580
hinput = np.empty(num_threads * 2, dtype="float32")
@@ -78,7 +83,7 @@ def main():
7883
hinput[i] = i
7984

8085
dev_id = find_cuda_device()
81-
kernel_helper = common.KernelHelper(clock_nvrtc, dev_id)
86+
kernel_helper = KernelHelper(clock_nvrtc, dev_id)
8287
kernel_addr = kernel_helper.get_function(b"timedReduction")
8388

8489
dinput = check_cuda_errors(cuda.cuMemAlloc(hinput.nbytes))

cuda_bindings/examples/0_Introduction/simpleCubemapTexture_test.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,21 @@
77
#
88
# ################################################################################
99

10+
11+
# /// script
12+
# dependencies = ["cuda_bindings>13.2.1", "numpy"]
13+
# ///
14+
15+
1016
import ctypes
1117
import sys
1218
import time
1319

1420
import numpy as np
15-
from common import common
16-
from common.helper_cuda import check_cuda_errors, find_cuda_device
1721

1822
from cuda.bindings import driver as cuda
1923
from cuda.bindings import runtime as cudart
24+
from cuda.bindings._example_helpers import KernelHelper, check_cuda_errors, find_cuda_device, requirement_not_met
2025

2126
simple_cubemap_texture = """\
2227
extern "C"
@@ -97,9 +102,7 @@ def main():
97102
f"CUDA device [{device_props.name}] has {device_props.multiProcessorCount} Multi-Processors SM {device_props.major}.{device_props.minor}"
98103
)
99104
if device_props.major < 2:
100-
import pytest
101-
102-
pytest.skip("Test requires SM 2.0 or higher for support of Texture Arrays.")
105+
requirement_not_met("Test requires SM 2.0 or higher for support of Texture Arrays.")
103106

104107
# Generate input data for layered texture
105108
width = 64
@@ -162,7 +165,7 @@ def main():
162165
f"Covering Cubemap data array of {width}~3 x {num_layers}: Grid size is {dim_grid.x} x {dim_grid.y}, each block has 8 x 8 threads"
163166
)
164167

165-
kernel_helper = common.KernelHelper(simple_cubemap_texture, dev_id)
168+
kernel_helper = KernelHelper(simple_cubemap_texture, dev_id)
166169
_transform_kernel = kernel_helper.get_function(b"transformKernel")
167170
kernel_args = ((d_data, width, tex), (ctypes.c_void_p, ctypes.c_int, None))
168171
check_cuda_errors(

cuda_bindings/examples/0_Introduction/simpleP2P_test.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,19 @@
88
#
99
# ################################################################################
1010

11+
# /// script
12+
# dependencies = ["cuda_bindings>13.2.1", "numpy"]
13+
# ///
14+
1115
import ctypes
1216
import platform
1317
import sys
1418

1519
import numpy as np
16-
from common import common
17-
from common.helper_cuda import check_cuda_errors
1820

1921
from cuda.bindings import driver as cuda
2022
from cuda.bindings import runtime as cudart
23+
from cuda.bindings._example_helpers import KernelHelper, check_cuda_errors, requirement_not_met
2124

2225
simplep2p = """\
2326
extern "C"
@@ -32,27 +35,25 @@
3235

3336

3437
def main():
35-
import pytest
36-
3738
if platform.system() == "Darwin":
38-
pytest.skip("simpleP2P is not supported on Mac OSX")
39+
requirement_not_met("simpleP2P is not supported on Mac OSX")
3940

4041
if platform.machine() == "armv7l":
41-
pytest.skip("simpleP2P is not supported on ARMv7")
42+
requirement_not_met("simpleP2P is not supported on ARMv7")
4243

4344
if platform.machine() == "aarch64":
44-
pytest.skip("simpleP2P is not supported on aarch64")
45+
requirement_not_met("simpleP2P is not supported on aarch64")
4546

4647
if platform.machine() == "sbsa":
47-
pytest.skip("simpleP2P is not supported on sbsa")
48+
requirement_not_met("simpleP2P is not supported on sbsa")
4849

4950
# Number of GPUs
5051
print("Checking for multiple GPUs...")
5152
gpu_n = check_cuda_errors(cudart.cudaGetDeviceCount())
5253
print(f"CUDA-capable device count: {gpu_n}")
5354

5455
if gpu_n < 2:
55-
pytest.skip("Two or more GPUs with Peer-to-Peer access capability are required")
56+
requirement_not_met("Two or more GPUs with Peer-to-Peer access capability are required")
5657

5758
prop = [check_cuda_errors(cudart.cudaGetDeviceProperties(i)) for i in range(gpu_n)]
5859
# Check possibility for peer access
@@ -83,7 +84,7 @@ def main():
8384
break
8485

8586
if p2p_capable_gp_us[0] == -1 or p2p_capable_gp_us[1] == -1:
86-
pytest.skip("Peer to Peer access is not available amongst GPUs in the system")
87+
requirement_not_met("Peer to Peer access is not available amongst GPUs in the system")
8788

8889
# Use first pair of p2p capable GPUs detected
8990
gpuid = [p2p_capable_gp_us[0], p2p_capable_gp_us[1]]
@@ -158,7 +159,7 @@ def main():
158159
_simple_kernel = [None] * 2
159160
kernel_args = [None] * 2
160161

161-
kernel_helper[1] = common.KernelHelper(simplep2p, gpuid[1])
162+
kernel_helper[1] = KernelHelper(simplep2p, gpuid[1])
162163
_simple_kernel[1] = kernel_helper[1].get_function(b"SimpleKernel")
163164
kernel_args[1] = ((g0, g1), (ctypes.c_void_p, ctypes.c_void_p))
164165
check_cuda_errors(
@@ -183,7 +184,7 @@ def main():
183184
# output to the GPU 0 buffer
184185
print(f"Run kernel on GPU{gpuid[0]}, taking source data from GPU{gpuid[1]} and writing to GPU{gpuid[0]}...")
185186
check_cuda_errors(cudart.cudaSetDevice(gpuid[0]))
186-
kernel_helper[0] = common.KernelHelper(simplep2p, gpuid[0])
187+
kernel_helper[0] = KernelHelper(simplep2p, gpuid[0])
187188
_simple_kernel[0] = kernel_helper[0].get_function(b"SimpleKernel")
188189
kernel_args[0] = ((g1, g0), (ctypes.c_void_p, ctypes.c_void_p))
189190
check_cuda_errors(

0 commit comments

Comments
 (0)