NVIDIA
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 8 additions & 12 deletions b/‎.pre-commit-config.yaml‎
Lines changed: 8 additions & 12 deletions
diff --git a/‎MANIFEST.in‎
Lines changed: 5 additions & 4 deletions b/‎MANIFEST.in‎
Lines changed: 5 additions & 4 deletions
diff --git a/‎builder/utils.py‎
Lines changed: 40 additions & 44 deletions b/‎builder/utils.py‎
Lines changed: 40 additions & 44 deletions
diff --git a/‎docs/sphinx/_static/switcher.json‎
Lines changed: 32 additions & 28 deletions b/‎docs/sphinx/_static/switcher.json‎
Lines changed: 32 additions & 28 deletions
@@ -20,6 +20,9 @@ repos:
     - id: trailing-whitespace
     - id: mixed-line-ending
       args: ['--fix=lf']
+    - id: pretty-format-json
+      args: ['--no-sort-keys','--autofix']
+      exclude_types: [jupyter]
 
   - repo: https://github.com/abravalheri/validate-pyproject
     rev: v0.24
@@ -42,10 +45,10 @@ repos:
         # Enforce only one source of configuration.
         args: ["--config-file", "pyproject.toml"]
         additional_dependencies:
-          - cuda-python
+          - cuda-core
+          - cuda-bindings>=12.9.1,<13
           - cupy-cuda12x
-          # TODO: https://github.com/mpi4py/mpi4py/issues/630
-          # - mpi4py
+          - mpi4py>=4.1.0
           - numba
           - numba-cuda
           - numpy
@@ -55,15 +58,8 @@ repos:
           - types-cffi
           - types-pywin32
           - invoke
-          - cython<3
-          - wheel
-
-  # FIXME: Prettier pre-commit plugin is no longer supported
-  # Autoformat: YAML, JSON, Markdown, etc.
-  # - repo: https://github.com/pre-commit/mirrors-prettier
-  #   rev: v4.0.0-alpha.8
-  #   hooks:
-  #     - id: prettier
+          - cython>=3.0.4,!=3.1.0,!=3.1.1
+          - tomli
 
   # Spellcheck
   - repo: https://github.com/codespell-project/codespell
 
@@ -1,5 +1,6 @@
-graft nvmath
-global-include *.pyd
-global-include *.pyi
+include builder/__init__.py builder/utils.py # builder package used during build
+global-include *.pxd # cython header (aka .h/.hpp in c++)
+global-include *.pyx # cython implementation (aka .c/.cpp in c++)
+# setuptools includes .cpp files automatically, not knowing they are generated
+# from Cython. This should be changed if .cpp files are ever included manually.
 global-exclude *.cpp
-global-exclude *.pyx
 
@@ -6,7 +6,6 @@
 import sys
 
 from setuptools.command.build_ext import build_ext as _build_ext
-from wheel.bdist_wheel import bdist_wheel as _bdist_wheel
 
 
 def detect_cuda_paths():
@@ -22,6 +21,9 @@ def detect_cuda_paths():
     potential_build_prefixes = (
         [os.path.join(p, "nvidia/cuda_runtime") for p in sys.path]
         + [os.path.join(p, "nvidia/cuda_nvcc") for p in sys.path]
+        # internal/bindings depends on cuda_bindings cydriver,
+        # which introduces a dependency on cudaProfiler.h
+        + [os.path.join(p, "nvidia/cuda_profiler_api") for p in sys.path]
         + [os.environ.get("CUDA_PATH", os.environ.get("CUDA_HOME", "")), "/usr/local/cuda"]
     )
     cuda_paths = []
@@ -38,6 +40,9 @@ def check_path(header):
 
     check_path("cuda.h")
     check_path("crt/host_defines.h")
+    # internal/bindings depends on cuda_bindings cydriver,
+    # which introduces a dependency on cudaProfiler.h
+    check_path("cudaProfiler.h")
     return cuda_paths
 
 
@@ -50,16 +55,6 @@ def decide_lib_name(ext_name):
         return None
 
 
-building_wheel = False
-
-
-class bdist_wheel(_bdist_wheel):
-    def run(self):
-        global building_wheel
-        building_wheel = True
-        super().run()
-
-
 class build_ext(_build_ext):
     def __init__(self, *args, **kwargs):
         self._nvmath_cuda_paths = detect_cuda_paths()
@@ -74,42 +69,43 @@ def _prep_includes_libs_rpaths(self, lib_name):
         Set cuda_incl_dir and extra_linker_flags.
         """
         cuda_incl_dir = [os.path.join(p, "include") for p in self._nvmath_cuda_paths]
+        extra_linker_flags = []
+
+        site_packages = ["$ORIGIN/../../.."]
+        if self.editable_mode:
+            import site
 
-        if not building_wheel:
-            # Note: with PEP-517 the editable mode would not build a wheel for installation
-            # (and we purposely do not support PEP-660).
-            extra_linker_flags = []
+            site_packages = site.getsitepackages()
         else:
-            # Note: soname = library major version
-            # We need to be able to search for cuBLAS/cuSOLVER/... at run time, in case they
-            # are installed via pip wheels.
-            # The rpaths must be adjusted given the following full-wheel installation:
-            # - $ORIGIN:          site-packages/nvmath/bindings/_internal/
-            # - cublas:           site-packages/nvidia/cublas/lib/
-            # - cusolver:         site-packages/nvidia/cusolver/lib/
-            # -   ...                             ...
-            # strip binaries to remove debug symbols which significantly increase wheel size
+            # strip binaries to remove debug symbols which significantly
+            # increase wheel size
             extra_linker_flags = ["-Wl,--strip-all"]
-            if lib_name is not None:
-                ldflag = "-Wl,--disable-new-dtags"
-                match lib_name:
-                    case "nvpl":
-                        # 1. the nvpl bindings land in
-                        # site-packages/nvmath/bindings/nvpl/_internal/ as opposed to other
-                        # packages that have their bindings in
-                        # site-packages/nvmath/bindings/_internal/, so we need one extra
-                        # `..` to get into `site-packages` and then the lib_name=nvpl is not
-                        # in nvidia dir but directly in the site-packages.
-                        # 2. mkl lib is placed directly in the python `lib` directory, not
-                        # in python{ver}/site-packages
-                        ldflag += f",-rpath,$ORIGIN/../../../../{lib_name}/lib:$ORIGIN/../../../../../../"
-                    case "cufftMp":
-                        ldflag += ",-rpath,$ORIGIN/../../../nvidia/cufftmp/cu12/lib"
-                    case "mathdx" | "cudss":
-                        ldflag += ",-rpath,$ORIGIN/../../../nvidia/cu12/lib"
-                    case _:
-                        ldflag += f",-rpath,$ORIGIN/../../../nvidia/{lib_name}/lib"
-                extra_linker_flags.append(ldflag)
+        nvpl_site_packages = [f"{p}/.." for p in site_packages]
+
+        # Note: soname = library major version
+        # We need to be able to search for cuBLAS/cuSOLVER/... at run time, in case they
+        # are installed via pip wheels.
+        # The rpaths must be adjusted given the following full-wheel installation:
+        # - $ORIGIN:          site-packages/nvmath/bindings/_internal/
+        # - cublas:           site-packages/nvidia/cublas/lib/
+        # - cusolver:         site-packages/nvidia/cusolver/lib/
+        # -   ...                             ...
+        if lib_name is None:
+            return cuda_incl_dir, extra_linker_flags
+
+        ldflag = "-Wl,--disable-new-dtags"
+        if lib_name == "nvpl":
+            # 1. the nvpl bindings land in
+            # site-packages/nvmath/bindings/nvpl/_internal/ as opposed to other
+            # packages that have their bindings in
+            # site-packages/nvmath/bindings/_internal/, so we need one extra
+            # `..` to get into `site-packages` and then the lib_name=nvpl is not
+            # in nvidia dir but directly in the site-packages.
+            # 2. mkl lib is placed directly in the python `lib` directory, not
+            # in python{ver}/site-packages
+            rpath = ":".join([f"{pth}/{lib_name}/lib:{pth}/../../" for pth in nvpl_site_packages])
+            ldflag += f",-rpath,{rpath}"
+        extra_linker_flags.append(ldflag)
 
         return cuda_incl_dir, extra_linker_flags
 
 
@@ -1,30 +1,34 @@
 [
-    {
-        "version": "latest",
-        "url": "https://docs.nvidia.com/cuda/nvmath-python/latest"
-    },
-    {
-        "version": "0.5.0",
-        "url": "https://docs.nvidia.com/cuda/nvmath-python/0.5.0"
-    },
-    {
-        "version": "0.4.0",
-        "url": "https://docs.nvidia.com/cuda/nvmath-python/0.4.0"
-    },
-    {
-        "version": "0.3.0",
-        "url": "https://docs.nvidia.com/cuda/nvmath-python/0.3.0"
-    },
-    {
-        "version": "0.2.1",
-        "url": "https://docs.nvidia.com/cuda/nvmath-python/0.2.1"
-    },
-    {
-        "version": "0.2.0",
-        "url": "https://docs.nvidia.com/cuda/nvmath-python/0.2.0"
-    },
-    {
-        "version": "0.1.0",
-        "url": "https://docs.nvidia.com/cuda/nvmath-python/0.1.0"
-    }
+  {
+    "version": "latest",
+    "url": "https://docs.nvidia.com/cuda/nvmath-python/latest"
+  },
+  {
+    "version": "0.6.0",
+    "url": "https://docs.nvidia.com/cuda/nvmath-python/0.6.0"
+  },
+  {
+    "version": "0.5.0",
+    "url": "https://docs.nvidia.com/cuda/nvmath-python/0.5.0"
+  },
+  {
+    "version": "0.4.0",
+    "url": "https://docs.nvidia.com/cuda/nvmath-python/0.4.0"
+  },
+  {
+    "version": "0.3.0",
+    "url": "https://docs.nvidia.com/cuda/nvmath-python/0.3.0"
+  },
+  {
+    "version": "0.2.1",
+    "url": "https://docs.nvidia.com/cuda/nvmath-python/0.2.1"
+  },
+  {
+    "version": "0.2.0",
+    "url": "https://docs.nvidia.com/cuda/nvmath-python/0.2.0"
+  },
+  {
+    "version": "0.1.0",
+    "url": "https://docs.nvidia.com/cuda/nvmath-python/0.1.0"
+  }
 ]