diff --git a/CMakeLists.txt b/CMakeLists.txt index 4a446c19a9..3aac1d2e74 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,9 +25,15 @@ option(DPCTL_GENERATE_COVERAGE_FOR_PYBIND11_EXTENSIONS "Build dpctl pybind11 offloading extensions with coverage instrumentation" OFF ) -option(DPCTL_TARGET_CUDA - "Build DPCTL to target CUDA devices" - OFF +string(CONCAT _desc_target_cuda + "Build DPCTL to target CUDA device. " + "Set to a truthy value (e.g., ON, TRUE) to use default architecture (sm_50), " + "or to a specific architecture like sm_80." +) +set(DPCTL_TARGET_CUDA + "" + CACHE STRING + "${_desc_target_cuda}" ) set(DPCTL_TARGET_HIP "" @@ -51,15 +57,25 @@ set(_dpctl_sycl_target_compile_options) set(_dpctl_sycl_target_link_options) set(_dpctl_sycl_targets) +set(_dpctl_cuda_arch) set(_dpctl_amd_targets) + if ("x${DPCTL_SYCL_TARGETS}" STREQUAL "x") - if (DPCTL_TARGET_CUDA) - set(_dpctl_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown") - else() - if (DEFINED ENV{DPCTL_TARGET_CUDA}) - set(_dpctl_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown") + # An empty or false-like value (OFF, 0, NO, FALSE, N) leaves the CUDA target disabled instead of raising an error. + if (DPCTL_TARGET_CUDA) + if(DPCTL_TARGET_CUDA MATCHES "^sm_") + set(_dpctl_cuda_arch ${DPCTL_TARGET_CUDA}) + elseif(DPCTL_TARGET_CUDA MATCHES "^(ON|TRUE|YES|Y|1)$") + set(_dpctl_cuda_arch "sm_50") + else() + message(FATAL_ERROR + "Invalid value for DPCTL_TARGET_CUDA: \"${DPCTL_TARGET_CUDA}\". " + "Expected 'ON', 'TRUE', 'YES', 'Y', '1', or a CUDA architecture like 'sm_80'." 
+ ) endif() + set(_dpctl_sycl_targets "nvidia_gpu_${_dpctl_cuda_arch},spir64-unknown-unknown") endif() + if (NOT "x${DPCTL_TARGET_HIP}" STREQUAL "x") set(_dpctl_amd_targets ${DPCTL_TARGET_HIP}) if(_dpctl_sycl_targets) diff --git a/docs/doc_sources/beginners_guides/installation.rst b/docs/doc_sources/beginners_guides/installation.rst index afb9c639a9..44d46797ba 100644 --- a/docs/doc_sources/beginners_guides/installation.rst +++ b/docs/doc_sources/beginners_guides/installation.rst @@ -159,13 +159,33 @@ The following plugins from CodePlay are supported: .. _codeplay_nv_plugin: https://developer.codeplay.com/products/oneapi/nvidia/ .. _codeplay_amd_plugin: https://developer.codeplay.com/products/oneapi/amd/ -``dpctl`` can be built for CUDA devices as follows: +``dpctl`` can be built for CUDA devices using the ``DPCTL_TARGET_CUDA`` CMake option, +which accepts a specific compute architecture string: + +.. code-block:: bash + + python scripts/build_locally.py --verbose --cmake-opts="-DDPCTL_TARGET_CUDA=sm_80" + +To use the default architecture (``sm_50``), +set ``DPCTL_TARGET_CUDA`` to a value such as ``ON``, ``TRUE``, ``YES``, ``Y``, or ``1``: .. code-block:: bash python scripts/build_locally.py --verbose --cmake-opts="-DDPCTL_TARGET_CUDA=ON" -And for AMD devices +Note that kernels are built for ``sm_50`` by default, allowing them to work on a wider +range of architectures, but limiting the usage of more recent CUDA features. + +For reference, compute architecture strings like ``sm_80`` correspond to specific +CUDA Compute Capabilities (e.g., Compute Capability 8.0 corresponds to ``sm_80``). +A complete mapping between NVIDIA GPU models and their respective +Compute Capabilities can be found in the official +`CUDA GPU Compute Capability <https://developer.nvidia.com/cuda-gpus>`_ documentation. + +A full list of available SYCL alias targets is available in the +`DPC++ Compiler User Manual <https://intel.github.io/llvm/UsersManual.html>`_. + +To build for AMD devices, use: .. code-block:: bash