From 2fc602a7d448864582ca226073fdbc76d7004a20 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Mon, 16 Mar 2026 12:19:24 -0700 Subject: [PATCH 1/8] Generic atomic operation --- include/RAJA/pattern/atomic.hpp | 14 +++++++++++++- include/RAJA/policy/cuda/atomic.hpp | 12 ++++++++++++ include/RAJA/policy/hip/atomic.hpp | 12 ++++++++++++ 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/include/RAJA/pattern/atomic.hpp b/include/RAJA/pattern/atomic.hpp index 7979fd2d86..c6f6390a75 100644 --- a/include/RAJA/pattern/atomic.hpp +++ b/include/RAJA/pattern/atomic.hpp @@ -280,7 +280,6 @@ RAJA_INLINE RAJA_HOST_DEVICE T atomicExchange(T* acc, T value) * @param compare Value to compare with *acc * @return Returns value at *acc immediately before this operation completed */ - RAJA_SUPPRESS_HD_WARN template RAJA_INLINE RAJA_HOST_DEVICE T atomicCAS(T* acc, T compare, T value) @@ -288,6 +287,19 @@ RAJA_INLINE RAJA_HOST_DEVICE T atomicCAS(T* acc, T compare, T value) return RAJA::atomicCAS(Policy {}, acc, compare, value); } +/*! + * @brief Generic atomic operation implemented using CAS loop + * @param acc Pointer to location to store value + * @param operation Functor that computes a new value from the old value + * @return Returns value at *acc immediately before this operation completed + */ +RAJA_SUPPRESS_HD_WARN +template +RAJA_INLINE RAJA_HOST_DEVICE T atomicOperation(T* acc, Operation&& operation) +{ + return RAJA::atomicOperation(Policy {}, acc, std::forward(operation)); +} + /*! 
* \brief Atomic wrapper object * diff --git a/include/RAJA/policy/cuda/atomic.hpp b/include/RAJA/policy/cuda/atomic.hpp index 9293c7cb0a..b2a22537c6 100644 --- a/include/RAJA/policy/cuda/atomic.hpp +++ b/include/RAJA/policy/cuda/atomic.hpp @@ -858,6 +858,18 @@ atomicCAS(cuda_atomic_explicit, T* acc, T compare, T value) #endif } +RAJA_SUPPRESS_HD_WARN +template +RAJA_INLINE RAJA_HOST_DEVICE T +atomicOperation(cuda_atomic_explicit, T* acc, Operation&& operation) +{ +#ifdef __CUDA_ARCH__ + return detail::cuda_atomicCAS_loop(acc, std::forward(operation)); +#else + return RAJA::atomicOperation(host_policy {}, acc, std::forward(operation)); +#endif +} + } // namespace RAJA diff --git a/include/RAJA/policy/hip/atomic.hpp b/include/RAJA/policy/hip/atomic.hpp index 13a708d6eb..7aa6a5bd91 100644 --- a/include/RAJA/policy/hip/atomic.hpp +++ b/include/RAJA/policy/hip/atomic.hpp @@ -901,6 +901,18 @@ atomicCAS(hip_atomic_explicit, T* acc, T compare, T value) #endif } +RAJA_SUPPRESS_HD_WARN +template +RAJA_INLINE RAJA_HOST_DEVICE T +atomicOperation(hip_atomic_explicit, T* acc, Operation&& operation) +{ +#if defined(__HIP_DEVICE_COMPILE__) + return detail::hip_atomicCAS_loop(acc, std::forward(operation)); +#else + return RAJA::atomicOperation(host_policy {}, acc, std::forward(value)); +#endif +} + } // namespace RAJA From f69487ae31cac7825e22e23791cb6feda75e2e7b Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Mon, 16 Mar 2026 12:29:41 -0700 Subject: [PATCH 2/8] More implementations --- include/RAJA/policy/atomic_auto.hpp | 6 ++++++ include/RAJA/policy/atomic_builtin.hpp | 7 +++++++ include/RAJA/policy/sequential/atomic.hpp | 9 +++++++++ 3 files changed, 22 insertions(+) diff --git a/include/RAJA/policy/atomic_auto.hpp b/include/RAJA/policy/atomic_auto.hpp index ca61f437cf..45e35e3448 100644 --- a/include/RAJA/policy/atomic_auto.hpp +++ b/include/RAJA/policy/atomic_auto.hpp @@ -156,6 +156,12 @@ atomicCAS(auto_atomic, T* acc, T compare, T value) return atomicCAS(RAJA_AUTO_ATOMIC, 
acc, compare, value); } +template +RAJA_INLINE RAJA_HOST_DEVICE T +atomicOperation(auto_atomic, T* acc, Operation&& operation) +{ + return atomicOperation(RAJA_AUTO_ATOMIC, acc, std::forward(operation)); +} } // namespace RAJA diff --git a/include/RAJA/policy/atomic_builtin.hpp b/include/RAJA/policy/atomic_builtin.hpp index f2a479bc69..7382adff9a 100644 --- a/include/RAJA/policy/atomic_builtin.hpp +++ b/include/RAJA/policy/atomic_builtin.hpp @@ -1011,6 +1011,13 @@ atomicCAS(builtin_atomic, T* acc, T compare, T value) return detail::builtin_atomicCAS(acc, compare, value); } +template +RAJA_DEVICE_HIP RAJA_INLINE T +atomicOperation(builtin_atomic, T* acc, Operation&& operation) +{ + return detail::builtin_atomicCAS_loop(acc, std::forward(operation)); +} + } // namespace RAJA diff --git a/include/RAJA/policy/sequential/atomic.hpp b/include/RAJA/policy/sequential/atomic.hpp index 32eda4658b..83b91bd3fe 100644 --- a/include/RAJA/policy/sequential/atomic.hpp +++ b/include/RAJA/policy/sequential/atomic.hpp @@ -158,6 +158,15 @@ RAJA_HOST_DEVICE RAJA_INLINE T atomicCAS(seq_atomic, T* acc, T compare, T value) return ret; } +RAJA_SUPPRESS_HD_WARN +template +RAJA_HOST_DEVICE RAJA_INLINE T atomicOperation(seq_atomic, T* acc, Operation&& operation) +{ + T ret = *acc; + *acc = operation(ret); + return ret; +} + } // namespace RAJA From f858624c28c10818aa95400f93613717464cd116 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Mon, 16 Mar 2026 14:29:43 -0700 Subject: [PATCH 3/8] Add openmp atomicOperation --- include/RAJA/policy/atomic_builtin.hpp | 3 ++- include/RAJA/policy/openmp/atomic.hpp | 13 +++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/include/RAJA/policy/atomic_builtin.hpp b/include/RAJA/policy/atomic_builtin.hpp index 7382adff9a..8ec38f494e 100644 --- a/include/RAJA/policy/atomic_builtin.hpp +++ b/include/RAJA/policy/atomic_builtin.hpp @@ -23,6 +23,7 @@ #include "RAJA/config.hpp" #include +#include #if defined(RAJA_COMPILER_MSVC) || \ 
((defined(_WIN32) || defined(_WIN64)) && defined(__INTEL_COMPILER)) @@ -1011,7 +1012,7 @@ atomicCAS(builtin_atomic, T* acc, T compare, T value) return detail::builtin_atomicCAS(acc, compare, value); } -template +template RAJA_DEVICE_HIP RAJA_INLINE T atomicOperation(builtin_atomic, T* acc, Operation&& operation) { diff --git a/include/RAJA/policy/openmp/atomic.hpp b/include/RAJA/policy/openmp/atomic.hpp index 2043b36342..c56c225e6c 100644 --- a/include/RAJA/policy/openmp/atomic.hpp +++ b/include/RAJA/policy/openmp/atomic.hpp @@ -24,6 +24,8 @@ #if defined(RAJA_ENABLE_OPENMP) +#include + #include "RAJA/policy/openmp/policy.hpp" #include "RAJA/util/macros.hpp" @@ -230,6 +232,17 @@ RAJA_HOST_DEVICE RAJA_INLINE T atomicCAS(omp_atomic, T* acc, T compare, T value) return RAJA::atomicCAS(builtin_atomic {}, acc, compare, value); } +RAJA_SUPPRESS_HD_WARN +template +RAJA_HOST_DEVICE RAJA_INLINE T +atomicOperation(omp_atomic, T* acc, Operation&& operation) +{ + // OpenMP doesn't define a generic atomic operation, so use builtin atomics + return RAJA::atomicOperation(builtin_atomic {}, + acc, + std::forward(operation)); +} + #endif // not defined RAJA_COMPILER_MSVC From 2183864eff4bd69ce8ef7b494886b79c5a51aa9d Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Mon, 16 Mar 2026 15:11:49 -0700 Subject: [PATCH 4/8] Add test case --- .../tests/test-forall-atomic-basic.hpp | 21 ++++++++++++++++++- test/unit/atomic/CMakeLists.txt | 4 ++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/test/functional/forall/atomic-basic/tests/test-forall-atomic-basic.hpp b/test/functional/forall/atomic-basic/tests/test-forall-atomic-basic.hpp index cdd10d025c..977c0578a5 100644 --- a/test/functional/forall/atomic-basic/tests/test-forall-atomic-basic.hpp +++ b/test/functional/forall/atomic-basic/tests/test-forall-atomic-basic.hpp @@ -63,7 +63,7 @@ template (0); test_array[10] = static_cast(0); test_array[11] = static_cast(0); + test_array[12] = static_cast(1); work_res.memcpy(work_array, 
test_array, sizeof(T) * len); @@ -109,6 +110,23 @@ void ForallAtomicBasicTestImpl( IdxType seglimit ) RAJA::atomicStore(work_array + 9, static_cast(1)); RAJA::atomicInc(work_array + 10, static_cast(16)); RAJA::atomicDec(work_array + 11, static_cast(16)); + + // Exercise generic atomicOperation with an order-independent update: + // compute factorial(N) by multiplying by (i+1) for i in [0, N). + // + // Choose N small enough that: + // - The result fits in 32-bit signed ints (avoids overflow/UB). + // - The intermediate values are exactly representable in float/double + // (avoids non-associativity issues). + constexpr IdxType factN = static_cast(10); + RAJA::atomicOperation(work_array + 12, + [=] RAJA_HOST_DEVICE(T old) { + if (i < factN) + { + return old * static_cast(i + static_cast(1)); + } + return old; + }); }); work_res.memcpy( check_array, work_array, sizeof(T) * len ); @@ -128,6 +146,7 @@ void ForallAtomicBasicTestImpl( IdxType seglimit ) EXPECT_EQ(static_cast(1), check_array[9]); EXPECT_EQ(static_cast(4), check_array[10]); EXPECT_EQ(static_cast(13), check_array[11]); + EXPECT_EQ(static_cast(3628800), check_array[12]); deallocateForallTestData(work_res, work_array, diff --git a/test/unit/atomic/CMakeLists.txt b/test/unit/atomic/CMakeLists.txt index 8dc841e1f7..ca947a66eb 100644 --- a/test/unit/atomic/CMakeLists.txt +++ b/test/unit/atomic/CMakeLists.txt @@ -34,3 +34,7 @@ raja_add_test( raja_add_test( NAME test-atomic-ref-bitwise SOURCES test-atomic-ref-bitwise.cpp) + +raja_add_test( + NAME test-atomic-operation + SOURCES test-atomic-operation.cpp) From 95f82c7a40096177e6a00965d389f3f87c1cc679 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Mon, 16 Mar 2026 15:20:36 -0700 Subject: [PATCH 5/8] Add desul implementation --- include/RAJA/policy/desul/atomic.hpp | 57 ++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/include/RAJA/policy/desul/atomic.hpp b/include/RAJA/policy/desul/atomic.hpp index 22039283ce..375a698f13 100644 --- 
a/include/RAJA/policy/desul/atomic.hpp +++ b/include/RAJA/policy/desul/atomic.hpp @@ -14,7 +14,12 @@ #if defined(RAJA_ENABLE_DESUL_ATOMICS) +#include +#include +#include + #include "RAJA/util/macros.hpp" +#include "RAJA/util/TypeConvert.hpp" #include "RAJA/policy/atomic_builtin.hpp" @@ -27,6 +32,50 @@ using raja_default_desul_scope = desul::MemoryScopeDevice; namespace RAJA { +namespace detail +{ + +template<typename T, std::enable_if_t<!std::is_floating_point<T>::value, bool> = true> +RAJA_HOST_DEVICE RAJA_INLINE bool desul_atomicCAS_equal(const T& a, const T& b) +{ + return a == b; +} + +template::value, bool> = true> +RAJA_HOST_DEVICE RAJA_INLINE bool desul_atomicCAS_equal(const T& a, const T& b) +{ + using R = std::conditional_t; + static_assert(sizeof(T) == sizeof(std::uint32_t) || + sizeof(T) == sizeof(std::uint64_t), + "desul_atomicCAS_equal only supports 32/64-bit floating point"); + + return RAJA::util::reinterp_A_as_B(a) == + RAJA::util::reinterp_A_as_B(b); +} + +template +RAJA_HOST_DEVICE RAJA_INLINE T desul_atomicCAS_loop(T* acc, Oper&& oper) +{ + T old = desul::atomic_load(acc, raja_default_desul_order {}, + raja_default_desul_scope {}); + T expected; + + do + { + expected = old; + old = desul::atomic_compare_exchange(acc, expected, oper(expected), + raja_default_desul_order {}, + raja_default_desul_scope {}); + } while (!desul_atomicCAS_equal(old, expected)); + + return old; +} + +} // namespace detail + RAJA_SUPPRESS_HD_WARN template RAJA_HOST_DEVICE RAJA_INLINE T atomicLoad(AtomicPolicy, T* acc) @@ -153,6 +202,14 @@ atomicCAS(AtomicPolicy, T* acc, T compare, T value) raja_default_desul_scope {}); } +RAJA_SUPPRESS_HD_WARN +template +RAJA_HOST_DEVICE RAJA_INLINE T +atomicOperation(AtomicPolicy, T* acc, Operation&& operation) +{ + return detail::desul_atomicCAS_loop(acc, std::forward(operation)); +} + } // namespace RAJA #endif // RAJA_ENABLE_DESUL_ATOMICS From ae678033bf3c1e6303c2385e243f49ed91bdcbd9 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Mon, 16 Mar 2026 16:48:25 -0700 Subject: [PATCH 6/8] Add release note ---
RELEASE_NOTES.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 3a980ee93a..90488a9504 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -15,12 +15,13 @@ This release contains ... Notable changes include: * New features / API changes: + * Added `RAJA::atomicOperation` to enable user-defined atomic update + operations implemented using a compare-and-swap loop. * Build changes/improvements: * Bug fixes/improvements: - Version 2025.12.2 -- Release date 2026-03-04 ============================================ From cd697e2d73ffb3b31add47484d5d2640ad745826 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Mon, 16 Mar 2026 16:54:44 -0700 Subject: [PATCH 7/8] Fix typo in hip atomic --- include/RAJA/policy/hip/atomic.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/RAJA/policy/hip/atomic.hpp b/include/RAJA/policy/hip/atomic.hpp index 7aa6a5bd91..a638546bf7 100644 --- a/include/RAJA/policy/hip/atomic.hpp +++ b/include/RAJA/policy/hip/atomic.hpp @@ -909,7 +909,7 @@ atomicOperation(hip_atomic_explicit, T* acc, Operation&& operation) #if defined(__HIP_DEVICE_COMPILE__) return detail::hip_atomicCAS_loop(acc, std::forward(operation)); #else - return RAJA::atomicOperation(host_policy {}, acc, std::forward(value)); + return RAJA::atomicOperation(host_policy {}, acc, std::forward(operation)); #endif } From fc718fadaf885b6ee25aba5545aebafccfcb45f5 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Tue, 17 Mar 2026 09:15:47 -0700 Subject: [PATCH 8/8] Clean up atomicOperation desul implementation --- include/RAJA/policy/desul/atomic.hpp | 42 +++++++++++++++------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/include/RAJA/policy/desul/atomic.hpp b/include/RAJA/policy/desul/atomic.hpp index 375a698f13..f1dfff43d5 100644 --- a/include/RAJA/policy/desul/atomic.hpp +++ b/include/RAJA/policy/desul/atomic.hpp @@ -49,31 +49,13 @@ RAJA_HOST_DEVICE RAJA_INLINE bool 
desul_atomicCAS_equal(const T& a, const T& b) std::uint32_t, std::uint64_t>; static_assert(sizeof(T) == sizeof(std::uint32_t) || - sizeof(T) == sizeof(std::uint64_t), + sizeof(T) == sizeof(std::uint64_t), "desul_atomicCAS_equal only supports 32/64-bit floating point"); return RAJA::util::reinterp_A_as_B(a) == RAJA::util::reinterp_A_as_B(b); } -template -RAJA_HOST_DEVICE RAJA_INLINE T desul_atomicCAS_loop(T* acc, Oper&& oper) -{ - T old = desul::atomic_load(acc, raja_default_desul_order {}, - raja_default_desul_scope {}); - T expected; - - do - { - expected = old; - old = desul::atomic_compare_exchange(acc, expected, oper(expected), - raja_default_desul_order {}, - raja_default_desul_scope {}); - } while (!desul_atomicCAS_equal(old, expected)); - - return old; -} - } // namespace detail RAJA_SUPPRESS_HD_WARN @@ -207,7 +189,27 @@ template RAJA_HOST_DEVICE RAJA_INLINE T atomicOperation(AtomicPolicy, T* acc, Operation&& operation) { - return detail::desul_atomicCAS_loop(acc, std::forward(operation)); + T expected = desul::atomic_load(acc, + raja_default_desul_order {}, + raja_default_desul_scope {}); + + while (true) { + const T desired = operation(expected); + + if (detail::desul_atomicCAS_equal(desired, expected)) { + return expected; // no-op + } + + const T old = desul::atomic_compare_exchange(acc, expected, desired, + raja_default_desul_order {}, + raja_default_desul_scope {}); + + if (detail::desul_atomicCAS_equal(old, expected)) { + return old; // success + } + + expected = old; // CAS failed, old is the latest observed value + } } } // namespace RAJA