1- // Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved.
1+ // Copyright (c) 2017-2026 Advanced Micro Devices, Inc. All rights reserved.
22//
33// Permission is hereby granted, free of charge, to any person obtaining a copy
44// of this software and associated documentation files (the "Software"), to deal
@@ -162,6 +162,8 @@ namespace detail
162162 return __hip_atomic_load (address, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
163163 }
164164
165+ // The __uint128_t overload of atomic_load is not provided for unknown targets or SPIR-V.
166+ #if !defined(ROCPRIM_TARGET_UNKNOWN) && !defined(ROCPRIM_TARGET_SPIRV)
165167 ROCPRIM_DEVICE ROCPRIM_INLINE
166168 __uint128_t atomic_load (const __uint128_t * address)
167169 {
@@ -197,25 +199,24 @@ namespace detail
197199 #define ROCPRIM_ATOMIC_LOAD_SHARED (ptr ) \
198200 ROCPRIM_ATOMIC_LOAD (" ds_read_b128" , " " , " s_waitcnt lgkmcnt(0)" , ptr)
199201 // This architecture doesn't support atomics on the global AS.
200- #define ROCPRIM_ATOMIC_LOAD_GLOBAL (ptr ) ROCPRIM_ATOMIC_LOAD_FLAT(ptr)
201- #elif ROCPRIM_TARGET_RDNA3 || ROCPRIM_TARGET_CDNA2 || ROCPRIM_TARGET_CDNA1 || ROCPRIM_TARGET_GCN5 \
202- || ROCPRIM_TARGET_SPIRV
203- // We don't really know what architecture we are on when targeting
204- // SPIR-V. Lets just assume it's one of these.
205- #define ROCPRIM_ATOMIC_LOAD_FLAT (ptr ) \
206- ROCPRIM_ATOMIC_LOAD (" flat_load_dwordx4" , " glc" , " s_waitcnt vmcnt(0)" , ptr)
207- #define ROCPRIM_ATOMIC_LOAD_SHARED (ptr ) \
208- ROCPRIM_ATOMIC_LOAD (" ds_read_b128" , " " , " s_waitcnt lgkmcnt(0)" , ptr)
209- #define ROCPRIM_ATOMIC_LOAD_GLOBAL (ptr ) \
210- ROCPRIM_ATOMIC_LOAD (" global_load_dwordx4" , " off glc" , " s_waitcnt vmcnt(0)" , ptr)
211- #elif defined(__HIP_DEVICE_COMPILE__)
212- // Please submit an issue or pull request!
213- #error support for 128-bit atomics not implemented for current architecture
214- #endif
202+ #define ROCPRIM_ATOMIC_LOAD_GLOBAL (ptr ) ROCPRIM_ATOMIC_LOAD_FLAT(ptr)
203+ #elif ROCPRIM_TARGET_RDNA3 || ROCPRIM_TARGET_CDNA2 || ROCPRIM_TARGET_CDNA1 \
204+ || ROCPRIM_TARGET_GCN5
205+ #define ROCPRIM_ATOMIC_LOAD_FLAT (ptr ) \
206+ ROCPRIM_ATOMIC_LOAD (" flat_load_dwordx4" , " glc" , " s_waitcnt vmcnt(0)" , ptr)
207+ #define ROCPRIM_ATOMIC_LOAD_SHARED (ptr ) \
208+ ROCPRIM_ATOMIC_LOAD (" ds_read_b128" , " " , " s_waitcnt lgkmcnt(0)" , ptr)
209+ #define ROCPRIM_ATOMIC_LOAD_GLOBAL (ptr ) \
210+ ROCPRIM_ATOMIC_LOAD (" global_load_dwordx4" , " off glc" , " s_waitcnt vmcnt(0)" , ptr)
211+ #elif defined(__HIP_DEVICE_COMPILE__)
212+ // Please submit an issue or pull request!
213+ #error support for 128-bit atomics not implemented for current architecture
214+ #endif
215215
216- #ifdef __HIP_DEVICE_COMPILE__
217- #if !ROCPRIM_TARGET_SPIRV && defined(__has_builtin) \
218- && __has_builtin (__builtin_amdgcn_is_shared) && __has_builtin (__builtin_amdgcn_is_private)
216+ #ifdef __HIP_DEVICE_COMPILE__
217+ #if !ROCPRIM_TARGET_SPIRV && defined(__has_builtin) \
218+ && __has_builtin (__builtin_amdgcn_is_shared) \
219+ && __has_builtin (__builtin_amdgcn_is_private)
219220
220221 auto * ptr = (const __attribute__ ((address_space (0 /* flat*/ ))) __uint128_t *)address;
221222 if (__builtin_amdgcn_is_shared (ptr))
@@ -234,15 +235,15 @@ namespace detail
234235 = (const __attribute__ ((address_space (1 /* global*/ ))) __uint128_t *)address;
235236 ROCPRIM_ATOMIC_LOAD_GLOBAL (global_ptr);
236237 }
237- #else
238+ #else
238239 // SPIR-V does not like the address-space checks. For now
239240 // let's just do flat loading/storing.
240241 ROCPRIM_ATOMIC_LOAD_FLAT (address);
241- #endif
242- #else
242+ #endif
243+ #else
243244 (void )address;
244245 result = 0 ;
245- #endif
246+ #endif
246247
247248 return result;
248249
@@ -251,6 +252,7 @@ namespace detail
251252#undef ROCPRIM_ATOMIC_LOAD_SHARED
252253#undef ROCPRIM_ATOMIC_LOAD_GLOBAL
253254 }
255+ #endif
254256
255257 ROCPRIM_DEVICE ROCPRIM_INLINE
256258 void atomic_store (unsigned char * address, unsigned char value)
@@ -279,6 +281,8 @@ namespace detail
279281 __hip_atomic_store (address, value, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
280282 }
281283
284+ // The __uint128_t overload of atomic_store is not provided for unknown targets or SPIR-V.
285+ #if !defined(ROCPRIM_TARGET_UNKNOWN) && !defined(ROCPRIM_TARGET_SPIRV)
282286 ROCPRIM_DEVICE ROCPRIM_INLINE
283287 void atomic_store (__uint128_t * address, const __uint128_t value)
284288 {
@@ -355,6 +359,7 @@ namespace detail
355359#undef ROCPRIM_ATOMIC_STORE_SHARED
356360#undef ROCPRIM_ATOMIC_STORE_GLOBAL
357361 }
362+ #endif
358363
359364 // / \brief Wait for all vector memory operations to complete
360365 // /
0 commit comments