diff --git a/cub/cub/block/block_radix_rank.cuh b/cub/cub/block/block_radix_rank.cuh index f774a8a52f8..1db2c693eca 100644 --- a/cub/cub/block/block_radix_rank.cuh +++ b/cub/cub/block/block_radix_rank.cuh @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -743,10 +744,10 @@ public: __syncwarp(0xFFFFFFFF); // Number of peers having same digit as me - int32_t digit_count = __popc(peer_mask); + int32_t digit_count = ::cuda::std::popcount(peer_mask); // Number of lower-ranked peers having same digit seen so far - int32_t peer_digit_prefix = __popc(peer_mask & lane_mask_lt); + int32_t peer_digit_prefix = ::cuda::std::popcount(peer_mask & lane_mask_lt); if (peer_digit_prefix == 0) { @@ -1076,7 +1077,7 @@ struct BlockRadixRankMatchEarlyCounts int bin_mask = *p_match_mask; int leader = ::cuda::std::__bit_log2(static_cast(bin_mask)); int warp_offset = 0; - int popc = __popc(bin_mask & ::cuda::ptx::get_sreg_lanemask_le()); + int popc = ::cuda::std::popcount(bin_mask & ::cuda::ptx::get_sreg_lanemask_le()); if (lane == leader) { // atomic is a bit faster @@ -1106,7 +1107,7 @@ struct BlockRadixRankMatchEarlyCounts detail::warp_in_block_matcher_t::match_any(bin, warp); int leader = ::cuda::std::__bit_log2(static_cast(bin_mask)); int warp_offset = 0; - int popc = __popc(bin_mask & ::cuda::ptx::get_sreg_lanemask_le()); + int popc = ::cuda::std::popcount(bin_mask & ::cuda::ptx::get_sreg_lanemask_le()); if (lane == leader) { // atomic is a bit faster diff --git a/libcudacxx/include/cuda/__barrier/barrier_block_scope.h b/libcudacxx/include/cuda/__barrier/barrier_block_scope.h index 8455bda4ac1..988e3f99b37 100644 --- a/libcudacxx/include/cuda/__barrier/barrier_block_scope.h +++ b/libcudacxx/include/cuda/__barrier/barrier_block_scope.h @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -166,7 +167,7 @@ class barrier : public __bl unsigned int __activeA = ::__match_any_sync(__mask, __update); unsigned int __activeB = ::__match_any_sync(__mask, reinterpret_cast<::cuda::std::uintptr_t>(&__barrier)); unsigned int __active = __activeA & __activeB; - int __inc = ::__popc(__active) * __update; + int __inc = ::cuda::std::popcount(__active) * __update; int __leader = ::__ffs(__active) - 1; // All threads in mask synchronize here, establishing cummulativity to the __leader: diff --git a/libcudacxx/include/cuda/pipeline b/libcudacxx/include/cuda/pipeline index c6d7a68f291..b37dde4b82e 100644 --- a/libcudacxx/include/cuda/pipeline +++ b/libcudacxx/include/cuda/pipeline @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -112,7 +113,7 @@ public: ::__match_any_sync(::__activemask(), reinterpret_cast(__shared_state_get_refcount())); const uint32_t __elected_id = ::__ffs(__match_mask) - 1; __elected = (::cuda::ptx::get_sreg_laneid() == __elected_id); - __sub_count = ::__popc(__match_mask); + __sub_count = ::cuda::std::popcount(__match_mask); , __elected = true; __sub_count = 1;) bool __released = false; @@ -301,7 +302,7 @@ make_pipeline(const _Group& __group, pipeline_shared_state<_Scope, _Stages_count ::__match_any_sync(::__activemask(), reinterpret_cast(&__shared_state->__refcount)); const uint32_t __elected_id = ::__ffs(__match_mask) - 1; __elected = (::cuda::ptx::get_sreg_laneid() == __elected_id); - __add_count = ::__popc(__match_mask); + __add_count = ::cuda::std::popcount(__match_mask); , __elected = true; __add_count = 1;) if (__elected) diff --git a/thrust/thrust/system/cuda/detail/set_operations.h b/thrust/thrust/system/cuda/detail/set_operations.h index b7353e919b0..b60aa2f2c99 100644 --- a/thrust/thrust/system/cuda/detail/set_operations.h +++ b/thrust/thrust/system/cuda/detail/set_operations.h @@ -55,6 +55,7 @@ # include # include +# include # include THRUST_NAMESPACE_BEGIN @@ -560,7 +561,7 @@ struct SetOpAgent Size tile_output_count = 0; Size thread_output_prefix = 0; Size tile_output_prefix = 0; - Size thread_output_count = static_cast(__popc(active_mask)); + Size thread_output_count = static_cast(::cuda::std::popcount(static_cast(active_mask))); if (tile_idx == 0) // first tile {