Skip to content

Commit 5303f65

Browse files
viralbhadeshiya, miscco, and fbusato authored
Dropped duplicated math function from Thrust (#6188)
Co-authored-by: Michael Schellenberger Costa <[email protected]>
Co-authored-by: Federico Busato <[email protected]>
1 parent 9415711 commit 5303f65

File tree

9 files changed: +54 / -142 lines changed

cub/cub/device/dispatch/dispatch_merge_sort.cuh

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -45,9 +45,8 @@
4545
#include <cub/util_namespace.cuh>
4646
#include <cub/util_vsmem.cuh>
4747

48-
#include <thrust/detail/integer_math.h>
49-
5048
#include <cuda/__cmath/ceil_div.h>
49+
#include <cuda/__cmath/ilog.h>
5150
#include <cuda/std/__algorithm/max.h>
5251
#include <cuda/std/__type_traits/is_same.h>
5352
#include <cuda/std/cstdint>
@@ -282,7 +281,7 @@ struct DispatchMergeSort
282281
break;
283282
}
284283

285-
const int num_passes = static_cast<int>(THRUST_NS_QUALIFIER::detail::log2_ri(num_tiles));
284+
const int num_passes = ::cuda::ceil_ilog2(num_tiles);
286285

287286
/*
288287
* The algorithm consists of stages. At each stage, there are input and output arrays. There are two pairs of

thrust/thrust/detail/execute_with_allocator.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,12 @@
2828

2929
#include <thrust/detail/allocator/allocator_traits.h>
3030
#include <thrust/detail/execute_with_allocator_fwd.h>
31-
#include <thrust/detail/integer_math.h>
3231
#include <thrust/detail/raw_pointer_cast.h>
3332
#include <thrust/detail/type_traits/pointer_traits.h>
3433
#include <thrust/pair.h>
3534

35+
#include <cuda/__cmath/ceil_div.h>
36+
3637
THRUST_NAMESPACE_BEGIN
3738

3839
namespace detail
@@ -50,7 +51,7 @@ get_temporary_buffer(thrust::detail::execute_with_allocator<Allocator, BaseSyste
5051

5152
// How many elements of type value_type do we need to accommodate n elements
5253
// of type T?
53-
size_type num_elements = divide_ri(sizeof(T) * n, sizeof(value_type));
54+
const size_type num_elements = static_cast<size_type>(::cuda::ceil_div(sizeof(T) * n, sizeof(value_type)));
5455

5556
void_pointer ptr = alloc_traits::allocate(system.get_allocator(), num_elements);
5657

@@ -69,7 +70,7 @@ _CCCL_HOST void return_temporary_buffer(
6970
using value_type = typename alloc_traits::value_type;
7071
using T = typename thrust::detail::pointer_traits<Pointer>::element_type;
7172

72-
size_type num_elements = divide_ri(sizeof(T) * n, sizeof(value_type));
73+
size_type num_elements = ::cuda::ceil_div(sizeof(T) * n, sizeof(value_type));
7374

7475
pointer to_ptr = thrust::reinterpret_pointer_cast<pointer>(p);
7576
alloc_traits::deallocate(system.get_allocator(), to_ptr, num_elements);

thrust/thrust/detail/integer_math.h

Lines changed: 0 additions & 113 deletions
This file was deleted.

thrust/thrust/mr/disjoint_pool.h

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@
4141
#include <thrust/mr/memory_resource.h>
4242
#include <thrust/mr/pool_options.h>
4343

44+
#include <cuda/__cmath/ilog.h>
45+
#include <cuda/__cmath/pow2.h>
4446
#include <cuda/std/__algorithm/max.h>
4547
#include <cuda/std/__algorithm/min.h>
4648
#include <cuda/std/__cccl/algorithm_wrapper.h>
@@ -123,7 +125,7 @@ class disjoint_unsynchronized_pool_resource final
123125
: m_upstream(upstream)
124126
, m_bookkeeper(bookkeeper)
125127
, m_options(options)
126-
, m_smallest_block_log2(detail::log2_ri(m_options.smallest_block_size))
128+
, m_smallest_block_log2(::cuda::ceil_ilog2(m_options.smallest_block_size))
127129
, m_pools(m_bookkeeper)
128130
, m_allocated(m_bookkeeper)
129131
, m_cached_oversized(m_bookkeeper)
@@ -133,7 +135,7 @@ class disjoint_unsynchronized_pool_resource final
133135

134136
pointer_vector free(m_bookkeeper);
135137
pool p(free);
136-
m_pools.resize(detail::log2_ri(m_options.largest_block_size) - m_smallest_block_log2 + 1, p);
138+
m_pools.resize(::cuda::ceil_ilog2(m_options.largest_block_size) - m_smallest_block_log2 + 1, p);
137139
}
138140

139141
// TODO: C++11: use delegating constructors
@@ -146,7 +148,7 @@ class disjoint_unsynchronized_pool_resource final
146148
: m_upstream(get_global_resource<Upstream>())
147149
, m_bookkeeper(get_global_resource<Bookkeeper>())
148150
, m_options(options)
149-
, m_smallest_block_log2(detail::log2_ri(m_options.smallest_block_size))
151+
, m_smallest_block_log2(::cuda::ceil_ilog2(m_options.smallest_block_size))
150152
, m_pools(m_bookkeeper)
151153
, m_allocated(m_bookkeeper)
152154
, m_cached_oversized(m_bookkeeper)
@@ -156,7 +158,7 @@ class disjoint_unsynchronized_pool_resource final
156158

157159
pointer_vector free(m_bookkeeper);
158160
pool p(free);
159-
m_pools.resize(detail::log2_ri(m_options.largest_block_size) - m_smallest_block_log2 + 1, p);
161+
m_pools.resize(::cuda::ceil_ilog2(m_options.largest_block_size) - m_smallest_block_log2 + 1, p);
160162
}
161163

162164
/*! Destructor. Releases all held memory to upstream.
@@ -370,7 +372,7 @@ class disjoint_unsynchronized_pool_resource final
370372
[[nodiscard]] void_ptr do_allocate_impl(std::size_t bytes, std::size_t alignment)
371373
{
372374
bytes = (std::max) (bytes, m_options.smallest_block_size);
373-
assert(detail::is_power_of_2(alignment));
375+
assert(::cuda::is_power_of_two(alignment));
374376

375377
// an oversized and/or overaligned allocation requested; needs to be allocated separately
376378
if (bytes > m_options.largest_block_size || alignment > m_options.alignment)
@@ -430,7 +432,7 @@ class disjoint_unsynchronized_pool_resource final
430432

431433
// the request is NOT for oversized and/or overaligned memory
432434
// allocate a block from an appropriate bucket
433-
std::size_t bytes_log2 = thrust::detail::log2_ri(bytes);
435+
std::size_t bytes_log2 = ::cuda::ceil_ilog2(bytes);
434436
std::size_t pool_idx = bytes_log2 - m_smallest_block_log2;
435437
pool& bucket = m_pools[pool_idx];
436438

@@ -482,7 +484,7 @@ class disjoint_unsynchronized_pool_resource final
482484
virtual void do_deallocate(void_ptr p, std::size_t n, std::size_t alignment = THRUST_MR_DEFAULT_ALIGNMENT) override
483485
{
484486
n = (std::max) (n, m_options.smallest_block_size);
485-
assert(detail::is_power_of_2(alignment));
487+
assert(::cuda::is_power_of_two(alignment));
486488

487489
// verify that the pointer is at least as aligned as claimed
488490
assert(reinterpret_cast<::cuda::std::intmax_t>(detail::pointer_traits<void_ptr>::get(p)) % alignment == 0);
@@ -511,7 +513,7 @@ class disjoint_unsynchronized_pool_resource final
511513
}
512514

513515
// push the block to the front of the appropriate bucket's free list
514-
std::size_t n_log2 = thrust::detail::log2_ri(n);
516+
std::size_t n_log2 = ::cuda::ceil_ilog2(n);
515517
std::size_t pool_idx = n_log2 - m_smallest_block_log2;
516518
pool& bucket = m_pools[pool_idx];
517519

thrust/thrust/mr/pool.h

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@
3737
#include <thrust/mr/memory_resource.h>
3838
#include <thrust/mr/pool_options.h>
3939

40+
#include <cuda/__cmath/ilog.h>
41+
#include <cuda/__cmath/pow2.h>
4042
#include <cuda/std/__cccl/algorithm_wrapper.h>
4143
#include <cuda/std/cassert>
4244
#include <cuda/std/cstdint>
@@ -112,7 +114,7 @@ class unsynchronized_pool_resource final
112114
unsynchronized_pool_resource(Upstream* upstream, pool_options options = get_default_options())
113115
: m_upstream(upstream)
114116
, m_options(options)
115-
, m_smallest_block_log2(detail::log2_ri(m_options.smallest_block_size))
117+
, m_smallest_block_log2(::cuda::ceil_ilog2(m_options.smallest_block_size))
116118
, m_pools(upstream)
117119
, m_allocated()
118120
, m_oversized()
@@ -121,7 +123,7 @@ class unsynchronized_pool_resource final
121123
assert(m_options.validate());
122124

123125
pool p = {block_descriptor_ptr(), 0};
124-
m_pools.resize(detail::log2_ri(m_options.largest_block_size) - m_smallest_block_log2 + 1, p);
126+
m_pools.resize(::cuda::ceil_ilog2(m_options.largest_block_size) - m_smallest_block_log2 + 1, p);
125127
}
126128

127129
// TODO: C++11: use delegating constructors
@@ -133,7 +135,7 @@ class unsynchronized_pool_resource final
133135
unsynchronized_pool_resource(pool_options options = get_default_options())
134136
: m_upstream(get_global_resource<Upstream>())
135137
, m_options(options)
136-
, m_smallest_block_log2(detail::log2_ri(m_options.smallest_block_size))
138+
, m_smallest_block_log2(::cuda::ceil_ilog2(m_options.smallest_block_size))
137139
, m_pools(get_global_resource<Upstream>())
138140
, m_allocated()
139141
, m_oversized()
@@ -142,7 +144,7 @@ class unsynchronized_pool_resource final
142144
assert(m_options.validate());
143145

144146
pool p = {block_descriptor_ptr(), 0};
145-
m_pools.resize(detail::log2_ri(m_options.largest_block_size) - m_smallest_block_log2 + 1, p);
147+
m_pools.resize(::cuda::ceil_ilog2(m_options.largest_block_size) - m_smallest_block_log2 + 1, p);
146148
}
147149

148150
/*! Destructor. Releases all held memory to upstream.
@@ -260,7 +262,7 @@ class unsynchronized_pool_resource final
260262
do_allocate(std::size_t bytes, std::size_t alignment = THRUST_MR_DEFAULT_ALIGNMENT) override
261263
{
262264
bytes = (std::max) (bytes, m_options.smallest_block_size);
263-
assert(detail::is_power_of_2(alignment));
265+
assert(::cuda::is_power_of_two(alignment));
264266

265267
// an oversized and/or overaligned allocation requested; needs to be allocated separately
266268
if (bytes > m_options.largest_block_size || alignment > m_options.alignment)
@@ -371,7 +373,7 @@ class unsynchronized_pool_resource final
371373

372374
// the request is NOT for oversized and/or overaligned memory
373375
// allocate a block from an appropriate bucket
374-
std::size_t bytes_log2 = thrust::detail::log2_ri(bytes);
376+
std::size_t bytes_log2 = ::cuda::ceil_ilog2(bytes);
375377
std::size_t bucket_idx = bytes_log2 - m_smallest_block_log2;
376378
pool& bucket = thrust::raw_reference_cast(m_pools[bucket_idx]);
377379

@@ -439,7 +441,7 @@ class unsynchronized_pool_resource final
439441
virtual void do_deallocate(void_ptr p, std::size_t n, std::size_t alignment = THRUST_MR_DEFAULT_ALIGNMENT) override
440442
{
441443
n = (std::max) (n, m_options.smallest_block_size);
442-
assert(detail::is_power_of_2(alignment));
444+
assert(::cuda::is_power_of_two(alignment));
443445

444446
// verify that the pointer is at least as aligned as claimed
445447
assert(reinterpret_cast<::cuda::std::intmax_t>(void_ptr_traits::get(p)) % alignment == 0);
@@ -504,7 +506,7 @@ class unsynchronized_pool_resource final
504506
}
505507

506508
// push the block to the front of the appropriate bucket's free list
507-
std::size_t n_log2 = thrust::detail::log2_ri(n);
509+
std::size_t n_log2 = ::cuda::ceil_ilog2(n);
508510
std::size_t bucket_idx = n_log2 - m_smallest_block_log2;
509511
pool& bucket = thrust::raw_reference_cast(m_pools[bucket_idx]);
510512

thrust/thrust/mr/pool_options.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@
3232
#endif // no system header
3333

3434
#include <thrust/detail/config/memory_resource.h>
35-
#include <thrust/detail/integer_math.h>
3635

36+
#include <cuda/__cmath/pow2.h>
3737
#include <cuda/std/cstddef>
3838

3939
THRUST_NAMESPACE_BEGIN
@@ -104,15 +104,15 @@ struct pool_options
104104
*/
105105
bool validate() const
106106
{
107-
if (!detail::is_power_of_2(smallest_block_size))
107+
if (smallest_block_size != 0 && !::cuda::is_power_of_two(smallest_block_size))
108108
{
109109
return false;
110110
}
111-
if (!detail::is_power_of_2(largest_block_size))
111+
if (largest_block_size != 0 && !::cuda::is_power_of_two(largest_block_size))
112112
{
113113
return false;
114114
}
115-
if (!detail::is_power_of_2(alignment))
115+
if (alignment != 0 && !::cuda::is_power_of_two(alignment))
116116
{
117117
return false;
118118
}

thrust/thrust/system/cuda/detail/dispatch.h

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,16 +26,39 @@
2626
# pragma system_header
2727
#endif // no system header
2828

29-
#include <thrust/detail/integer_math.h>
3029
#include <thrust/detail/preprocessor.h>
3130

31+
#include <cuda/std/__concepts/concept_macros.h>
32+
#include <cuda/std/__type_traits/is_arithmetic.h>
33+
#include <cuda/std/__type_traits/is_unsigned.h>
3234
#include <cuda/std/cstdint>
3335
#include <cuda/std/detail/libcxx/include/stdexcept>
3436
#include <cuda/std/limits>
3537
#include <cuda/std/type_traits>
3638

3739
#include <string>
3840

41+
THRUST_NAMESPACE_BEGIN
42+
namespace detail
43+
{
44+
45+
_CCCL_TEMPLATE(typename T)
46+
_CCCL_REQUIRES(::cuda::std::is_arithmetic_v<T>)
47+
[[nodiscard]] _CCCL_API constexpr bool is_negative([[maybe_unused]] T x) noexcept
48+
{
49+
if constexpr (::cuda::std::is_unsigned_v<T>)
50+
{
51+
return false;
52+
}
53+
else
54+
{
55+
return x < 0;
56+
}
57+
}
58+
59+
} // namespace detail
60+
THRUST_NAMESPACE_END
61+
3962
#if defined(THRUST_FORCE_32_BIT_OFFSET_TYPE) && defined(THRUST_FORCE_64_BIT_OFFSET_TYPE)
4063
# error "Only THRUST_FORCE_32_BIT_OFFSET_TYPE or THRUST_FORCE_64_BIT_OFFSET_TYPE may be defined!"
4164
#endif // THRUST_FORCE_32_BIT_OFFSET_TYPE && THRUST_FORCE_64_BIT_OFFSET_TYPE

0 commit comments

Comments
 (0)