Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
1a37095
fixed duplication for log2_ri
viralbhadeshiya Oct 10, 2025
87b98f1
changed cub for log2_ri deplication
viralbhadeshiya Oct 10, 2025
96a8c4d
fix ceil_ilog2 import
viralbhadeshiya Oct 10, 2025
8b1eadd
fix sytax error
viralbhadeshiya Oct 10, 2025
8eec94e
Drop all duplicated math function from Thrust
viralbhadeshiya Oct 10, 2025
70938ba
fix syntax issue for divide_rz
viralbhadeshiya Oct 10, 2025
1a883b7
remove duplicate declaration
viralbhadeshiya Oct 10, 2025
0b4597d
fix pair error on thrust
viralbhadeshiya Oct 10, 2025
237f908
Merge remote-tracking branch 'remote/main' into FEA/6104
viralbhadeshiya Oct 10, 2025
5a43614
changes for pre-commit hook
viralbhadeshiya Oct 10, 2025
01a27de
PR review changes
viralbhadeshiya Oct 11, 2025
f0d7b38
fixes for PR review suggestion 2
viralbhadeshiya Oct 11, 2025
1c3060f
Drop unused `is_odd`
miscco Oct 13, 2025
701af01
Improve `is_negative` implementation and drop unused headers
miscco Oct 13, 2025
07b811c
Drop unused includes
miscco Oct 13, 2025
8f713c8
Silence unused variable warning
miscco Oct 13, 2025
b90a202
Merge branch 'main' into FEA/6104
miscco Oct 13, 2025
cfc2b0b
Merge branch 'main' into FEA/6104
viralbhadeshiya Oct 14, 2025
79f9059
Merge branch 'FEA/6104' of github.com:viralbhadeshiya/cccl into FEA/6104
viralbhadeshiya Oct 14, 2025
1806c12
Commit changes from @fbusato
viralbhadeshiya Oct 15, 2025
4261f1f
drop integer_math
viralbhadeshiya Oct 15, 2025
8a80170
Merge branch 'main' into FEA/6104
viralbhadeshiya Oct 15, 2025
34fc385
Merge branch 'main' into FEA/6104
miscco Oct 16, 2025
d147cd1
bug: fix different behavior for ::cuda::is_power_of_two
viralbhadeshiya Oct 17, 2025
e117d67
Merge branch 'main' into FEA/6104
viralbhadeshiya Oct 17, 2025
16d4048
fix for ::cuda::is_power_of_two
viralbhadeshiya Oct 17, 2025
0ab0b72
Merge branch 'FEA/6104' of github.com:viralbhadeshiya/cccl into FEA/6104
viralbhadeshiya Oct 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions cub/cub/device/dispatch/dispatch_merge_sort.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,8 @@
#include <cub/util_namespace.cuh>
#include <cub/util_vsmem.cuh>

#include <thrust/detail/integer_math.h>

#include <cuda/__cmath/ceil_div.h>
#include <cuda/__cmath/ilog.h>
#include <cuda/std/__algorithm/max.h>
#include <cuda/std/__type_traits/is_same.h>
#include <cuda/std/cstdint>
Expand Down Expand Up @@ -282,7 +281,7 @@ struct DispatchMergeSort
break;
}

const int num_passes = static_cast<int>(THRUST_NS_QUALIFIER::detail::log2_ri(num_tiles));
const int num_passes = ::cuda::ceil_ilog2(num_tiles);

/*
* The algorithm consists of stages. At each stage, there are input and output arrays. There are two pairs of
Expand Down
7 changes: 4 additions & 3 deletions thrust/thrust/detail/execute_with_allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,12 @@

#include <thrust/detail/allocator/allocator_traits.h>
#include <thrust/detail/execute_with_allocator_fwd.h>
#include <thrust/detail/integer_math.h>
#include <thrust/detail/raw_pointer_cast.h>
#include <thrust/detail/type_traits/pointer_traits.h>
#include <thrust/pair.h>

#include <cuda/__cmath/ceil_div.h>

THRUST_NAMESPACE_BEGIN

namespace detail
Expand All @@ -50,7 +51,7 @@ get_temporary_buffer(thrust::detail::execute_with_allocator<Allocator, BaseSyste

// How many elements of type value_type do we need to accommodate n elements
// of type T?
size_type num_elements = divide_ri(sizeof(T) * n, sizeof(value_type));
const size_type num_elements = static_cast<size_type>(::cuda::ceil_div(sizeof(T) * n, sizeof(value_type)));

void_pointer ptr = alloc_traits::allocate(system.get_allocator(), num_elements);

Expand All @@ -69,7 +70,7 @@ _CCCL_HOST void return_temporary_buffer(
using value_type = typename alloc_traits::value_type;
using T = typename thrust::detail::pointer_traits<Pointer>::element_type;

size_type num_elements = divide_ri(sizeof(T) * n, sizeof(value_type));
size_type num_elements = ::cuda::ceil_div(sizeof(T) * n, sizeof(value_type));

pointer to_ptr = thrust::reinterpret_pointer_cast<pointer>(p);
alloc_traits::deallocate(system.get_allocator(), to_ptr, num_elements);
Expand Down
113 changes: 0 additions & 113 deletions thrust/thrust/detail/integer_math.h

This file was deleted.

18 changes: 10 additions & 8 deletions thrust/thrust/mr/disjoint_pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@
#include <thrust/mr/memory_resource.h>
#include <thrust/mr/pool_options.h>

#include <cuda/__cmath/ilog.h>
#include <cuda/__cmath/pow2.h>
#include <cuda/std/__algorithm/max.h>
#include <cuda/std/__algorithm/min.h>
#include <cuda/std/__cccl/algorithm_wrapper.h>
Expand Down Expand Up @@ -123,7 +125,7 @@ class disjoint_unsynchronized_pool_resource final
: m_upstream(upstream)
, m_bookkeeper(bookkeeper)
, m_options(options)
, m_smallest_block_log2(detail::log2_ri(m_options.smallest_block_size))
, m_smallest_block_log2(::cuda::ceil_ilog2(m_options.smallest_block_size))
, m_pools(m_bookkeeper)
, m_allocated(m_bookkeeper)
, m_cached_oversized(m_bookkeeper)
Expand All @@ -133,7 +135,7 @@ class disjoint_unsynchronized_pool_resource final

pointer_vector free(m_bookkeeper);
pool p(free);
m_pools.resize(detail::log2_ri(m_options.largest_block_size) - m_smallest_block_log2 + 1, p);
m_pools.resize(::cuda::ceil_ilog2(m_options.largest_block_size) - m_smallest_block_log2 + 1, p);
}

// TODO: C++11: use delegating constructors
Expand All @@ -146,7 +148,7 @@ class disjoint_unsynchronized_pool_resource final
: m_upstream(get_global_resource<Upstream>())
, m_bookkeeper(get_global_resource<Bookkeeper>())
, m_options(options)
, m_smallest_block_log2(detail::log2_ri(m_options.smallest_block_size))
, m_smallest_block_log2(::cuda::ceil_ilog2(m_options.smallest_block_size))
, m_pools(m_bookkeeper)
, m_allocated(m_bookkeeper)
, m_cached_oversized(m_bookkeeper)
Expand All @@ -156,7 +158,7 @@ class disjoint_unsynchronized_pool_resource final

pointer_vector free(m_bookkeeper);
pool p(free);
m_pools.resize(detail::log2_ri(m_options.largest_block_size) - m_smallest_block_log2 + 1, p);
m_pools.resize(::cuda::ceil_ilog2(m_options.largest_block_size) - m_smallest_block_log2 + 1, p);
}

/*! Destructor. Releases all held memory to upstream.
Expand Down Expand Up @@ -370,7 +372,7 @@ class disjoint_unsynchronized_pool_resource final
[[nodiscard]] void_ptr do_allocate_impl(std::size_t bytes, std::size_t alignment)
{
bytes = (std::max) (bytes, m_options.smallest_block_size);
assert(detail::is_power_of_2(alignment));
assert(::cuda::is_power_of_two(alignment));

// an oversized and/or overaligned allocation requested; needs to be allocated separately
if (bytes > m_options.largest_block_size || alignment > m_options.alignment)
Expand Down Expand Up @@ -430,7 +432,7 @@ class disjoint_unsynchronized_pool_resource final

// the request is NOT for oversized and/or overaligned memory
// allocate a block from an appropriate bucket
std::size_t bytes_log2 = thrust::detail::log2_ri(bytes);
std::size_t bytes_log2 = ::cuda::ceil_ilog2(bytes);
std::size_t pool_idx = bytes_log2 - m_smallest_block_log2;
pool& bucket = m_pools[pool_idx];

Expand Down Expand Up @@ -482,7 +484,7 @@ class disjoint_unsynchronized_pool_resource final
virtual void do_deallocate(void_ptr p, std::size_t n, std::size_t alignment = THRUST_MR_DEFAULT_ALIGNMENT) override
{
n = (std::max) (n, m_options.smallest_block_size);
assert(detail::is_power_of_2(alignment));
assert(::cuda::is_power_of_two(alignment));

// verify that the pointer is at least as aligned as claimed
assert(reinterpret_cast<::cuda::std::intmax_t>(detail::pointer_traits<void_ptr>::get(p)) % alignment == 0);
Expand Down Expand Up @@ -511,7 +513,7 @@ class disjoint_unsynchronized_pool_resource final
}

// push the block to the front of the appropriate bucket's free list
std::size_t n_log2 = thrust::detail::log2_ri(n);
std::size_t n_log2 = ::cuda::ceil_ilog2(n);
std::size_t pool_idx = n_log2 - m_smallest_block_log2;
pool& bucket = m_pools[pool_idx];

Expand Down
18 changes: 10 additions & 8 deletions thrust/thrust/mr/pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@
#include <thrust/mr/memory_resource.h>
#include <thrust/mr/pool_options.h>

#include <cuda/__cmath/ilog.h>
#include <cuda/__cmath/pow2.h>
#include <cuda/std/__cccl/algorithm_wrapper.h>
#include <cuda/std/cassert>
#include <cuda/std/cstdint>
Expand Down Expand Up @@ -112,7 +114,7 @@ class unsynchronized_pool_resource final
unsynchronized_pool_resource(Upstream* upstream, pool_options options = get_default_options())
: m_upstream(upstream)
, m_options(options)
, m_smallest_block_log2(detail::log2_ri(m_options.smallest_block_size))
, m_smallest_block_log2(::cuda::ceil_ilog2(m_options.smallest_block_size))
, m_pools(upstream)
, m_allocated()
, m_oversized()
Expand All @@ -121,7 +123,7 @@ class unsynchronized_pool_resource final
assert(m_options.validate());

pool p = {block_descriptor_ptr(), 0};
m_pools.resize(detail::log2_ri(m_options.largest_block_size) - m_smallest_block_log2 + 1, p);
m_pools.resize(::cuda::ceil_ilog2(m_options.largest_block_size) - m_smallest_block_log2 + 1, p);
}

// TODO: C++11: use delegating constructors
Expand All @@ -133,7 +135,7 @@ class unsynchronized_pool_resource final
unsynchronized_pool_resource(pool_options options = get_default_options())
: m_upstream(get_global_resource<Upstream>())
, m_options(options)
, m_smallest_block_log2(detail::log2_ri(m_options.smallest_block_size))
, m_smallest_block_log2(::cuda::ceil_ilog2(m_options.smallest_block_size))
, m_pools(get_global_resource<Upstream>())
, m_allocated()
, m_oversized()
Expand All @@ -142,7 +144,7 @@ class unsynchronized_pool_resource final
assert(m_options.validate());

pool p = {block_descriptor_ptr(), 0};
m_pools.resize(detail::log2_ri(m_options.largest_block_size) - m_smallest_block_log2 + 1, p);
m_pools.resize(::cuda::ceil_ilog2(m_options.largest_block_size) - m_smallest_block_log2 + 1, p);
}

/*! Destructor. Releases all held memory to upstream.
Expand Down Expand Up @@ -260,7 +262,7 @@ class unsynchronized_pool_resource final
do_allocate(std::size_t bytes, std::size_t alignment = THRUST_MR_DEFAULT_ALIGNMENT) override
{
bytes = (std::max) (bytes, m_options.smallest_block_size);
assert(detail::is_power_of_2(alignment));
assert(::cuda::is_power_of_two(alignment));

// an oversized and/or overaligned allocation requested; needs to be allocated separately
if (bytes > m_options.largest_block_size || alignment > m_options.alignment)
Expand Down Expand Up @@ -371,7 +373,7 @@ class unsynchronized_pool_resource final

// the request is NOT for oversized and/or overaligned memory
// allocate a block from an appropriate bucket
std::size_t bytes_log2 = thrust::detail::log2_ri(bytes);
std::size_t bytes_log2 = ::cuda::ceil_ilog2(bytes);
std::size_t bucket_idx = bytes_log2 - m_smallest_block_log2;
pool& bucket = thrust::raw_reference_cast(m_pools[bucket_idx]);

Expand Down Expand Up @@ -439,7 +441,7 @@ class unsynchronized_pool_resource final
virtual void do_deallocate(void_ptr p, std::size_t n, std::size_t alignment = THRUST_MR_DEFAULT_ALIGNMENT) override
{
n = (std::max) (n, m_options.smallest_block_size);
assert(detail::is_power_of_2(alignment));
assert(::cuda::is_power_of_two(alignment));

// verify that the pointer is at least as aligned as claimed
assert(reinterpret_cast<::cuda::std::intmax_t>(void_ptr_traits::get(p)) % alignment == 0);
Expand Down Expand Up @@ -504,7 +506,7 @@ class unsynchronized_pool_resource final
}

// push the block to the front of the appropriate bucket's free list
std::size_t n_log2 = thrust::detail::log2_ri(n);
std::size_t n_log2 = ::cuda::ceil_ilog2(n);
std::size_t bucket_idx = n_log2 - m_smallest_block_log2;
pool& bucket = thrust::raw_reference_cast(m_pools[bucket_idx]);

Expand Down
8 changes: 4 additions & 4 deletions thrust/thrust/mr/pool_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@
#endif // no system header

#include <thrust/detail/config/memory_resource.h>
#include <thrust/detail/integer_math.h>

#include <cuda/__cmath/pow2.h>
#include <cuda/std/cstddef>

THRUST_NAMESPACE_BEGIN
Expand Down Expand Up @@ -104,15 +104,15 @@ struct pool_options
*/
bool validate() const
{
if (!detail::is_power_of_2(smallest_block_size))
if (smallest_block_size != 0 && !::cuda::is_power_of_two(smallest_block_size))
{
return false;
}
if (!detail::is_power_of_2(largest_block_size))
if (largest_block_size != 0 && !::cuda::is_power_of_two(largest_block_size))
{
return false;
}
if (!detail::is_power_of_2(alignment))
if (alignment != 0 && !::cuda::is_power_of_two(alignment))
{
return false;
}
Expand Down
25 changes: 24 additions & 1 deletion thrust/thrust/system/cuda/detail/dispatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,39 @@
# pragma system_header
#endif // no system header

#include <thrust/detail/integer_math.h>
#include <thrust/detail/preprocessor.h>

#include <cuda/std/__concepts/concept_macros.h>
#include <cuda/std/__type_traits/is_arithmetic.h>
#include <cuda/std/__type_traits/is_unsigned.h>
#include <cuda/std/cstdint>
#include <cuda/std/detail/libcxx/include/stdexcept>
#include <cuda/std/limits>
#include <cuda/std/type_traits>

#include <string>

THRUST_NAMESPACE_BEGIN
namespace detail
{

_CCCL_TEMPLATE(typename T)
_CCCL_REQUIRES(::cuda::std::is_arithmetic_v<T>)
[[nodiscard]] _CCCL_API constexpr bool is_negative([[maybe_unused]] T x) noexcept
{
if constexpr (::cuda::std::is_unsigned_v<T>)
{
return false;
}
else
{
return x < 0;
}
}

} // namespace detail
THRUST_NAMESPACE_END

#if defined(THRUST_FORCE_32_BIT_OFFSET_TYPE) && defined(THRUST_FORCE_64_BIT_OFFSET_TYPE)
# error "Only THRUST_FORCE_32_BIT_OFFSET_TYPE or THRUST_FORCE_64_BIT_OFFSET_TYPE may be defined!"
#endif // THRUST_FORCE_32_BIT_OFFSET_TYPE && THRUST_FORCE_64_BIT_OFFSET_TYPE
Expand Down
Loading