Skip to content
Permalink

Comparing changes

This is a direct comparison between two commits made in this repository or its related repositories. View the default comparison for this range or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: uxlfoundation/oneDPL
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 67dd56d40e1db885ad273a92fb986e134d986d59
Choose a base ref
..
head repository: uxlfoundation/oneDPL
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 8ddc816233cc2c7af7ee00ccb3852816503da93e
Choose a head ref
Original file line number Diff line number Diff line change
@@ -292,15 +292,15 @@ __histogram_general_registers_local_reduction(oneapi::dpl::__internal::__device_
const sycl::event& __init_event, ::std::uint16_t __work_group_size,
_Range1&& __input, _Range2&& __bins, const _BinHashMgr& __binhash_manager)
{
using _kernel_base_name = typename ::std::decay_t<_ExecutionPolicy>::kernel_name;
using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>;

using _iters_per_work_item_t = ::std::integral_constant<::std::uint16_t, __iters_per_work_item>;

// Required to include _iters_per_work_item_t in kernel name because we compile multiple kernels and decide between
// them at runtime. Other compile time arguments aren't required as it is the user's responsibility to provide a
// unique kernel name to the policy for each call when using no-unnamed-lambdas
using _RegistersLocalReducName = oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider<
__histo_kernel_register_local_red<_iters_per_work_item_t, _kernel_base_name>>;
__histo_kernel_register_local_red<_iters_per_work_item_t, _CustomName>>;

return __histogram_general_registers_local_reduction_submitter<__iters_per_work_item, __bins_per_work_item,
_RegistersLocalReducName>()(
@@ -387,15 +387,15 @@ __histogram_general_local_atomics(oneapi::dpl::__internal::__device_backend_tag,
const sycl::event& __init_event, ::std::uint16_t __work_group_size, _Range1&& __input,
_Range2&& __bins, const _BinHashMgr& __binhash_manager)
{
using _kernel_base_name = typename ::std::decay_t<_ExecutionPolicy>::kernel_name;
using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>;

using _iters_per_work_item_t = ::std::integral_constant<::std::uint16_t, __iters_per_work_item>;

// Required to include _iters_per_work_item_t in kernel name because we compile multiple kernels and decide between
// them at runtime. Other compile time arguments aren't required as it is the user's responsibility to provide a
// unique kernel name to the policy for each call when using no-unnamed-lambdas
using _local_atomics_name = oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider<
__histo_kernel_local_atomics<_iters_per_work_item_t, _kernel_base_name>>;
__histo_kernel_local_atomics<_iters_per_work_item_t, _CustomName>>;

return __histogram_general_local_atomics_submitter<__iters_per_work_item, _local_atomics_name>()(
::std::forward<_ExecutionPolicy>(__exec), __init_event, __work_group_size, ::std::forward<_Range1>(__input),
@@ -486,10 +486,10 @@ __histogram_general_private_global_atomics(oneapi::dpl::__internal::__device_bac
::std::uint16_t __work_group_size, _Range1&& __input, _Range2&& __bins,
const _BinHashMgr& __binhash_manager)
{
using _kernel_base_name = typename ::std::decay_t<_ExecutionPolicy>::kernel_name;
using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>;

using _global_atomics_name = oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider<
__histo_kernel_private_glocal_atomics<_kernel_base_name>>;
__histo_kernel_private_glocal_atomics<_CustomName>>;

return __histogram_general_private_global_atomics_submitter<_global_atomics_name>()(
oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __init_event,
150 changes: 98 additions & 52 deletions test/parallel_api/ranges/std_ranges_test.h
Original file line number Diff line number Diff line change
@@ -34,11 +34,27 @@ static_assert(ONEDPL_HAS_RANGE_ALGORITHMS >= 202409L);
#include <ranges>
#include <algorithm>
#include <memory>
#include <array>

namespace test_std_ranges
{

inline constexpr std::size_t big_sz = (1<<25) + 10; //32M
// The largest specializations of algorithms with device policies handle 16M+ elements.
inline constexpr int big_size = (1<<24) + 10; //16M

// ~100K is sufficient for parallel policies.
// It also usually results in using multiple-work-group specializations for device policies.
inline constexpr int medium_size = (1<<17) + 10; //128K

// It is a sufficient size for sequential policies.
// It also usually results in using single-work-group specializations for device policies.
inline constexpr int small_size = 2025;

#if TEST_DPCPP_BACKEND_PRESENT
inline constexpr std::array<int, 3> big_sz = {/*serial*/ small_size, /*par*/ medium_size, /*device*/ big_size};
#else
inline constexpr std::array<int, 2> big_sz = {/*serial*/ small_size, /*par*/ medium_size};
#endif

#if TEST_DPCPP_BACKEND_PRESENT
template<int call_id = 0>
@@ -50,8 +66,6 @@ auto dpcpp_policy()
}
#endif //TEST_DPCPP_BACKEND_PRESENT

auto host_policies() { return std::true_type{};}

enum TestDataMode
{
data_in,
@@ -147,20 +161,18 @@ bool is_range<T, std::void_t<decltype(std::declval<T&>().begin())>> = true;
template<typename DataType, typename Container, TestDataMode test_mode = data_in>
struct test
{
const int max_n = 10;
template<typename Policy>
std::enable_if_t<std::is_same_v<Policy, std::true_type>>
operator()(Policy, auto algo, auto& checker, auto... args)
void
host_policies(int n_serial, int n_parallel, auto algo, auto& checker, auto... args)
{
operator()(oneapi::dpl::execution::seq, algo, checker, args...);
operator()(oneapi::dpl::execution::unseq, algo, checker, args...);
operator()(oneapi::dpl::execution::par, algo, checker, args...);
operator()(oneapi::dpl::execution::par_unseq, algo, checker, args...);
operator()(n_serial, oneapi::dpl::execution::seq, algo, checker, args...);
operator()(n_serial, oneapi::dpl::execution::unseq, algo, checker, args...);
operator()(n_parallel, oneapi::dpl::execution::par, algo, checker, args...);
operator()(n_parallel, oneapi::dpl::execution::par_unseq, algo, checker, args...);
}

template<typename Policy, typename Algo, typename Checker, typename TransIn, typename TransOut, TestDataMode mode = test_mode>
std::enable_if_t<!std::is_same_v<Policy, std::true_type> && mode == data_in>
operator()(Policy&& exec, Algo algo, Checker& checker, TransIn tr_in, TransOut, auto... args)
std::enable_if_t<mode == data_in>
operator()(int max_n, Policy&& exec, Algo algo, Checker& checker, TransIn tr_in, TransOut, auto... args)
{
Container cont_in(exec, max_n, [](auto i) { return i;});
Container cont_exp(exec, max_n, [](auto i) { return i;});
@@ -176,7 +188,7 @@ struct test
static_assert(std::is_same_v<decltype(res), decltype(checker(r_in, args...))>, "Wrong return type");

auto bres = ret_in_val(expected_res, expected_view.begin()) == ret_in_val(res, r_in.begin());
EXPECT_TRUE(bres, (std::string("wrong return value from algo with ranges: ") + typeid(Algo).name() +
EXPECT_TRUE(bres, (std::string("wrong return value from algo with ranges: ") + typeid(Algo).name() +
typeid(decltype(tr_in(std::declval<Container&>()()))).name()).c_str());

//check result
@@ -188,7 +200,7 @@ struct test
private:
template<typename Policy, typename Algo, typename Checker, typename TransIn, typename TransOut, TestDataMode mode = test_mode>
void
process_data_in_out(int n_in, int n_out, Policy&& exec, Algo algo, Checker& checker, TransIn tr_in,
process_data_in_out(int max_n, int n_in, int n_out, Policy&& exec, Algo algo, Checker& checker, TransIn tr_in,
TransOut tr_out, auto... args)
{
static_assert(mode == data_in_out || mode == data_in_out_lim);
@@ -225,28 +237,28 @@ struct test

public:
template<typename Policy, typename Algo, typename Checker, TestDataMode mode = test_mode>
std::enable_if_t<!std::is_same_v<Policy, std::true_type> && mode == data_in_out>
operator()(Policy&& exec, Algo algo, Checker& checker, auto... args)
std::enable_if_t<mode == data_in_out>
operator()(int max_n, Policy&& exec, Algo algo, Checker& checker, auto... args)
{
const int r_size = max_n;
process_data_in_out(r_size, r_size, std::forward<Policy>(exec), algo, checker, args...);
process_data_in_out(max_n, r_size, r_size, std::forward<Policy>(exec), algo, checker, args...);
}

template<typename Policy, typename Algo, typename Checker, TestDataMode mode = test_mode>
std::enable_if_t<!std::is_same_v<Policy, std::true_type> && mode == data_in_out_lim>
operator()(Policy&& exec, Algo algo, Checker& checker, auto... args)
std::enable_if_t<mode == data_in_out_lim>
operator()(int max_n, Policy&& exec, Algo algo, Checker& checker, auto... args)
{
const int r_size = max_n;
process_data_in_out(r_size, r_size, std::forward<Policy>(exec), algo, checker, args...);
process_data_in_out(max_n, r_size, r_size, std::forward<Policy>(exec), algo, checker, args...);

//test the cases where the input range is smaller than the output range, and vice versa
process_data_in_out(r_size/2, r_size, exec, algo, checker, args...);
process_data_in_out(r_size, r_size/2, std::forward<Policy>(exec), algo, checker, args...);
process_data_in_out(max_n, r_size/2, r_size, exec, algo, checker, args...);
process_data_in_out(max_n, r_size, r_size/2, std::forward<Policy>(exec), algo, checker, args...);
}

template<typename Policy, typename Algo, typename Checker, typename TransIn, typename TransOut, TestDataMode mode = test_mode>
std::enable_if_t<!std::is_same_v<Policy, std::true_type> && mode == data_in_in>
operator()(Policy&& exec, Algo algo, Checker& checker, TransIn tr_in, TransOut, auto... args)
std::enable_if_t<mode == data_in_in>
operator()(int max_n, Policy&& exec, Algo algo, Checker& checker, TransIn tr_in, TransOut, auto... args)
{
Container cont_in1(exec, max_n, [](auto i) { return i;});
Container cont_in2(exec, max_n, [](auto i) { return i % 5 ? i : 0;});
@@ -270,7 +282,8 @@ struct test
private:
template<typename Policy, typename Algo, typename Checker, typename TransIn, typename TransOut, TestDataMode mode = test_mode>
void
process_data_in_in_out(int n_in1, int n_in2, int n_out, Policy&& exec, Algo algo, Checker& checker, TransIn tr_in, TransOut tr_out, auto... args)
process_data_in_in_out(int max_n, int n_in1, int n_in2, int n_out, Policy&& exec, Algo algo, Checker& checker,
TransIn tr_in, TransOut tr_out, auto... args)
{
static_assert(mode == data_in_in_out || mode == data_in_in_out_lim);

@@ -307,23 +320,23 @@ struct test

public:
template<typename Policy, typename Algo, typename Checker, TestDataMode mode = test_mode>
std::enable_if_t<!std::is_same_v<Policy, std::true_type> && mode == data_in_in_out>
operator()(Policy&& exec, Algo algo, Checker& checker, auto... args)
std::enable_if_t<mode == data_in_in_out>
operator()(int max_n, Policy&& exec, Algo algo, Checker& checker, auto... args)
{
const int r_size = max_n;
process_data_in_in_out(r_size, r_size, r_size*2, std::forward<Policy>(exec), algo, checker, args...);
process_data_in_in_out(max_n, r_size, r_size, r_size*2, std::forward<Policy>(exec), algo, checker, args...);
}

template<typename Policy, typename Algo, typename Checker, TestDataMode mode = test_mode>
std::enable_if_t<!std::is_same_v<Policy, std::true_type> && mode == data_in_in_out_lim>
operator()(Policy&& exec, Algo algo, Checker& checker, auto... args)
std::enable_if_t<mode == data_in_in_out_lim>
operator()(int max_n, Policy&& exec, Algo algo, Checker& checker, auto... args)
{
const int r_size = max_n;
process_data_in_in_out(r_size, r_size, r_size, exec, algo, checker, args...);
process_data_in_in_out(r_size, r_size, r_size*2, exec, algo, checker, args...);
process_data_in_in_out(r_size/2, r_size, r_size, exec, algo, checker, args...);
process_data_in_in_out(r_size, r_size/2, r_size, exec, algo, checker, args...);
process_data_in_in_out(r_size, r_size, r_size/2, std::forward<Policy>(exec), algo, checker, args...);
process_data_in_in_out(max_n, r_size, r_size, r_size, exec, algo, checker, args...);
process_data_in_in_out(max_n, r_size, r_size, r_size*2, exec, algo, checker, args...);
process_data_in_in_out(max_n, r_size/2, r_size, r_size, exec, algo, checker, args...);
process_data_in_in_out(max_n, r_size, r_size/2, r_size, exec, algo, checker, args...);
process_data_in_in_out(max_n, r_size, r_size, r_size/2, std::forward<Policy>(exec), algo, checker, args...);
}
private:

@@ -361,7 +374,7 @@ struct test
template<typename T, typename ViewType>
struct host_subrange_impl
{
static_assert(std::is_trivially_copyable_v<T>,
static_assert(std::is_trivially_copyable_v<T>,
"Memory initialization within the class relies on trivially copyability of the type T");

using type = ViewType;
@@ -409,9 +422,9 @@ struct host_vector

template<typename Policy>
host_vector(Policy&&, T* data, int n): vec(data, data + n), p(data) {}

template<typename Policy, typename DataGen>
host_vector(Policy&&, int n, DataGen gen): vec(n)
host_vector(Policy&&, int n, DataGen gen): vec(n)
{
for(int i = 0; i < n; ++i)
vec[i] = gen(i);
@@ -514,12 +527,35 @@ using usm_span = usm_subrange_impl<T, std::span<T>>;
template<int call_id = 0, typename T = int, TestDataMode mode = data_in>
struct test_range_algo
{
const int max_n = 10;
const int n_serial = small_size;
const int n_parallel = small_size;
#if TEST_DPCPP_BACKEND_PRESENT
const int n_device = small_size;
#endif

test_range_algo() = default;

// Mode with a uniform number of elements for each policy type
#if TEST_DPCPP_BACKEND_PRESENT
test_range_algo(int n) : n_serial(n), n_parallel(n), n_device(n) {}
#else
test_range_algo(int n) : n_serial(n), n_parallel(n) {}
#endif

// Mode that tests different policy types with different sizes.
// Serial (seq/unseq), parallel (par/par_unseq), and device policies
// specialize algorithms for different numbers of elements, which this mode covers.
#if TEST_DPCPP_BACKEND_PRESENT
test_range_algo(std::array<int, 3> sizes) : n_serial(sizes[0]), n_parallel(sizes[1]), n_device(sizes[2]) {}
#else
test_range_algo(std::array<int, 2> sizes) : n_serial(sizes[0]), n_parallel(sizes[1]) {}
#endif

void test_view(auto view, auto algo, auto& checker, auto... args)
{
test<T, host_subrange<T>, mode>{max_n}(host_policies(), algo, checker, view, std::identity{}, args...);
test<T, host_subrange<T>, mode>{}.host_policies(n_serial, n_parallel, algo, checker, view, std::identity{}, args...);
#if TEST_DPCPP_BACKEND_PRESENT
test<T, usm_subrange<T>, mode>{max_n}(dpcpp_policy<call_id>(), algo, checker, view, std::identity{}, args...);
test<T, usm_subrange<T>, mode>{}(n_device, dpcpp_policy<call_id>(), algo, checker, view, std::identity{}, args...);
#endif //TEST_DPCPP_BACKEND_PRESENT
}

@@ -531,13 +567,19 @@ struct test_range_algo
auto span_view = [](auto&& v) { return std::span(v); };
#endif

test<T, host_vector<T>, mode>{max_n}(host_policies(), algo, checker, std::identity{}, std::identity{}, args...);
test<T, host_vector<T>, mode>{max_n}(host_policies(), algo, checker, subrange_view, std::identity{}, args...);
test<T, host_vector<T>, mode>{max_n}(host_policies(), algo, checker, std::views::all, std::identity{}, args...);
test<T, host_subrange<T>, mode>{max_n}(host_policies(), algo, checker, std::views::all, std::identity{}, args...);
test<T, host_vector<T>, mode>{}.host_policies(
n_serial, n_parallel, algo, checker, std::identity{}, std::identity{}, args...);
test<T, host_vector<T>, mode>{}.host_policies(
n_serial, n_parallel, algo, checker, subrange_view, std::identity{}, args...);
test<T, host_vector<T>, mode>{}.host_policies(
n_serial, n_parallel, algo, checker, std::views::all, std::identity{}, args...);
test<T, host_subrange<T>, mode>{}.host_policies(
n_serial, n_parallel, algo, checker, std::views::all, std::identity{}, args...);
#if TEST_CPP20_SPAN_PRESENT
test<T, host_vector<T>, mode>{max_n}(host_policies(), algo, checker, span_view, std::identity{}, args...);
test<T, host_span<T>, mode>{max_n}(host_policies(), algo, checker, std::views::all, std::identity{}, args...);
test<T, host_vector<T>, mode>{}.host_policies(
n_serial, n_parallel, algo, checker, span_view, std::identity{}, args...);
test<T, host_span<T>, mode>{}.host_policies(
n_serial, n_parallel, algo, checker, std::views::all, std::identity{}, args...);
#endif

#if TEST_DPCPP_BACKEND_PRESENT
@@ -548,11 +590,15 @@ struct test_range_algo
if constexpr(!std::disjunction_v<std::is_member_pointer<decltype(args)>...>)
#endif
{
test<T, usm_vector<T>, mode>{max_n}(dpcpp_policy<call_id + 10>(), algo, checker, subrange_view, subrange_view, args...);
test<T, usm_subrange<T>, mode>{max_n}(dpcpp_policy<call_id + 30>(), algo, checker, std::identity{}, std::identity{}, args...);
test<T, usm_vector<T>, mode>{}(
n_device, dpcpp_policy<call_id + 10>(), algo, checker, subrange_view, subrange_view, args...);
test<T, usm_subrange<T>, mode>{}(
n_device, dpcpp_policy<call_id + 30>(), algo, checker, std::identity{}, std::identity{}, args...);
#if TEST_CPP20_SPAN_PRESENT
test<T, usm_vector<T>, mode>{max_n}(dpcpp_policy<call_id + 20>(), algo, checker, span_view, subrange_view, args...);
test<T, usm_span<T>, mode>{max_n}(dpcpp_policy<call_id + 40>(), algo, checker, std::identity{}, std::identity{}, args...);
test<T, usm_vector<T>, mode>{}(
n_device, dpcpp_policy<call_id + 20>(), algo, checker, span_view, subrange_view, args...);
test<T, usm_span<T>, mode>{}(
n_device, dpcpp_policy<call_id + 40>(), algo, checker, std::identity{}, std::identity{}, args...);
#endif
}
}
4 changes: 2 additions & 2 deletions test/parallel_api/ranges/std_ranges_test_views.pass.cpp
Original file line number Diff line number Diff line change
@@ -22,10 +22,10 @@ main()
using namespace test_std_ranges;
namespace dpl_ranges = oneapi::dpl::ranges;

const int n = 1<<25; //32M
const int n = medium_size;

//transform view
test_range_algo<0>{n}.test_view(std::views::transform([](const auto a) { return a*2; }),
test_range_algo<0>{n}.test_view(std::views::transform([](const auto a) { return a*2; }),
dpl_ranges::find_if, std::ranges::find_if, pred, proj);

//reverse view
Original file line number Diff line number Diff line change
@@ -23,7 +23,7 @@ main()
namespace dpl_ranges = oneapi::dpl::ranges;
const char* err_msg = "Wrong effect algo transform with unsized ranges.";

const int n = 1<<25; //32M
const int n = medium_size;
std::ranges::iota_view view1(0, n); //size range
std::ranges::iota_view view2(0, std::unreachable_sentinel_t{}); //unsized

Original file line number Diff line number Diff line change
@@ -23,7 +23,7 @@ main()
namespace dpl_ranges = oneapi::dpl::ranges;
const char* err_msg = "Wrong effect algo transform with unsized ranges.";

const int n = 1<<25; //32M
const int n = big_size;
std::ranges::iota_view view1(0, n); //size range
std::ranges::iota_view view2(0, std::unreachable_sentinel_t{}); //unsized