Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid specializations of the same submitters with the same policy type but with different type qualifiers (l-value, r-value) #2093

Open
wants to merge 52 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
dc4981d
Fix __sycl_scan_by_segment_impl::operator()
SergeyKopienko Mar 3, 2025
5966a4e
Fix __parallel_reduce_then_scan_reduce_submitter::operator()
SergeyKopienko Mar 3, 2025
d414227
Fix __parallel_reduce_then_scan_scan_submitter::operator()
SergeyKopienko Mar 3, 2025
846b038
Fix __parallel_transform_reduce_device_kernel_submitter::operator()
SergeyKopienko Mar 3, 2025
2f6dbaf
Fix __parallel_transform_reduce_work_group_kernel_submitter::operator()
SergeyKopienko Mar 3, 2025
b2f39ca
Fix __parallel_transform_reduce_small_submitter::operator()
SergeyKopienko Mar 3, 2025
995ede7
Fix __parallel_radix_sort_iteration::operator()
SergeyKopienko Mar 3, 2025
94e9b9b
Fix __parallel_merge_submitter::operator()
SergeyKopienko Mar 3, 2025
803436d
Fix __parallel_for_fpga_submitter::operator()
SergeyKopienko Mar 3, 2025
de644f0
Fix __parallel_for_small_submitter::operator()
SergeyKopienko Mar 3, 2025
db3c851
Fix __parallel_scan_submitter::operator()
SergeyKopienko Mar 3, 2025
3574179
Fix __parallel_copy_if_static_single_group_submitter::operator()
SergeyKopienko Mar 3, 2025
eba0a89
Fix __histogram_general_registers_local_reduction_submitter::operator()
SergeyKopienko Mar 3, 2025
50c4f63
Fix __histogram_general_local_atomics_submitter::operator()
SergeyKopienko Mar 3, 2025
824bb72
Fix __parallel_transform_reduce_impl::operator()
SergeyKopienko Mar 3, 2025
282c852
Fix __parallel_transform_reduce_then_scan
SergeyKopienko Mar 3, 2025
5da82b1
Fix __parallel_reduce_by_segment_fallback
SergeyKopienko Mar 3, 2025
51f6dda
Fix __scan_by_segment_impl_common -> __sycl_scan_by_segment_impl::ope…
SergeyKopienko Mar 3, 2025
f633c9e
Fix __parallel_transform_reduce_small_impl -> __parallel_transform_re…
SergeyKopienko Mar 3, 2025
8841284
Fix __parallel_transform_reduce_mid_impl -> __parallel_transform_redu…
SergeyKopienko Mar 3, 2025
7d7414d
Fix radix_sort and stuff
SergeyKopienko Mar 3, 2025
629986e
Fix __parallel_merge_submitter_large::eval_split_points_for_groups
SergeyKopienko Mar 3, 2025
a4412d1
Fix __parallel_merge_submitter_large::run_parallel_merge
SergeyKopienko Mar 3, 2025
47f5ee5
Fix __parallel_merge -> __parallel_merge_submitter::operator(), __par…
SergeyKopienko Mar 3, 2025
ac1b9b8
Fix __merge_sort and stuff
SergeyKopienko Mar 3, 2025
be8fe91
Fix __radix_sort_count_submit
SergeyKopienko Mar 3, 2025
50288d6
Fix __radix_sort_scan_submit
SergeyKopienko Mar 3, 2025
874a10e
Fix __radix_sort_reorder_submit
SergeyKopienko Mar 3, 2025
9185dbe
Fix __parallel_transform_scan_single_group -> __parallel_transform_sc…
SergeyKopienko Mar 3, 2025
02903da
Fix __parallel_transform_scan_base -> __parallel_scan_submitter::oper…
SergeyKopienko Mar 3, 2025
d81d171
Fix __parallel_transform_scan -> __parallel_transform_scan_single_gro…
SergeyKopienko Mar 3, 2025
6559b0c
Fix __invoke_single_group_copy_if::operator() -> __parallel_copy_if_s…
SergeyKopienko Mar 3, 2025
47d6c93
Fix __parallel_reduce_then_scan_copy -> __parallel_transform_reduce_t…
SergeyKopienko Mar 3, 2025
6f30401
Fix __parallel_reduce_by_segment_reduce_then_scan -> __parallel_trans…
SergeyKopienko Mar 3, 2025
c2daf0a
Fix __parallel_set_reduce_then_scan -> __parallel_transform_reduce_th…
SergeyKopienko Mar 3, 2025
99ce9b5
Fix __parallel_for -> __parallel_for_fpga_submitter::operator()
SergeyKopienko Mar 3, 2025
638c2ff
Fix __histogram_general_registers_local_reduction -> __histogram_gene…
SergeyKopienko Mar 3, 2025
ebbb160
Fix __histogram_general_private_global_atomics_submitter::operator()
SergeyKopienko Mar 3, 2025
d3e6fb2
Fix __histogram_general_local_atomics -> __histogram_general_local_at…
SergeyKopienko Mar 3, 2025
a266177
Fix __histogram_general_private_global_atomics -> __histogram_general…
SergeyKopienko Mar 3, 2025
59f1831
Fix __parallel_histogram_select_kernel -> __histogram_general_registe…
SergeyKopienko Mar 3, 2025
fab428e
Fix __parallel_histogram -> __parallel_histogram_select_kernel
SergeyKopienko Mar 3, 2025
d99a2cd
Fix __parallel_transform_reduce -> __parallel_transform_reduce_small_…
SergeyKopienko Mar 3, 2025
b150823
Fix __parallel_radix_sort -> __parallel_radix_sort_iteration::submit(…
SergeyKopienko Mar 3, 2025
6327040
Fix __parallel_reduce_then_scan_copy
SergeyKopienko Mar 3, 2025
89370f8
fix __parallel_reduce_by_segment_reduce_then_scan -> __parallel_trans…
SergeyKopienko Mar 3, 2025
d2cd3a2
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort…
SergeyKopienko Mar 4, 2025
85bc86f
Apply GitHUB clang format
SergeyKopienko Mar 4, 2025
bdb73a2
Update include/oneapi/dpl/internal/scan_by_segment_impl.h
SergeyKopienko Mar 5, 2025
83339c8
Fix review comments
SergeyKopienko Mar 5, 2025
1b13d6f
test/general/lambda_naming.pass.cpp - expand test coverage
SergeyKopienko Mar 5, 2025
ca5f2da
include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h - fix broken tes…
SergeyKopienko Mar 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 13 additions & 10 deletions include/oneapi/dpl/internal/scan_by_segment_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,17 +108,20 @@ struct __sycl_scan_by_segment_impl
template <typename _BackendTag, typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _Range3,
typename _BinaryPredicate, typename _BinaryOperator, typename _T>
void
operator()(_BackendTag, _ExecutionPolicy&& __exec, _Range1&& __keys, _Range2&& __values, _Range3&& __out_values,
_BinaryPredicate __binary_pred, _BinaryOperator __binary_op, _T __init, _T __identity)
operator()(_BackendTag, const _ExecutionPolicy& __exec, _Range1&& __keys, _Range2&& __values,
_Range3&& __out_values, _BinaryPredicate __binary_pred, _BinaryOperator __binary_op, _T __init,
_T __identity)
{
using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>;

// We should avoid using _ExecutionPolicy in __kernel_name_generator template params
// because this operator() is always called with _ExecutionPolicy as a "const reference".
// So, from this template param point of view, only one specialization is possible per concrete _ExecutionPolicy type.
// _ExecutionPolicy type information is embedded in _CustomName to distinguish between concrete policy types.
using _SegScanWgKernel = oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_generator<
_SegScanWgPhase, _CustomName, _ExecutionPolicy, _Range1, _Range2, _Range3, _BinaryPredicate,
_BinaryOperator>;
_SegScanWgPhase, _CustomName, _Range1, _Range2, _Range3, _BinaryPredicate, _BinaryOperator>;
using _SegScanPrefixKernel = oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_generator<
_SegScanPrefixPhase, _CustomName, _ExecutionPolicy, _Range1, _Range2, _Range3, _BinaryPredicate,
_BinaryOperator>;
_SegScanPrefixPhase, _CustomName, _Range1, _Range2, _Range3, _BinaryPredicate, _BinaryOperator>;

using __val_type = oneapi::dpl::__internal::__value_t<_Range2>;

Expand Down Expand Up @@ -371,7 +374,7 @@ struct __sycl_scan_by_segment_impl
template <typename _BackendTag, typename Policy, typename InputIterator1, typename InputIterator2,
typename OutputIterator, typename T, typename BinaryPredicate, typename Operator, typename Inclusive>
OutputIterator
__scan_by_segment_impl_common(__internal::__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 first1,
__scan_by_segment_impl_common(__internal::__hetero_tag<_BackendTag>, const Policy& policy, InputIterator1 first1,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need to change this signature as well? A const lvalue ref can accept whatever policy type is forwarded along from this function.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed this signature because the initial idea was to change the signatures of the submitters and, one level up, of their callers as well.
This __scan_by_segment_impl_common function calls the submitter __sycl_scan_by_segment_impl :

    __sycl_scan_by_segment_impl<Inclusive::value>()(_BackendTag{}, policy, key_buf.all_view(), value_buf.all_view(),
                                                    value_output_buf.all_view(), binary_pred, binary_op, init,
                                                    identity);

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I don't have a strong objection, but I also don't think this part is necessary. I believe both options should have the same number of "copies" of the execution policy: switch over from an rvalue and bind to a const lvalue ref, then stay as an lvalue ref without copies the rest of the way.

In theory there could be some other function call here which we could forward perfectly to, but that is not the case in practice.

Both options provide a single template instantiation of the submitter.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please see my comment at #2093 (comment)

InputIterator1 last1, InputIterator2 first2, OutputIterator result, T init,
BinaryPredicate binary_pred, Operator binary_op, Inclusive)
{
Expand All @@ -394,9 +397,9 @@ __scan_by_segment_impl_common(__internal::__hetero_tag<_BackendTag>, Policy&& po

constexpr iter_value_t identity = unseq_backend::__known_identity<Operator, iter_value_t>;

__sycl_scan_by_segment_impl<Inclusive::value>()(_BackendTag{}, ::std::forward<Policy>(policy), key_buf.all_view(),
value_buf.all_view(), value_output_buf.all_view(), binary_pred,
binary_op, init, identity);
__sycl_scan_by_segment_impl<Inclusive::value>()(_BackendTag{}, policy, key_buf.all_view(), value_buf.all_view(),
value_output_buf.all_view(), binary_pred, binary_op, init,
identity);
return result + n;
}

Expand Down
154 changes: 78 additions & 76 deletions include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h

Large diffs are not rendered by default.

10 changes: 4 additions & 6 deletions include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_for.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ struct __parallel_for_small_submitter<__internal::__optional_kernel_name<_Name..
{
template <typename _ExecutionPolicy, typename _Fp, typename _Index, typename... _Ranges>
auto
operator()(_ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&&... __rngs) const
operator()(const _ExecutionPolicy& __exec, _Fp __brick, _Index __count, _Ranges&&... __rngs) const
{
assert(oneapi::dpl::__ranges::__get_first_range_size(__rngs...) > 0);
_PRINT_INFO_IN_DEBUG_MODE(__exec);
Expand Down Expand Up @@ -138,7 +138,7 @@ struct __parallel_for_large_submitter<__internal::__optional_kernel_name<_Name..

template <typename _ExecutionPolicy, typename _Fp, typename _Index, typename... _Ranges>
auto
operator()(_ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&&... __rngs) const
operator()(const _ExecutionPolicy& __exec, _Fp __brick, _Index __count, _Ranges&&... __rngs) const
{
assert(oneapi::dpl::__ranges::__get_first_range_size(__rngs...) > 0);
const std::size_t __work_group_size =
Expand Down Expand Up @@ -194,12 +194,10 @@ __parallel_for(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&&
{
if (__count >= __large_submitter::__estimate_best_start_size(__exec, __brick))
{
return __large_submitter{}(std::forward<_ExecutionPolicy>(__exec), __brick, __count,
std::forward<_Ranges>(__rngs)...);
return __large_submitter{}(__exec, __brick, __count, std::forward<_Ranges>(__rngs)...);
}
}
return __small_submitter{}(std::forward<_ExecutionPolicy>(__exec), __brick, __count,
std::forward<_Ranges>(__rngs)...);
return __small_submitter{}(__exec, __brick, __count, std::forward<_Ranges>(__rngs)...);
}

} // namespace __par_backend_hetero
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ struct __parallel_for_fpga_submitter<__internal::__optional_kernel_name<_Name...
{
template <typename _ExecutionPolicy, typename _Fp, typename _Index, typename... _Ranges>
auto
operator()(_ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&&... __rngs) const
operator()(const _ExecutionPolicy& __exec, _Fp __brick, _Index __count, _Ranges&&... __rngs) const
{
assert(oneapi::dpl::__ranges::__get_first_range_size(__rngs...) > 0);

Expand Down Expand Up @@ -86,8 +86,8 @@ __parallel_for(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& _
using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>;
using __parallel_for_name = __internal::__kernel_name_provider<_CustomName>;

return __parallel_for_fpga_submitter<__parallel_for_name>()(std::forward<_ExecutionPolicy>(__exec), __brick,
__count, std::forward<_Ranges>(__rngs)...);
return __parallel_for_fpga_submitter<__parallel_for_name>()(__exec, __brick, __count,
std::forward<_Ranges>(__rngs)...);
}

//------------------------------------------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ struct __histogram_general_registers_local_reduction_submitter<__iters_per_work_
{
template <typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _BinHashMgr>
auto
operator()(_ExecutionPolicy&& __exec, const sycl::event& __init_event, ::std::uint16_t __work_group_size,
operator()(const _ExecutionPolicy& __exec, const sycl::event& __init_event, ::std::uint16_t __work_group_size,
_Range1&& __input, _Range2&& __bins, const _BinHashMgr& __binhash_manager)
{
const ::std::size_t __n = __input.size();
Expand Down Expand Up @@ -288,9 +288,10 @@ struct __histogram_general_registers_local_reduction_submitter<__iters_per_work_
template <::std::uint16_t __iters_per_work_item, ::std::uint8_t __bins_per_work_item, typename _ExecutionPolicy,
typename _Range1, typename _Range2, typename _BinHashMgr>
auto
__histogram_general_registers_local_reduction(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec,
const sycl::event& __init_event, ::std::uint16_t __work_group_size,
_Range1&& __input, _Range2&& __bins, const _BinHashMgr& __binhash_manager)
__histogram_general_registers_local_reduction(oneapi::dpl::__internal::__device_backend_tag,
const _ExecutionPolicy& __exec, const sycl::event& __init_event,
::std::uint16_t __work_group_size, _Range1&& __input, _Range2&& __bins,
const _BinHashMgr& __binhash_manager)
{
using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>;

Expand All @@ -304,8 +305,8 @@ __histogram_general_registers_local_reduction(oneapi::dpl::__internal::__device_

return __histogram_general_registers_local_reduction_submitter<__iters_per_work_item, __bins_per_work_item,
_RegistersLocalReducName>()(
::std::forward<_ExecutionPolicy>(__exec), __init_event, __work_group_size, ::std::forward<_Range1>(__input),
::std::forward<_Range2>(__bins), __binhash_manager);
__exec, __init_event, __work_group_size, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins),
__binhash_manager);
}

template <::std::uint16_t __iters_per_work_item, typename _KernelName>
Expand All @@ -317,7 +318,7 @@ struct __histogram_general_local_atomics_submitter<__iters_per_work_item,
{
template <typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _BinHashMgr>
auto
operator()(_ExecutionPolicy&& __exec, const sycl::event& __init_event, ::std::uint16_t __work_group_size,
operator()(const _ExecutionPolicy& __exec, const sycl::event& __init_event, ::std::uint16_t __work_group_size,
_Range1&& __input, _Range2&& __bins, const _BinHashMgr& __binhash_manager)
{
using _local_histogram_type = ::std::uint32_t;
Expand Down Expand Up @@ -383,7 +384,7 @@ struct __histogram_general_local_atomics_submitter<__iters_per_work_item,
template <::std::uint16_t __iters_per_work_item, typename _ExecutionPolicy, typename _Range1, typename _Range2,
typename _BinHashMgr>
auto
__histogram_general_local_atomics(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec,
__histogram_general_local_atomics(oneapi::dpl::__internal::__device_backend_tag, const _ExecutionPolicy& __exec,
const sycl::event& __init_event, ::std::uint16_t __work_group_size, _Range1&& __input,
_Range2&& __bins, const _BinHashMgr& __binhash_manager)
{
Expand All @@ -398,8 +399,8 @@ __histogram_general_local_atomics(oneapi::dpl::__internal::__device_backend_tag,
__histo_kernel_local_atomics<_iters_per_work_item_t, _CustomName>>;

return __histogram_general_local_atomics_submitter<__iters_per_work_item, _local_atomics_name>()(
::std::forward<_ExecutionPolicy>(__exec), __init_event, __work_group_size, ::std::forward<_Range1>(__input),
::std::forward<_Range2>(__bins), __binhash_manager);
__exec, __init_event, __work_group_size, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins),
__binhash_manager);
}

template <typename _KernelName>
Expand All @@ -410,7 +411,7 @@ struct __histogram_general_private_global_atomics_submitter<__internal::__option
{
template <typename _BackendTag, typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _BinHashMgr>
auto
operator()(_BackendTag, _ExecutionPolicy&& __exec, const sycl::event& __init_event,
operator()(_BackendTag, const _ExecutionPolicy& __exec, const sycl::event& __init_event,
::std::uint16_t __min_iters_per_work_item, ::std::uint16_t __work_group_size, _Range1&& __input,
_Range2&& __bins, const _BinHashMgr& __binhash_manager)
{
Expand Down Expand Up @@ -481,27 +482,26 @@ struct __histogram_general_private_global_atomics_submitter<__internal::__option
};
template <typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _BinHashMgr>
auto
__histogram_general_private_global_atomics(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec,
const sycl::event& __init_event, ::std::uint16_t __min_iters_per_work_item,
::std::uint16_t __work_group_size, _Range1&& __input, _Range2&& __bins,
const _BinHashMgr& __binhash_manager)
__histogram_general_private_global_atomics(oneapi::dpl::__internal::__device_backend_tag,
const _ExecutionPolicy& __exec, const sycl::event& __init_event,
::std::uint16_t __min_iters_per_work_item, ::std::uint16_t __work_group_size,
_Range1&& __input, _Range2&& __bins, const _BinHashMgr& __binhash_manager)
{
using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>;

using _global_atomics_name = oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider<
__histo_kernel_private_glocal_atomics<_CustomName>>;

return __histogram_general_private_global_atomics_submitter<_global_atomics_name>()(
oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __init_event,
__min_iters_per_work_item, __work_group_size, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins),
__binhash_manager);
oneapi::dpl::__internal::__device_backend_tag{}, __exec, __init_event, __min_iters_per_work_item,
__work_group_size, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager);
}

template <::std::uint16_t __iters_per_work_item, typename _ExecutionPolicy, typename _Range1, typename _Range2,
typename _BinHashMgr>
auto
__parallel_histogram_select_kernel(oneapi::dpl::__internal::__device_backend_tag __backend_tag,
_ExecutionPolicy&& __exec, const sycl::event& __init_event, _Range1&& __input,
const _ExecutionPolicy& __exec, const sycl::event& __init_event, _Range1&& __input,
_Range2&& __bins, const _BinHashMgr& __binhash_manager)
{
using _private_histogram_type = ::std::uint16_t;
Expand All @@ -520,17 +520,17 @@ __parallel_histogram_select_kernel(oneapi::dpl::__internal::__device_backend_tag
{
return __future(
__histogram_general_registers_local_reduction<__iters_per_work_item, __max_work_item_private_bins>(
__backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __init_event, __work_group_size,
::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager));
__backend_tag, __exec, __init_event, __work_group_size, ::std::forward<_Range1>(__input),
::std::forward<_Range2>(__bins), __binhash_manager));
}
// if bins fit into SLM, use local atomics
else if (__num_bins * sizeof(_local_histogram_type) +
__binhash_manager.get_required_SLM_elements() * sizeof(_extra_memory_type) <
__local_mem_size)
{
return __future(__histogram_general_local_atomics<__iters_per_work_item>(
__backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __init_event, __work_group_size,
::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager));
__backend_tag, __exec, __init_event, __work_group_size, ::std::forward<_Range1>(__input),
::std::forward<_Range2>(__bins), __binhash_manager));
}
else // otherwise, use global atomics (private copies per workgroup)
{
Expand All @@ -540,8 +540,8 @@ __parallel_histogram_select_kernel(oneapi::dpl::__internal::__device_backend_tag
// private copies of the histogram bins in global memory. No unrolling is taken advantage of here because it
// is a runtime argument.
return __future(__histogram_general_private_global_atomics(
__backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __init_event, __iters_per_work_item,
__work_group_size, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager));
__backend_tag, __exec, __init_event, __iters_per_work_item, __work_group_size,
::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager));
}
}

Expand All @@ -554,14 +554,14 @@ __parallel_histogram(oneapi::dpl::__internal::__device_backend_tag __backend_tag
if (__input.size() < 1048576) // 2^20
{
return __parallel_histogram_select_kernel</*iters_per_workitem = */ 4>(
__backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __init_event, ::std::forward<_Range1>(__input),
::std::forward<_Range2>(__bins), __binhash_manager);
__backend_tag, __exec, __init_event, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins),
__binhash_manager);
}
else
{
return __parallel_histogram_select_kernel</*iters_per_workitem = */ 32>(
__backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __init_event, ::std::forward<_Range1>(__input),
::std::forward<_Range2>(__bins), __binhash_manager);
__backend_tag, __exec, __init_event, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins),
__binhash_manager);
}
}

Expand Down
Loading
Loading