Skip to content

Commit de56024

Browse files
committed
moving back to forwarding references
(reverting part of #1997)

Signed-off-by: Dan Hoeflinger <[email protected]>
1 parent 3b7f0e0 commit de56024

File tree

1 file changed

+38 -34 lines changed

include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h

+38-34
Original file line numberDiff line numberDiff line change
@@ -1093,7 +1093,7 @@ struct __write_to_id_if_else
10931093
template <typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _UnaryOperation, typename _InitType,
10941094
typename _BinaryOperation, typename _Inclusive>
10951095
auto
1096-
__parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag __backend_tag, const _ExecutionPolicy& __exec,
1096+
__parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec,
10971097
_Range1&& __in_rng, _Range2&& __out_rng, std::size_t __n, _UnaryOperation __unary_op,
10981098
_InitType __init, _BinaryOperation __binary_op, _Inclusive)
10991099
{
@@ -1122,9 +1122,9 @@ __parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag __backen
11221122
std::size_t __single_group_upper_limit = __use_reduce_then_scan ? 2048 : 16384;
11231123
if (__group_scan_fits_in_slm<_Type>(__exec.queue(), __n, __n_uniform, __single_group_upper_limit))
11241124
{
1125-
return __parallel_transform_scan_single_group(__backend_tag, __exec, std::forward<_Range1>(__in_rng),
1126-
std::forward<_Range2>(__out_rng), __n, __unary_op, __init,
1127-
__binary_op, _Inclusive{});
1125+
return __parallel_transform_scan_single_group(
1126+
__backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__in_rng),
1127+
std::forward<_Range2>(__out_rng), __n, __unary_op, __init, __binary_op, _Inclusive{});
11281128
}
11291129
}
11301130
#if _ONEDPL_COMPILE_KERNEL
@@ -1137,10 +1137,10 @@ __parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag __backen
11371137
_GenInput __gen_transform{__unary_op};
11381138
try
11391139
{
1140-
return __parallel_transform_reduce_then_scan(__backend_tag, __exec, __in_rng, __out_rng,
1141-
__gen_transform, __binary_op, __gen_transform,
1142-
_ScanInputTransform{}, _WriteOp{}, __init, _Inclusive{},
1143-
/*_IsUniquePattern=*/std::false_type{});
1140+
return __parallel_transform_reduce_then_scan(
1141+
__backend_tag, std::forward<_ExecutionPolicy>(__exec), __in_rng, __out_rng, __gen_transform,
1142+
__binary_op, __gen_transform, _ScanInputTransform{}, _WriteOp{}, __init, _Inclusive{},
1143+
/*_IsUniquePattern=*/std::false_type{});
11441144
}
11451145
catch (const sycl::exception& __e)
11461146
{
@@ -1161,7 +1161,8 @@ __parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag __backen
11611161
_NoOpFunctor __get_data_op;
11621162

11631163
return __parallel_transform_scan_base(
1164-
__backend_tag, __exec, std::forward<_Range1>(__in_rng), std::forward<_Range2>(__out_rng), __init,
1164+
__backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__in_rng),
1165+
std::forward<_Range2>(__out_rng), __init,
11651166
// local scan
11661167
unseq_backend::__scan<_Inclusive, _ExecutionPolicy, _BinaryOperation, _UnaryFunctor, _Assigner, _Assigner,
11671168
_NoOpFunctor, _InitType>{__binary_op, _UnaryFunctor{__unary_op}, __assign_op, __assign_op,
@@ -1283,7 +1284,7 @@ __parallel_scan_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag
12831284

12841285
template <typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _BinaryPredicate>
12851286
auto
1286-
__parallel_unique_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag, const _ExecutionPolicy& __exec,
1287+
__parallel_unique_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec,
12871288
_Range1&& __rng, _Range2&& __result, _BinaryPredicate __pred)
12881289
{
12891290
using _Assign = oneapi::dpl::__internal::__pstl_assign;
@@ -1300,8 +1301,8 @@ __parallel_unique_copy(oneapi::dpl::__internal::__device_backend_tag __backend_t
13001301
using _WriteOp = oneapi::dpl::__par_backend_hetero::__write_to_id_if<1, _Assign>;
13011302
try
13021303
{
1303-
return __parallel_reduce_then_scan_copy(__backend_tag, __exec, __rng, __result, __n, _GenMask{__pred},
1304-
_WriteOp{_Assign{}},
1304+
return __parallel_reduce_then_scan_copy(__backend_tag, std::forward<_ExecutionPolicy>(__exec), __rng,
1305+
__result, __n, _GenMask{__pred}, _WriteOp{_Assign{}},
13051306
/*_IsUniquePattern=*/std::true_type{});
13061307
}
13071308
catch (const sycl::exception& __e)
@@ -1316,8 +1317,9 @@ __parallel_unique_copy(oneapi::dpl::__internal::__device_backend_tag __backend_t
13161317
decltype(__n)>;
13171318
using _CopyOp = unseq_backend::__copy_by_mask<_ReduceOp, _Assign, /*inclusive*/ std::true_type, 1>;
13181319

1319-
return __parallel_scan_copy(__backend_tag, __exec, std::forward<_Range1>(__rng), std::forward<_Range2>(__result),
1320-
__n, _CreateOp{oneapi::dpl::__internal::__not_pred<_BinaryPredicate>{__pred}},
1320+
return __parallel_scan_copy(__backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__rng),
1321+
std::forward<_Range2>(__result), __n,
1322+
_CreateOp{oneapi::dpl::__internal::__not_pred<_BinaryPredicate>{__pred}},
13211323
_CopyOp{_ReduceOp{}, _Assign{}});
13221324
}
13231325

@@ -1357,7 +1359,7 @@ __parallel_reduce_by_segment_reduce_then_scan(oneapi::dpl::__internal::__device_
13571359

13581360
template <typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _UnaryPredicate>
13591361
auto
1360-
__parallel_partition_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag, const _ExecutionPolicy& __exec,
1362+
__parallel_partition_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec,
13611363
_Range1&& __rng, _Range2&& __result, _UnaryPredicate __pred)
13621364
{
13631365
oneapi::dpl::__internal::__difference_t<_Range1> __n = __rng.size();
@@ -1369,8 +1371,8 @@ __parallel_partition_copy(oneapi::dpl::__internal::__device_backend_tag __backen
13691371
oneapi::dpl::__par_backend_hetero::__write_to_id_if_else<oneapi::dpl::__internal::__pstl_assign>;
13701372
try
13711373
{
1372-
return __parallel_reduce_then_scan_copy(__backend_tag, __exec, __rng, __result, __n, _GenMask{__pred},
1373-
_WriteOp{},
1374+
return __parallel_reduce_then_scan_copy(__backend_tag, std::forward<_ExecutionPolicy>(__exec), __rng,
1375+
__result, __n, _GenMask{__pred}, _WriteOp{},
13741376
/*_IsUniquePattern=*/std::false_type{});
13751377
}
13761378
catch (const sycl::exception& __e)
@@ -1383,14 +1385,14 @@ __parallel_partition_copy(oneapi::dpl::__internal::__device_backend_tag __backen
13831385
using _CreateOp = unseq_backend::__create_mask<_UnaryPredicate, decltype(__n)>;
13841386
using _CopyOp = unseq_backend::__partition_by_mask<_ReduceOp, /*inclusive*/ std::true_type>;
13851387

1386-
return __parallel_scan_copy(__backend_tag, __exec, std::forward<_Range1>(__rng), std::forward<_Range2>(__result),
1387-
__n, _CreateOp{__pred}, _CopyOp{_ReduceOp{}});
1388+
return __parallel_scan_copy(__backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__rng),
1389+
std::forward<_Range2>(__result), __n, _CreateOp{__pred}, _CopyOp{_ReduceOp{}});
13881390
}
13891391

13901392
template <typename _ExecutionPolicy, typename _InRng, typename _OutRng, typename _Size, typename _Pred,
13911393
typename _Assign = oneapi::dpl::__internal::__pstl_assign>
13921394
auto
1393-
__parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag __backend_tag, const _ExecutionPolicy& __exec,
1395+
__parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec,
13941396
_InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, _Pred __pred, _Assign __assign = _Assign{})
13951397
{
13961398
using _SingleGroupInvoker = __invoke_single_group_copy_if<_Size>;
@@ -1425,8 +1427,8 @@ __parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag __backend_tag,
14251427
using _WriteOp = oneapi::dpl::__par_backend_hetero::__write_to_id_if<0, _Assign>;
14261428
try
14271429
{
1428-
return __parallel_reduce_then_scan_copy(__backend_tag, __exec, __in_rng, __out_rng, __n, _GenMask{__pred},
1429-
_WriteOp{__assign},
1430+
return __parallel_reduce_then_scan_copy(__backend_tag, std::forward<_ExecutionPolicy>(__exec), __in_rng,
1431+
__out_rng, __n, _GenMask{__pred}, _WriteOp{__assign},
14301432
/*_IsUniquePattern=*/std::false_type{});
14311433
}
14321434
catch (const sycl::exception& __e)
@@ -1440,8 +1442,9 @@ __parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag __backend_tag,
14401442
using _CopyOp = unseq_backend::__copy_by_mask<_ReduceOp, _Assign,
14411443
/*inclusive*/ std::true_type, 1>;
14421444

1443-
return __parallel_scan_copy(__backend_tag, __exec, std::forward<_InRng>(__in_rng), std::forward<_OutRng>(__out_rng),
1444-
__n, _CreateOp{__pred}, _CopyOp{_ReduceOp{}, __assign});
1445+
return __parallel_scan_copy(__backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_InRng>(__in_rng),
1446+
std::forward<_OutRng>(__out_rng), __n, _CreateOp{__pred},
1447+
_CopyOp{_ReduceOp{}, __assign});
14451448
}
14461449

14471450
#if _ONEDPL_COMPILE_KERNEL
@@ -1534,7 +1537,7 @@ __parallel_set_scan(oneapi::dpl::__internal::__device_backend_tag __backend_tag,
15341537
template <typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _Range3, typename _Compare,
15351538
typename _IsOpDifference>
15361539
auto
1537-
__parallel_set_op(oneapi::dpl::__internal::__device_backend_tag __backend_tag, const _ExecutionPolicy& __exec,
1540+
__parallel_set_op(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec,
15381541
_Range1&& __rng1, _Range2&& __rng2, _Range3&& __result, _Compare __comp,
15391542
_IsOpDifference __is_op_difference)
15401543
{
@@ -1543,17 +1546,18 @@ __parallel_set_op(oneapi::dpl::__internal::__device_backend_tag __backend_tag, c
15431546
{
15441547
try
15451548
{
1546-
return __parallel_set_reduce_then_scan(__backend_tag, __exec, __rng1, __rng2, __result, __comp,
1547-
__is_op_difference);
1549+
return __parallel_set_reduce_then_scan(__backend_tag, std::forward<_ExecutionPolicy>(__exec), __rng1,
1550+
__rng2, __result, __comp, __is_op_difference);
15481551
}
15491552
catch (const sycl::exception& __e)
15501553
{
15511554
__bypass_sycl_kernel_not_supported(__e);
15521555
}
15531556
}
15541557
#endif
1555-
return __parallel_set_scan(__backend_tag, __exec, std::forward<_Range1>(__rng1), std::forward<_Range2>(__rng2),
1556-
std::forward<_Range3>(__result), __comp, __is_op_difference);
1558+
return __parallel_set_scan(__backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__rng1),
1559+
std::forward<_Range2>(__rng2), std::forward<_Range3>(__result), __comp,
1560+
__is_op_difference);
15571561
}
15581562

15591563
//------------------------------------------------------------------------
@@ -2468,8 +2472,8 @@ __parallel_reduce_by_segment_fallback(oneapi::dpl::__internal::__device_backend_
24682472
template <typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _Range3, typename _Range4,
24692473
typename _BinaryPredicate, typename _BinaryOperator>
24702474
oneapi::dpl::__internal::__difference_t<_Range3>
2471-
__parallel_reduce_by_segment(oneapi::dpl::__internal::__device_backend_tag, const _ExecutionPolicy& __exec,
2472-
_Range1&& __keys, _Range2&& __values, _Range3&& __out_keys, _Range4&& __out_values,
2475+
__parallel_reduce_by_segment(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Range1&& __keys,
2476+
_Range2&& __values, _Range3&& __out_keys, _Range4&& __out_values,
24732477
_BinaryPredicate __binary_pred, _BinaryOperator __binary_op)
24742478
{
24752479
// The algorithm reduces values in __values where the
@@ -2493,8 +2497,8 @@ __parallel_reduce_by_segment(oneapi::dpl::__internal::__device_backend_tag, cons
24932497
try
24942498
{
24952499
auto __res = oneapi::dpl::__par_backend_hetero::__parallel_reduce_by_segment_reduce_then_scan(
2496-
oneapi::dpl::__internal::__device_backend_tag{}, __exec, __keys, __values, __out_keys, __out_values,
2497-
__binary_pred, __binary_op);
2500+
oneapi::dpl::__internal::__device_backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __keys,
2501+
__values, __out_keys, __out_values, __binary_pred, __binary_op);
24982502
// Because our init type ends up being tuple<std::size_t, ValType>, return the first component which is the write index. Add 1 to return the
24992503
// past-the-end iterator pair of segmented reduction.
25002504
return std::get<0>(__res.get()) + 1;
@@ -2507,7 +2511,7 @@ __parallel_reduce_by_segment(oneapi::dpl::__internal::__device_backend_tag, cons
25072511
}
25082512
#endif
25092513
return __parallel_reduce_by_segment_fallback(
2510-
oneapi::dpl::__internal::__device_backend_tag{}, __exec,
2514+
oneapi::dpl::__internal::__device_backend_tag{}, std::forward<_ExecutionPolicy>(__exec),
25112515
std::forward<_Range1>(__keys), std::forward<_Range2>(__values), std::forward<_Range3>(__out_keys),
25122516
std::forward<_Range4>(__out_values), __binary_pred, __binary_op,
25132517
oneapi::dpl::unseq_backend::__has_known_identity<_BinaryOperator, __val_type>{});

0 commit comments

Comments
 (0)