@@ -615,8 +615,7 @@ __parallel_transform_scan_single_group(oneapi::dpl::__internal::__device_backend
 
     // Although we do not actually need result storage in this case, we need to construct
    // a placeholder here to match the return type of the non-single-work-group implementation
-    using __result_and_scratch_storage_t = __result_and_scratch_storage<_ExecutionPolicy, _ValueType>;
-    __result_and_scratch_storage_t __dummy_result_and_scratch{__exec, 0, 0};
+    __result_and_scratch_storage<_ExecutionPolicy, _ValueType> __dummy_result_and_scratch{__exec, 0, 0};
 
     if (__max_wg_size >= __targeted_wg_size)
     {
@@ -1093,7 +1092,7 @@ struct __write_to_id_if_else
 template <typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _UnaryOperation, typename _InitType,
           typename _BinaryOperation, typename _Inclusive>
 auto
-__parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag __backend_tag, const _ExecutionPolicy& __exec,
+__parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec,
                           _Range1&& __in_rng, _Range2&& __out_rng, std::size_t __n, _UnaryOperation __unary_op,
                           _InitType __init, _BinaryOperation __binary_op, _Inclusive)
 {
@@ -1122,9 +1121,9 @@ __parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag __backen
         std::size_t __single_group_upper_limit = __use_reduce_then_scan ? 2048 : 16384;
         if (__group_scan_fits_in_slm<_Type>(__exec.queue(), __n, __n_uniform, __single_group_upper_limit))
         {
-            return __parallel_transform_scan_single_group(__backend_tag, __exec, std::forward<_Range1>(__in_rng),
-                                                          std::forward<_Range2>(__out_rng), __n, __unary_op, __init,
-                                                          __binary_op, _Inclusive{});
+            return __parallel_transform_scan_single_group(
+                __backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__in_rng),
+                std::forward<_Range2>(__out_rng), __n, __unary_op, __init, __binary_op, _Inclusive{});
         }
     }
 #if _ONEDPL_COMPILE_KERNEL
@@ -1161,7 +1160,8 @@ __parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag __backen
     _NoOpFunctor __get_data_op;
 
     return __parallel_transform_scan_base(
-        __backend_tag, __exec, std::forward<_Range1>(__in_rng), std::forward<_Range2>(__out_rng), __init,
+        __backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__in_rng),
+        std::forward<_Range2>(__out_rng), __init,
         // local scan
         unseq_backend::__scan<_Inclusive, _ExecutionPolicy, _BinaryOperation, _UnaryFunctor, _Assigner, _Assigner,
                               _NoOpFunctor, _InitType>{__binary_op, _UnaryFunctor{__unary_op}, __assign_op, __assign_op,
@@ -1283,7 +1283,7 @@ __parallel_scan_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag
 
 template <typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _BinaryPredicate>
 auto
-__parallel_unique_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag, const _ExecutionPolicy& __exec,
+__parallel_unique_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec,
                        _Range1&& __rng, _Range2&& __result, _BinaryPredicate __pred)
 {
     using _Assign = oneapi::dpl::__internal::__pstl_assign;
@@ -1316,8 +1316,9 @@ __parallel_unique_copy(oneapi::dpl::__internal::__device_backend_t
                                                        decltype(__n)>;
     using _CopyOp = unseq_backend::__copy_by_mask<_ReduceOp, _Assign, /*inclusive*/ std::true_type, 1>;
 
-    return __parallel_scan_copy(__backend_tag, __exec, std::forward<_Range1>(__rng), std::forward<_Range2>(__result),
-                                __n, _CreateOp{oneapi::dpl::__internal::__not_pred<_BinaryPredicate>{__pred}},
+    return __parallel_scan_copy(__backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__rng),
+                                std::forward<_Range2>(__result), __n,
+                                _CreateOp{oneapi::dpl::__internal::__not_pred<_BinaryPredicate>{__pred}},
                                 _CopyOp{_ReduceOp{}, _Assign{}});
 }
 
@@ -1357,7 +1358,7 @@ __parallel_reduce_by_segment_reduce_then_scan(oneapi::dpl::__internal::__device_
 
 template <typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _UnaryPredicate>
 auto
-__parallel_partition_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag, const _ExecutionPolicy& __exec,
+__parallel_partition_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec,
                           _Range1&& __rng, _Range2&& __result, _UnaryPredicate __pred)
 {
     oneapi::dpl::__internal::__difference_t<_Range1> __n = __rng.size();
@@ -1383,14 +1384,14 @@ __parallel_partition_copy(oneapi::dpl::__internal::__device_backend_tag __backen
     using _CreateOp = unseq_backend::__create_mask<_UnaryPredicate, decltype(__n)>;
     using _CopyOp = unseq_backend::__partition_by_mask<_ReduceOp, /*inclusive*/ std::true_type>;
 
-    return __parallel_scan_copy(__backend_tag, __exec, std::forward<_Range1>(__rng), std::forward<_Range2>(__result),
-                                __n, _CreateOp{__pred}, _CopyOp{_ReduceOp{}});
+    return __parallel_scan_copy(__backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__rng),
+                                std::forward<_Range2>(__result), __n, _CreateOp{__pred}, _CopyOp{_ReduceOp{}});
 }
 
 template <typename _ExecutionPolicy, typename _InRng, typename _OutRng, typename _Size, typename _Pred,
           typename _Assign = oneapi::dpl::__internal::__pstl_assign>
 auto
-__parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag __backend_tag, const _ExecutionPolicy& __exec,
+__parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec,
                    _InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, _Pred __pred, _Assign __assign = _Assign{})
 {
     using _SingleGroupInvoker = __invoke_single_group_copy_if<_Size>;
@@ -1440,8 +1441,9 @@ __parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag __backend_tag,
     using _CopyOp = unseq_backend::__copy_by_mask<_ReduceOp, _Assign,
                                                   /*inclusive*/ std::true_type, 1>;
 
-    return __parallel_scan_copy(__backend_tag, __exec, std::forward<_InRng>(__in_rng), std::forward<_OutRng>(__out_rng),
-                                __n, _CreateOp{__pred}, _CopyOp{_ReduceOp{}, __assign});
+    return __parallel_scan_copy(__backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_InRng>(__in_rng),
+                                std::forward<_OutRng>(__out_rng), __n, _CreateOp{__pred},
+                                _CopyOp{_ReduceOp{}, __assign});
 }
 
 #if _ONEDPL_COMPILE_KERNEL
@@ -1534,7 +1536,7 @@ __parallel_set_scan(oneapi::dpl::__internal::__device_backend_tag __backend_tag,
 template <typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _Range3, typename _Compare,
           typename _IsOpDifference>
 auto
-__parallel_set_op(oneapi::dpl::__internal::__device_backend_tag __backend_tag, const _ExecutionPolicy& __exec,
+__parallel_set_op(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec,
                   _Range1&& __rng1, _Range2&& __rng2, _Range3&& __result, _Compare __comp,
                   _IsOpDifference __is_op_difference)
 {
@@ -1552,8 +1554,9 @@ __parallel_set_op(oneapi::dpl::__internal::__device_backend_tag __backend_tag, c
         }
     }
 #endif
-    return __parallel_set_scan(__backend_tag, __exec, std::forward<_Range1>(__rng1), std::forward<_Range2>(__rng2),
-                               std::forward<_Range3>(__result), __comp, __is_op_difference);
+    return __parallel_set_scan(__backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__rng1),
+                               std::forward<_Range2>(__rng2), std::forward<_Range3>(__result), __comp,
+                               __is_op_difference);
 }
 
 // ------------------------------------------------------------------------
@@ -2467,8 +2470,8 @@ __parallel_reduce_by_segment_fallback(oneapi::dpl::__internal::__device_backend_
 template <typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _Range3, typename _Range4,
           typename _BinaryPredicate, typename _BinaryOperator>
 oneapi::dpl::__internal::__difference_t<_Range3>
-__parallel_reduce_by_segment(oneapi::dpl::__internal::__device_backend_tag, const _ExecutionPolicy& __exec,
-                             _Range1&& __keys, _Range2&& __values, _Range3&& __out_keys, _Range4&& __out_values,
+__parallel_reduce_by_segment(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Range1&& __keys,
+                             _Range2&& __values, _Range3&& __out_keys, _Range4&& __out_values,
                              _BinaryPredicate __binary_pred, _BinaryOperator __binary_op)
 {
     // The algorithm reduces values in __values where the
@@ -2506,7 +2509,7 @@ __parallel_reduce_by_segment(oneapi::dpl::__internal::__device_backend_tag, cons
     }
 #endif
     return __parallel_reduce_by_segment_fallback(
-        oneapi::dpl::__internal::__device_backend_tag{}, __exec,
+        oneapi::dpl::__internal::__device_backend_tag{}, std::forward<_ExecutionPolicy>(__exec),
         std::forward<_Range1>(__keys), std::forward<_Range2>(__values), std::forward<_Range3>(__out_keys),
         std::forward<_Range4>(__out_values), __binary_pred, __binary_op,
         oneapi::dpl::unseq_backend::__has_known_identity<_BinaryOperator, __val_type>{});
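One observation on these hunks: `std::forward<_ExecutionPolicy>(__exec)` appears only on the final call of each return path, while earlier uses such as `__exec.queue()` read the policy without forwarding it, so the policy object is not forwarded (and thus potentially moved from) before its last use on that path.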