@@ -1093,7 +1093,7 @@ struct __write_to_id_if_else
template <typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _UnaryOperation, typename _InitType,
          typename _BinaryOperation, typename _Inclusive>
auto
-__parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag __backend_tag, const _ExecutionPolicy& __exec,
+__parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec,
                          _Range1&& __in_rng, _Range2&& __out_rng, std::size_t __n, _UnaryOperation __unary_op,
                          _InitType __init, _BinaryOperation __binary_op, _Inclusive)
{
@@ -1122,9 +1122,9 @@ __parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag __backen
        std::size_t __single_group_upper_limit = __use_reduce_then_scan ? 2048 : 16384;
        if (__group_scan_fits_in_slm<_Type>(__exec.queue(), __n, __n_uniform, __single_group_upper_limit))
        {
-            return __parallel_transform_scan_single_group(__backend_tag, __exec, std::forward<_Range1>(__in_rng),
-                                                          std::forward<_Range2>(__out_rng), __n, __unary_op, __init,
-                                                          __binary_op, _Inclusive{});
+            return __parallel_transform_scan_single_group(
+                __backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__in_rng),
+                std::forward<_Range2>(__out_rng), __n, __unary_op, __init, __binary_op, _Inclusive{});
        }
    }
#if _ONEDPL_COMPILE_KERNEL
@@ -1137,10 +1137,10 @@ __parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag __backen
        _GenInput __gen_transform{__unary_op};
        try
        {
-            return __parallel_transform_reduce_then_scan(__backend_tag, __exec, __in_rng, __out_rng,
-                                                         __gen_transform, __binary_op, __gen_transform,
-                                                         _ScanInputTransform{}, _WriteOp{}, __init, _Inclusive{},
-                                                         /*_IsUniquePattern=*/std::false_type{});
+            return __parallel_transform_reduce_then_scan(
+                __backend_tag, std::forward<_ExecutionPolicy>(__exec), __in_rng, __out_rng, __gen_transform,
+                __binary_op, __gen_transform, _ScanInputTransform{}, _WriteOp{}, __init, _Inclusive{},
+                /*_IsUniquePattern=*/std::false_type{});
        }
        catch (const sycl::exception& __e)
        {
@@ -1161,7 +1161,8 @@ __parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag __backen
    _NoOpFunctor __get_data_op;

    return __parallel_transform_scan_base(
-        __backend_tag, __exec, std::forward<_Range1>(__in_rng), std::forward<_Range2>(__out_rng), __init,
+        __backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__in_rng),
+        std::forward<_Range2>(__out_rng), __init,
        // local scan
        unseq_backend::__scan<_Inclusive, _ExecutionPolicy, _BinaryOperation, _UnaryFunctor, _Assigner, _Assigner,
                              _NoOpFunctor, _InitType>{__binary_op, _UnaryFunctor{__unary_op}, __assign_op, __assign_op,
@@ -1283,7 +1284,7 @@ __parallel_scan_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag

template <typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _BinaryPredicate>
auto
-__parallel_unique_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag, const _ExecutionPolicy& __exec,
+__parallel_unique_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec,
                       _Range1&& __rng, _Range2&& __result, _BinaryPredicate __pred)
{
    using _Assign = oneapi::dpl::__internal::__pstl_assign;
@@ -1300,8 +1301,8 @@ __parallel_unique_copy(oneapi::dpl::__internal::__device_backend_tag __backend_t
        using _WriteOp = oneapi::dpl::__par_backend_hetero::__write_to_id_if<1, _Assign>;
        try
        {
-            return __parallel_reduce_then_scan_copy(__backend_tag, __exec, __rng, __result, __n, _GenMask{__pred},
-                                                    _WriteOp{_Assign{}},
+            return __parallel_reduce_then_scan_copy(__backend_tag, std::forward<_ExecutionPolicy>(__exec), __rng,
+                                                    __result, __n, _GenMask{__pred}, _WriteOp{_Assign{}},
                                                    /*_IsUniquePattern=*/std::true_type{});
        }
        catch (const sycl::exception& __e)
@@ -1316,8 +1317,9 @@ __parallel_unique_copy(oneapi::dpl::__internal::__device_backend_tag __backend_t
                                                           decltype(__n)>;
    using _CopyOp = unseq_backend::__copy_by_mask<_ReduceOp, _Assign, /*inclusive*/ std::true_type, 1>;

-    return __parallel_scan_copy(__backend_tag, __exec, std::forward<_Range1>(__rng), std::forward<_Range2>(__result),
-                                __n, _CreateOp{oneapi::dpl::__internal::__not_pred<_BinaryPredicate>{__pred}},
+    return __parallel_scan_copy(__backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__rng),
+                                std::forward<_Range2>(__result), __n,
+                                _CreateOp{oneapi::dpl::__internal::__not_pred<_BinaryPredicate>{__pred}},
                                _CopyOp{_ReduceOp{}, _Assign{}});
}

@@ -1357,7 +1359,7 @@ __parallel_reduce_by_segment_reduce_then_scan(oneapi::dpl::__internal::__device_

template <typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _UnaryPredicate>
auto
-__parallel_partition_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag, const _ExecutionPolicy& __exec,
+__parallel_partition_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec,
                          _Range1&& __rng, _Range2&& __result, _UnaryPredicate __pred)
{
    oneapi::dpl::__internal::__difference_t<_Range1> __n = __rng.size();
@@ -1369,8 +1371,8 @@ __parallel_partition_copy(oneapi::dpl::__internal::__device_backend_tag __backen
            oneapi::dpl::__par_backend_hetero::__write_to_id_if_else<oneapi::dpl::__internal::__pstl_assign>;
        try
        {
-            return __parallel_reduce_then_scan_copy(__backend_tag, __exec, __rng, __result, __n, _GenMask{__pred},
-                                                    _WriteOp{},
+            return __parallel_reduce_then_scan_copy(__backend_tag, std::forward<_ExecutionPolicy>(__exec), __rng,
+                                                    __result, __n, _GenMask{__pred}, _WriteOp{},
                                                    /*_IsUniquePattern=*/std::false_type{});
        }
        catch (const sycl::exception& __e)
@@ -1383,14 +1385,14 @@ __parallel_partition_copy(oneapi::dpl::__internal::__device_backend_tag __backen
    using _CreateOp = unseq_backend::__create_mask<_UnaryPredicate, decltype(__n)>;
    using _CopyOp = unseq_backend::__partition_by_mask<_ReduceOp, /*inclusive*/ std::true_type>;

-    return __parallel_scan_copy(__backend_tag, __exec, std::forward<_Range1>(__rng), std::forward<_Range2>(__result),
-                                __n, _CreateOp{__pred}, _CopyOp{_ReduceOp{}});
+    return __parallel_scan_copy(__backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__rng),
+                                std::forward<_Range2>(__result), __n, _CreateOp{__pred}, _CopyOp{_ReduceOp{}});
}

template <typename _ExecutionPolicy, typename _InRng, typename _OutRng, typename _Size, typename _Pred,
          typename _Assign = oneapi::dpl::__internal::__pstl_assign>
auto
-__parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag __backend_tag, const _ExecutionPolicy& __exec,
+__parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec,
                   _InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, _Pred __pred, _Assign __assign = _Assign{})
{
    using _SingleGroupInvoker = __invoke_single_group_copy_if<_Size>;
@@ -1425,8 +1427,8 @@ __parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag __backend_tag,
        using _WriteOp = oneapi::dpl::__par_backend_hetero::__write_to_id_if<0, _Assign>;
        try
        {
-            return __parallel_reduce_then_scan_copy(__backend_tag, __exec, __in_rng, __out_rng, __n, _GenMask{__pred},
-                                                    _WriteOp{__assign},
+            return __parallel_reduce_then_scan_copy(__backend_tag, std::forward<_ExecutionPolicy>(__exec), __in_rng,
+                                                    __out_rng, __n, _GenMask{__pred}, _WriteOp{__assign},
                                                    /*_IsUniquePattern=*/std::false_type{});
        }
        catch (const sycl::exception& __e)
@@ -1440,8 +1442,9 @@ __parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag __backend_tag,
    using _CopyOp = unseq_backend::__copy_by_mask<_ReduceOp, _Assign,
                                                  /*inclusive*/ std::true_type, 1>;

-    return __parallel_scan_copy(__backend_tag, __exec, std::forward<_InRng>(__in_rng), std::forward<_OutRng>(__out_rng),
-                                __n, _CreateOp{__pred}, _CopyOp{_ReduceOp{}, __assign});
+    return __parallel_scan_copy(__backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_InRng>(__in_rng),
+                                std::forward<_OutRng>(__out_rng), __n, _CreateOp{__pred},
+                                _CopyOp{_ReduceOp{}, __assign});
}

#if _ONEDPL_COMPILE_KERNEL
@@ -1534,7 +1537,7 @@ __parallel_set_scan(oneapi::dpl::__internal::__device_backend_tag __backend_tag,
template <typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _Range3, typename _Compare,
          typename _IsOpDifference>
auto
-__parallel_set_op(oneapi::dpl::__internal::__device_backend_tag __backend_tag, const _ExecutionPolicy& __exec,
+__parallel_set_op(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec,
                  _Range1&& __rng1, _Range2&& __rng2, _Range3&& __result, _Compare __comp,
                  _IsOpDifference __is_op_difference)
{
@@ -1543,17 +1546,18 @@ __parallel_set_op(oneapi::dpl::__internal::__device_backend_tag __backend_tag, c
    {
        try
        {
-            return __parallel_set_reduce_then_scan(__backend_tag, __exec, __rng1, __rng2, __result, __comp,
-                                                   __is_op_difference);
+            return __parallel_set_reduce_then_scan(__backend_tag, std::forward<_ExecutionPolicy>(__exec), __rng1,
+                                                   __rng2, __result, __comp, __is_op_difference);
        }
        catch (const sycl::exception& __e)
        {
            __bypass_sycl_kernel_not_supported(__e);
        }
    }
#endif
-    return __parallel_set_scan(__backend_tag, __exec, std::forward<_Range1>(__rng1), std::forward<_Range2>(__rng2),
-                               std::forward<_Range3>(__result), __comp, __is_op_difference);
+    return __parallel_set_scan(__backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__rng1),
+                               std::forward<_Range2>(__rng2), std::forward<_Range3>(__result), __comp,
+                               __is_op_difference);
}

// ------------------------------------------------------------------------
@@ -2468,8 +2472,8 @@ __parallel_reduce_by_segment_fallback(oneapi::dpl::__internal::__device_backend_
template <typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _Range3, typename _Range4,
          typename _BinaryPredicate, typename _BinaryOperator>
oneapi::dpl::__internal::__difference_t<_Range3>
-__parallel_reduce_by_segment(oneapi::dpl::__internal::__device_backend_tag, const _ExecutionPolicy& __exec,
-                             _Range1&& __keys, _Range2&& __values, _Range3&& __out_keys, _Range4&& __out_values,
+__parallel_reduce_by_segment(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Range1&& __keys,
+                             _Range2&& __values, _Range3&& __out_keys, _Range4&& __out_values,
                             _BinaryPredicate __binary_pred, _BinaryOperator __binary_op)
{
    // The algorithm reduces values in __values where the
@@ -2493,8 +2497,8 @@ __parallel_reduce_by_segment(oneapi::dpl::__internal::__device_backend_tag, cons
    try
    {
        auto __res = oneapi::dpl::__par_backend_hetero::__parallel_reduce_by_segment_reduce_then_scan(
-            oneapi::dpl::__internal::__device_backend_tag{}, __exec, __keys, __values, __out_keys, __out_values,
-            __binary_pred, __binary_op);
+            oneapi::dpl::__internal::__device_backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __keys,
+            __values, __out_keys, __out_values, __binary_pred, __binary_op);
        // Because our init type ends up being tuple<std::size_t, ValType>, return the first component which is the write index. Add 1 to return the
        // past-the-end iterator pair of segmented reduction.
        return std::get<0>(__res.get()) + 1;
@@ -2507,7 +2511,7 @@ __parallel_reduce_by_segment(oneapi::dpl::__internal::__device_backend_tag, cons
    }
#endif
    return __parallel_reduce_by_segment_fallback(
-        oneapi::dpl::__internal::__device_backend_tag{}, __exec,
+        oneapi::dpl::__internal::__device_backend_tag{}, std::forward<_ExecutionPolicy>(__exec),
        std::forward<_Range1>(__keys), std::forward<_Range2>(__values), std::forward<_Range3>(__out_keys),
        std::forward<_Range4>(__out_values), __binary_pred, __binary_op,
        oneapi::dpl::unseq_backend::__has_known_identity<_BinaryOperator, __val_type>{});
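
Every hunk in this diff applies the same change: the execution policy parameter switches from const _ExecutionPolicy& to a forwarding reference _ExecutionPolicy&&, and each call path hands it on with std::forward<_ExecutionPolicy>(__exec) so the caller's value category is preserved. For reference, here is a minimal, self-contained sketch of that parameter-passing pattern; the __toy_* names below are hypothetical, only borrowing the library's naming style, and are not part of oneDPL.

// Minimal sketch of the forwarding-reference pattern used throughout the diff above.
// The __toy_* names are hypothetical and illustrate only the parameter passing.
#include <iostream>
#include <type_traits>
#include <utility>

struct __toy_backend_tag
{
};
struct __toy_policy
{
};

// Callee: also takes a forwarding reference, so the policy's value category survives the hop.
template <typename _ExecutionPolicy>
void
__toy_scan_impl(__toy_backend_tag, _ExecutionPolicy&&)
{
    // _ExecutionPolicy deduces to an lvalue-reference type only when an lvalue was forwarded.
    std::cout << (std::is_lvalue_reference_v<_ExecutionPolicy> ? "lvalue policy\n" : "rvalue policy\n");
}

// Caller: accepts _ExecutionPolicy&& and forwards it exactly once on the path that uses it,
// mirroring std::forward<_ExecutionPolicy>(__exec) in the hunks above.
template <typename _ExecutionPolicy>
void
__toy_scan(__toy_backend_tag __tag, _ExecutionPolicy&& __exec)
{
    __toy_scan_impl(__tag, std::forward<_ExecutionPolicy>(__exec));
}

int
main()
{
    __toy_policy __policy;
    __toy_scan(__toy_backend_tag{}, __policy);       // deduces _ExecutionPolicy = __toy_policy&, prints "lvalue policy"
    __toy_scan(__toy_backend_tag{}, __toy_policy{}); // deduces _ExecutionPolicy = __toy_policy, prints "rvalue policy"
    return 0;
}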