66
66
// 41(!!!) includes of SYCL headers + 10 includes of standard headers.
67
67
// 3300+ lines of code
68
68
69
- #define _KERNELFUNCPARAMTYPE const KernelType &
70
- #define _KERNELFUNCPARAM (a ) _KERNELFUNCPARAMTYPE a
71
-
72
69
#if defined(__SYCL_UNNAMED_LAMBDA__)
73
70
// We can't use nested types (e.g. struct S defined inside main() routine) to
74
71
// name kernels. At the same time, we have to provide a unique kernel name for
@@ -1251,7 +1248,7 @@ class __SYCL_EXPORT handler {
1251
1248
template <typename KernelName, typename KernelType, int Dims,
1252
1249
typename PropertiesT>
1253
1250
void parallel_for_impl (nd_range<Dims> ExecutionRange, PropertiesT Props,
1254
- _KERNELFUNCPARAM ( KernelFunc) ) {
1251
+ const KernelType & KernelFunc) {
1255
1252
// TODO: Properties may change the kernel function, so in order to avoid
1256
1253
// conflicts they should be included in the name.
1257
1254
using NameT =
@@ -1345,7 +1342,7 @@ class __SYCL_EXPORT handler {
1345
1342
typename PropertiesT = ext::oneapi::experimental::empty_properties_t >
1346
1343
void parallel_for_work_group_lambda_impl (range<Dims> NumWorkGroups,
1347
1344
PropertiesT Props,
1348
- _KERNELFUNCPARAM ( KernelFunc) ) {
1345
+ const KernelType & KernelFunc) {
1349
1346
// TODO: Properties may change the kernel function, so in order to avoid
1350
1347
// conflicts they should be included in the name.
1351
1348
using NameT =
@@ -1386,7 +1383,7 @@ class __SYCL_EXPORT handler {
1386
1383
void parallel_for_work_group_lambda_impl (range<Dims> NumWorkGroups,
1387
1384
range<Dims> WorkGroupSize,
1388
1385
PropertiesT Props,
1389
- _KERNELFUNCPARAM ( KernelFunc) ) {
1386
+ const KernelType & KernelFunc) {
1390
1387
// TODO: Properties may change the kernel function, so in order to avoid
1391
1388
// conflicts they should be included in the name.
1392
1389
using NameT =
@@ -1434,7 +1431,7 @@ class __SYCL_EXPORT handler {
1434
1431
#endif
1435
1432
1436
1433
__SYCL_KERNEL_ATTR__ static void
1437
- kernel_single_task (_KERNELFUNCPARAM( KernelFunc) ) {
1434
+ kernel_single_task (const KernelType & KernelFunc) {
1438
1435
#ifdef __SYCL_DEVICE_ONLY__
1439
1436
KernelFunc ();
1440
1437
#else
@@ -1453,7 +1450,7 @@ class __SYCL_EXPORT handler {
1453
1450
ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::value...)]]
1454
1451
#endif
1455
1452
__SYCL_KERNEL_ATTR__ static void
1456
- kernel_single_task (_KERNELFUNCPARAM( KernelFunc) , kernel_handler KH) {
1453
+ kernel_single_task (const KernelType & KernelFunc, kernel_handler KH) {
1457
1454
#ifdef __SYCL_DEVICE_ONLY__
1458
1455
KernelFunc (KH);
1459
1456
#else
@@ -1472,7 +1469,7 @@ class __SYCL_EXPORT handler {
1472
1469
ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::value...)]]
1473
1470
#endif
1474
1471
__SYCL_KERNEL_ATTR__ static void
1475
- kernel_parallel_for (_KERNELFUNCPARAM( KernelFunc) ) {
1472
+ kernel_parallel_for (const KernelType & KernelFunc) {
1476
1473
#ifdef __SYCL_DEVICE_ONLY__
1477
1474
KernelFunc (detail::Builder::getElement (detail::declptr<ElementType>()));
1478
1475
#else
@@ -1490,7 +1487,7 @@ class __SYCL_EXPORT handler {
1490
1487
ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::value...)]]
1491
1488
#endif
1492
1489
__SYCL_KERNEL_ATTR__ static void
1493
- kernel_parallel_for (_KERNELFUNCPARAM( KernelFunc) , kernel_handler KH) {
1490
+ kernel_parallel_for (const KernelType & KernelFunc, kernel_handler KH) {
1494
1491
#ifdef __SYCL_DEVICE_ONLY__
1495
1492
KernelFunc (detail::Builder::getElement (detail::declptr<ElementType>()), KH);
1496
1493
#else
@@ -1509,7 +1506,7 @@ class __SYCL_EXPORT handler {
1509
1506
ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::value...)]]
1510
1507
#endif
1511
1508
__SYCL_KERNEL_ATTR__ static void
1512
- kernel_parallel_for_work_group (_KERNELFUNCPARAM( KernelFunc) ) {
1509
+ kernel_parallel_for_work_group (const KernelType & KernelFunc) {
1513
1510
#ifdef __SYCL_DEVICE_ONLY__
1514
1511
KernelFunc (detail::Builder::getElement (detail::declptr<ElementType>()));
1515
1512
#else
@@ -1527,7 +1524,7 @@ class __SYCL_EXPORT handler {
1527
1524
ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::value...)]]
1528
1525
#endif
1529
1526
__SYCL_KERNEL_ATTR__ static void
1530
- kernel_parallel_for_work_group (_KERNELFUNCPARAM( KernelFunc) ,
1527
+ kernel_parallel_for_work_group (const KernelType & KernelFunc,
1531
1528
kernel_handler KH) {
1532
1529
#ifdef __SYCL_DEVICE_ONLY__
1533
1530
KernelFunc (detail::Builder::getElement (detail::declptr<ElementType>()), KH);
@@ -1589,7 +1586,7 @@ class __SYCL_EXPORT handler {
1589
1586
// Couldn't think of a better way to achieve both.
1590
1587
template <typename KernelName, typename KernelType, typename PropertiesT,
1591
1588
bool HasKernelHandlerArg, typename FuncTy>
1592
- void unpack (_KERNELFUNCPARAM( KernelFunc) , FuncTy Lambda) {
1589
+ void unpack (const KernelType & KernelFunc, FuncTy Lambda) {
1593
1590
#ifdef __SYCL_DEVICE_ONLY__
1594
1591
detail::CheckDeviceCopyable<KernelType>();
1595
1592
#endif // __SYCL_DEVICE_ONLY__
@@ -1600,8 +1597,7 @@ class __SYCL_EXPORT handler {
1600
1597
#ifndef __SYCL_DEVICE_ONLY__
1601
1598
// If there are properties provided by get method then process them.
1602
1599
if constexpr (ext::oneapi::experimental::detail::
1603
- HasKernelPropertiesGetMethod<
1604
- _KERNELFUNCPARAMTYPE>::value) {
1600
+ HasKernelPropertiesGetMethod<const KernelType &>::value) {
1605
1601
processProperties<detail::isKernelESIMD<KernelName>()>(
1606
1602
KernelFunc.get (ext::oneapi::experimental::properties_tag{}));
1607
1603
}
@@ -1620,7 +1616,7 @@ class __SYCL_EXPORT handler {
1620
1616
template <
1621
1617
typename KernelName, typename KernelType,
1622
1618
typename PropertiesT = ext::oneapi::experimental::empty_properties_t >
1623
- void kernel_single_task_wrapper (_KERNELFUNCPARAM( KernelFunc) ) {
1619
+ void kernel_single_task_wrapper (const KernelType & KernelFunc) {
1624
1620
unpack<KernelName, KernelType, PropertiesT,
1625
1621
detail::KernelLambdaHasKernelHandlerArgT<KernelType>::value>(
1626
1622
KernelFunc, [&](auto Unpacker, auto ... args) {
@@ -1632,7 +1628,7 @@ class __SYCL_EXPORT handler {
1632
1628
template <
1633
1629
typename KernelName, typename ElementType, typename KernelType,
1634
1630
typename PropertiesT = ext::oneapi::experimental::empty_properties_t >
1635
- void kernel_parallel_for_wrapper (_KERNELFUNCPARAM( KernelFunc) ) {
1631
+ void kernel_parallel_for_wrapper (const KernelType & KernelFunc) {
1636
1632
unpack<KernelName, KernelType, PropertiesT,
1637
1633
detail::KernelLambdaHasKernelHandlerArgT<KernelType,
1638
1634
ElementType>::value>(
@@ -1645,7 +1641,7 @@ class __SYCL_EXPORT handler {
1645
1641
template <
1646
1642
typename KernelName, typename ElementType, typename KernelType,
1647
1643
typename PropertiesT = ext::oneapi::experimental::empty_properties_t >
1648
- void kernel_parallel_for_work_group_wrapper (_KERNELFUNCPARAM( KernelFunc) ) {
1644
+ void kernel_parallel_for_work_group_wrapper (const KernelType & KernelFunc) {
1649
1645
unpack<KernelName, KernelType, PropertiesT,
1650
1646
detail::KernelLambdaHasKernelHandlerArgT<KernelType,
1651
1647
ElementType>::value>(
@@ -1666,7 +1662,7 @@ class __SYCL_EXPORT handler {
1666
1662
typename KernelName, typename KernelType,
1667
1663
typename PropertiesT = ext::oneapi::experimental::empty_properties_t >
1668
1664
void single_task_lambda_impl (PropertiesT Props,
1669
- _KERNELFUNCPARAM ( KernelFunc) ) {
1665
+ const KernelType & KernelFunc) {
1670
1666
(void )Props;
1671
1667
// TODO: Properties may change the kernel function, so in order to avoid
1672
1668
// conflicts they should be included in the name.
@@ -1892,27 +1888,27 @@ class __SYCL_EXPORT handler {
1892
1888
// /
1893
1889
// / \param KernelFunc is a SYCL kernel function.
1894
1890
template <typename KernelName = detail::auto_name, typename KernelType>
1895
- void single_task (_KERNELFUNCPARAM( KernelFunc) ) {
1891
+ void single_task (const KernelType & KernelFunc) {
1896
1892
single_task_lambda_impl<KernelName>(
1897
1893
ext::oneapi::experimental::empty_properties_t {}, KernelFunc);
1898
1894
}
1899
1895
1900
1896
template <typename KernelName = detail::auto_name, typename KernelType>
1901
- void parallel_for (range<1 > NumWorkItems, _KERNELFUNCPARAM( KernelFunc) ) {
1897
+ void parallel_for (range<1 > NumWorkItems, const KernelType & KernelFunc) {
1902
1898
parallel_for_lambda_impl<KernelName>(
1903
1899
NumWorkItems, ext::oneapi::experimental::empty_properties_t {},
1904
1900
std::move (KernelFunc));
1905
1901
}
1906
1902
1907
1903
template <typename KernelName = detail::auto_name, typename KernelType>
1908
- void parallel_for (range<2 > NumWorkItems, _KERNELFUNCPARAM( KernelFunc) ) {
1904
+ void parallel_for (range<2 > NumWorkItems, const KernelType & KernelFunc) {
1909
1905
parallel_for_lambda_impl<KernelName>(
1910
1906
NumWorkItems, ext::oneapi::experimental::empty_properties_t {},
1911
1907
std::move (KernelFunc));
1912
1908
}
1913
1909
1914
1910
template <typename KernelName = detail::auto_name, typename KernelType>
1915
- void parallel_for (range<3 > NumWorkItems, _KERNELFUNCPARAM( KernelFunc) ) {
1911
+ void parallel_for (range<3 > NumWorkItems, const KernelType & KernelFunc) {
1916
1912
parallel_for_lambda_impl<KernelName>(
1917
1913
NumWorkItems, ext::oneapi::experimental::empty_properties_t {},
1918
1914
std::move (KernelFunc));
@@ -1955,7 +1951,7 @@ class __SYCL_EXPORT handler {
1955
1951
int Dims>
1956
1952
__SYCL2020_DEPRECATED (" offsets are deprecated in SYCL2020" )
1957
1953
void parallel_for (range<Dims> NumWorkItems, id<Dims> WorkItemOffset,
1958
- _KERNELFUNCPARAM ( KernelFunc) ) {
1954
+ const KernelType & KernelFunc) {
1959
1955
using NameT =
1960
1956
typename detail::get_kernel_name_t <KernelName, KernelType>::name;
1961
1957
using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
@@ -1990,7 +1986,7 @@ class __SYCL_EXPORT handler {
1990
1986
template <typename KernelName = detail::auto_name, typename KernelType,
1991
1987
int Dims>
1992
1988
void parallel_for_work_group (range<Dims> NumWorkGroups,
1993
- _KERNELFUNCPARAM ( KernelFunc) ) {
1989
+ const KernelType & KernelFunc) {
1994
1990
parallel_for_work_group_lambda_impl<KernelName>(
1995
1991
NumWorkGroups, ext::oneapi::experimental::empty_properties_t {},
1996
1992
KernelFunc);
@@ -2012,7 +2008,7 @@ class __SYCL_EXPORT handler {
2012
2008
int Dims>
2013
2009
void parallel_for_work_group (range<Dims> NumWorkGroups,
2014
2010
range<Dims> WorkGroupSize,
2015
- _KERNELFUNCPARAM ( KernelFunc) ) {
2011
+ const KernelType & KernelFunc) {
2016
2012
parallel_for_work_group_lambda_impl<KernelName>(
2017
2013
NumWorkGroups, WorkGroupSize,
2018
2014
ext::oneapi::experimental::empty_properties_t {}, KernelFunc);
@@ -2096,7 +2092,7 @@ class __SYCL_EXPORT handler {
2096
2092
// / \param KernelFunc is a lambda that is used if device, queue is bound to,
2097
2093
// / is a host device.
2098
2094
template <typename KernelName = detail::auto_name, typename KernelType>
2099
- void single_task (kernel Kernel, _KERNELFUNCPARAM( KernelFunc) ) {
2095
+ void single_task (kernel Kernel, const KernelType & KernelFunc) {
2100
2096
// Ignore any set kernel bundles and use the one associated with the kernel
2101
2097
setHandlerKernelBundle (Kernel);
2102
2098
using NameT =
@@ -2132,7 +2128,7 @@ class __SYCL_EXPORT handler {
2132
2128
template <typename KernelName = detail::auto_name, typename KernelType,
2133
2129
int Dims>
2134
2130
void parallel_for (kernel Kernel, range<Dims> NumWorkItems,
2135
- _KERNELFUNCPARAM ( KernelFunc) ) {
2131
+ const KernelType & KernelFunc) {
2136
2132
// Ignore any set kernel bundles and use the one associated with the kernel
2137
2133
setHandlerKernelBundle (Kernel);
2138
2134
using NameT =
@@ -2171,7 +2167,7 @@ class __SYCL_EXPORT handler {
2171
2167
int Dims>
2172
2168
__SYCL2020_DEPRECATED (" offsets are deprecated in SYCL 2020" )
2173
2169
void parallel_for (kernel Kernel, range<Dims> NumWorkItems,
2174
- id<Dims> WorkItemOffset, _KERNELFUNCPARAM( KernelFunc) ) {
2170
+ id<Dims> WorkItemOffset, const KernelType & KernelFunc) {
2175
2171
using NameT =
2176
2172
typename detail::get_kernel_name_t <KernelName, KernelType>::name;
2177
2173
using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
@@ -2210,7 +2206,7 @@ class __SYCL_EXPORT handler {
2210
2206
template <typename KernelName = detail::auto_name, typename KernelType,
2211
2207
int Dims>
2212
2208
void parallel_for (kernel Kernel, nd_range<Dims> NDRange,
2213
- _KERNELFUNCPARAM ( KernelFunc) ) {
2209
+ const KernelType & KernelFunc) {
2214
2210
using NameT =
2215
2211
typename detail::get_kernel_name_t <KernelName, KernelType>::name;
2216
2212
using LambdaArgType =
@@ -2253,7 +2249,7 @@ class __SYCL_EXPORT handler {
2253
2249
template <typename KernelName = detail::auto_name, typename KernelType,
2254
2250
int Dims>
2255
2251
void parallel_for_work_group (kernel Kernel, range<Dims> NumWorkGroups,
2256
- _KERNELFUNCPARAM ( KernelFunc) ) {
2252
+ const KernelType & KernelFunc) {
2257
2253
using NameT =
2258
2254
typename detail::get_kernel_name_t <KernelName, KernelType>::name;
2259
2255
using LambdaArgType =
@@ -2294,7 +2290,7 @@ class __SYCL_EXPORT handler {
2294
2290
int Dims>
2295
2291
void parallel_for_work_group (kernel Kernel, range<Dims> NumWorkGroups,
2296
2292
range<Dims> WorkGroupSize,
2297
- _KERNELFUNCPARAM ( KernelFunc) ) {
2293
+ const KernelType & KernelFunc) {
2298
2294
using NameT =
2299
2295
typename detail::get_kernel_name_t <KernelName, KernelType>::name;
2300
2296
using LambdaArgType =
@@ -2327,7 +2323,7 @@ class __SYCL_EXPORT handler {
2327
2323
" member function instead." )
2328
2324
std::enable_if_t <ext::oneapi::experimental::is_property_list<
2329
2325
PropertiesT>::value> single_task (PropertiesT Props,
2330
- _KERNELFUNCPARAM ( KernelFunc) ) {
2326
+ const KernelType & KernelFunc) {
2331
2327
single_task_lambda_impl<KernelName, KernelType, PropertiesT>(Props,
2332
2328
KernelFunc);
2333
2329
}
@@ -2341,7 +2337,7 @@ class __SYCL_EXPORT handler {
2341
2337
std::enable_if_t <ext::oneapi::experimental::is_property_list<
2342
2338
PropertiesT>::value> parallel_for (range<1 > NumWorkItems,
2343
2339
PropertiesT Props,
2344
- _KERNELFUNCPARAM ( KernelFunc) ) {
2340
+ const KernelType & KernelFunc) {
2345
2341
parallel_for_lambda_impl<KernelName, KernelType, 1 , PropertiesT>(
2346
2342
NumWorkItems, Props, std::move (KernelFunc));
2347
2343
}
@@ -2355,7 +2351,7 @@ class __SYCL_EXPORT handler {
2355
2351
std::enable_if_t <ext::oneapi::experimental::is_property_list<
2356
2352
PropertiesT>::value> parallel_for (range<2 > NumWorkItems,
2357
2353
PropertiesT Props,
2358
- _KERNELFUNCPARAM ( KernelFunc) ) {
2354
+ const KernelType & KernelFunc) {
2359
2355
parallel_for_lambda_impl<KernelName, KernelType, 2 , PropertiesT>(
2360
2356
NumWorkItems, Props, std::move (KernelFunc));
2361
2357
}
@@ -2369,7 +2365,7 @@ class __SYCL_EXPORT handler {
2369
2365
std::enable_if_t <ext::oneapi::experimental::is_property_list<
2370
2366
PropertiesT>::value> parallel_for (range<3 > NumWorkItems,
2371
2367
PropertiesT Props,
2372
- _KERNELFUNCPARAM ( KernelFunc) ) {
2368
+ const KernelType & KernelFunc) {
2373
2369
parallel_for_lambda_impl<KernelName, KernelType, 3 , PropertiesT>(
2374
2370
NumWorkItems, Props, std::move (KernelFunc));
2375
2371
}
@@ -2383,7 +2379,7 @@ class __SYCL_EXPORT handler {
2383
2379
std::enable_if_t <ext::oneapi::experimental::is_property_list<
2384
2380
PropertiesT>::value> parallel_for (nd_range<Dims> Range,
2385
2381
PropertiesT Properties,
2386
- _KERNELFUNCPARAM ( KernelFunc) ) {
2382
+ const KernelType & KernelFunc) {
2387
2383
parallel_for_impl<KernelName>(Range, Properties, std::move (KernelFunc));
2388
2384
}
2389
2385
@@ -2511,7 +2507,7 @@ class __SYCL_EXPORT handler {
2511
2507
" get(sycl::ext::oneapi::experimental::properties_tag) "
2512
2508
" member function instead." )
2513
2509
void parallel_for_work_group (range<Dims> NumWorkGroups, PropertiesT Props,
2514
- _KERNELFUNCPARAM ( KernelFunc) ) {
2510
+ const KernelType & KernelFunc) {
2515
2511
parallel_for_work_group_lambda_impl<KernelName, KernelType, Dims,
2516
2512
PropertiesT>(NumWorkGroups, Props,
2517
2513
KernelFunc);
@@ -2525,15 +2521,12 @@ class __SYCL_EXPORT handler {
2525
2521
" member function instead." )
2526
2522
void parallel_for_work_group (range<Dims> NumWorkGroups,
2527
2523
range<Dims> WorkGroupSize, PropertiesT Props,
2528
- _KERNELFUNCPARAM ( KernelFunc) ) {
2524
+ const KernelType & KernelFunc) {
2529
2525
parallel_for_work_group_lambda_impl<KernelName, KernelType, Dims,
2530
2526
PropertiesT>(
2531
2527
NumWorkGroups, WorkGroupSize, Props, KernelFunc);
2532
2528
}
2533
2529
2534
- // Clean up KERNELFUNC macro.
2535
- #undef _KERNELFUNCPARAM
2536
-
2537
2530
// Explicit copy operations API
2538
2531
2539
2532
// / Copies the content of memory object accessed by Src into the memory
0 commit comments