Skip to content

Commit 4d9921f

Browse files
authored
Cherry pick fix for warning and compilation errors (#1849) (#1947)
1 parent 969c5e8 commit 4d9921f

File tree

3 files changed

+16
-14
lines changed

3 files changed

+16
-14
lines changed

include/oneapi/dpl/pstl/execution_impl.h

+4-4
Original file line number | Diff line number | Diff line change
@@ -106,25 +106,25 @@ __select_backend(oneapi::dpl::execution::parallel_unsequenced_policy, _IteratorT
106106
namespace __ranges
107107
{
108108

109-
::oneapi::dpl::__internal::__serial_tag<std::false_type>
109+
inline ::oneapi::dpl::__internal::__serial_tag<std::false_type>
110110
__select_backend(oneapi::dpl::execution::sequenced_policy)
111111
{
112112
return {};
113113
}
114114

115-
::oneapi::dpl::__internal::__serial_tag<std::true_type> //vectorization allowed
115+
inline ::oneapi::dpl::__internal::__serial_tag<std::true_type> //vectorization allowed
116116
__select_backend(oneapi::dpl::execution::unsequenced_policy)
117117
{
118118
return {};
119119
}
120120

121-
::oneapi::dpl::__internal::__parallel_tag<std::false_type>
121+
inline ::oneapi::dpl::__internal::__parallel_tag<std::false_type>
122122
__select_backend(oneapi::dpl::execution::parallel_policy)
123123
{
124124
return {};
125125
}
126126

127-
::oneapi::dpl::__internal::__parallel_tag<std::true_type> //vectorization allowed
127+
inline ::oneapi::dpl::__internal::__parallel_tag<std::true_type> //vectorization allowed
128128
__select_backend(oneapi::dpl::execution::parallel_unsequenced_policy)
129129
{
130130
return {};

include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h

+5-5
Original file line number | Diff line number | Diff line change
@@ -816,11 +816,11 @@ __parallel_radix_sort(oneapi::dpl::__internal::__device_backend_tag, _ExecutionP
816816
else if (__n <= 4096 && __wg_size * 4 <= __max_wg_size)
817817
__event = __subgroup_radix_sort<_RadixSortKernel, __wg_size * 4, 16, __radix_bits, __is_ascending>{}(
818818
__exec.queue(), ::std::forward<_Range>(__in_rng), __proj);
819-
// In __subgroup_radix_sort, we request a sub-group size via _ONEDPL_SYCL_REQD_SUB_GROUP_SIZE_IF_SUPPORTED
820-
// based upon the iters per item. For the below cases, register spills that result in runtime exceptions have
821-
// been observed on accelerators that do not support the requested sub-group size of 16. For the above cases
822-
// that request but may not receive a sub-group size of 16, inputs are small enough to avoid register
823-
// spills on assessed hardware.
819+
// In __subgroup_radix_sort, we request a sub-group size of 16 via _ONEDPL_SYCL_REQD_SUB_GROUP_SIZE_IF_SUPPORTED
820+
// for compilation targets that support this option. For the below cases, register spills that result in
821+
// runtime exceptions have been observed on accelerators that do not support the requested sub-group size of 16.
822+
// For the above cases that request but may not receive a sub-group size of 16, inputs are small enough to avoid
823+
// register spills on assessed hardware.
824824
else if (__n <= 8192 && __wg_size * 8 <= __max_wg_size && __dev_has_sg16)
825825
__event = __subgroup_radix_sort<_RadixSortKernel, __wg_size * 8, 16, __radix_bits, __is_ascending>{}(
826826
__exec.queue(), ::std::forward<_Range>(__in_rng), __proj);

include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort_one_wg.h

+7-5
Original file line number | Diff line number | Diff line change
@@ -30,8 +30,7 @@ template <typename... _Name>
3030
class __radix_sort_one_wg_kernel;
3131

3232
template <typename _KernelNameBase, uint16_t __wg_size = 256 /*work group size*/, uint16_t __block_size = 16,
33-
::std::uint32_t __radix = 4, bool __is_asc = true,
34-
uint16_t __req_sub_group_size = (__block_size < 4 ? 32 : 16)>
33+
std::uint32_t __radix = 4, bool __is_asc = true>
3534
struct __subgroup_radix_sort
3635
{
3736
template <typename _RangeIn, typename _Proj>
@@ -164,9 +163,12 @@ struct __subgroup_radix_sort
164163
auto __counter_lacc = __buf_count.get_acc(__cgh);
165164

166165
__cgh.parallel_for<_Name...>(
167-
__range,
168-
([=](sycl::nd_item<1> __it)[[_ONEDPL_SYCL_REQD_SUB_GROUP_SIZE_IF_SUPPORTED(__req_sub_group_size)]] {
169-
union __storage { _ValT __v[__block_size]; __storage(){} } __values;
166+
__range, ([=](sycl::nd_item<1> __it) [[_ONEDPL_SYCL_REQD_SUB_GROUP_SIZE_IF_SUPPORTED(16)]] {
167+
union __storage
168+
{
169+
_ValT __v[__block_size];
170+
__storage() {}
171+
} __values;
170172
uint16_t __wi = __it.get_local_linear_id();
171173
uint16_t __begin_bit = 0;
172174
constexpr uint16_t __end_bit = sizeof(_KeyT) * ::std::numeric_limits<unsigned char>::digits;

0 commit comments

Comments
 (0)