Skip to content

Commit ab4ae3d

Browse files
[oneDPL][test] Optimize merge_sort algorithm for largest data sizes (#1977)
1 parent 2a3a0b8 commit ab4ae3d

File tree

5 files changed

+490 -95 lines changed

5 files changed

+490 -95 lines changed

include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h

+3
Original file line numberDiff line numberDiff line change
@@ -2212,6 +2212,9 @@ struct __parallel_partial_sort_submitter<__internal::__optional_kernel_name<_Glo
2212 2212
}
2213 2213
};
2214 2214

2215+
template <typename... _Name>
2216+
class __sort_global_kernel;
2217+
2215 2218
template <typename _ExecutionPolicy, typename _Range, typename _Merge, typename _Compare>
2216 2219
auto
2217 2220
__parallel_partial_sort_impl(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Range&& __rng,

include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge.h

+5 -2
Original file line numberDiff line numberDiff line change
@@ -164,10 +164,13 @@ __serial_merge(const _Rng1& __rng1, const _Rng2& __rng2, _Rng3& __rng3, const _I
164 164
_Index __rng1_idx = __start1;
165 165
_Index __rng2_idx = __start2;
166 166

167+
bool __rng1_idx_less_n1 = false;
168+
bool __rng2_idx_less_n2 = false;
169+
167 170
for (_Index __rng3_idx = __start3; __rng3_idx < __rng3_idx_end; ++__rng3_idx)
168 171
{
169-
const bool __rng1_idx_less_n1 = __rng1_idx < __rng1_idx_end;
170-
const bool __rng2_idx_less_n2 = __rng2_idx < __rng2_idx_end;
172+
__rng1_idx_less_n1 = __rng1_idx < __rng1_idx_end;
173+
__rng2_idx_less_n2 = __rng2_idx < __rng2_idx_end;
171 174

172 175
// One of __rng1_idx_less_n1 and __rng2_idx_less_n2 should be true here
173 176
// because 1) we should fill output data with elements from one of the input ranges

0 commit comments

Comments (0)