Skip to content

Commit e945898

Browse files
Fix chunk calculation in merge-sort (#2002)
1 parent c3c1c59 commit e945898

File tree

1 file changed

+3
-1
lines changed

1 file changed

+3
-1
lines changed

include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge_sort.h

+3 -1
Original file line number | Diff line number | Diff line change
@@ -242,7 +242,9 @@ struct __merge_sort_global_submitter<_IndexT, __internal::__optional_kernel_name
242 242
const _IndexT __n = __rng.size();
243 243
_IndexT __n_sorted = __leaf_size;
244 244
const bool __is_cpu = __q.get_device().is_cpu();
245-
const _IndexT __chunk = __is_cpu ? 32 : 4;
245+
// The chunk size must not exceed the combined size of the two sorted sub-sequences to be merged,
246+
// ensuring that at least one work-item processes them.
247+
const _IndexT __chunk = std::min<_IndexT>(__is_cpu ? 32 : 4, __n_sorted * 2);
246 248
const std::size_t __steps = oneapi::dpl::__internal::__dpl_ceiling_div(__n, __chunk);
247 249
bool __data_in_temp = false;
248 250

0 commit comments

Comments
 (0)