Skip to content

Commit 0066210

Browse files
committed
[oneDPL][ranges][merge] support size limit for output; fixes for __par_backend::__parallel_for
1 parent 6ec465c commit 0066210

File tree

4 files changed: +13 -9 lines changed

include/oneapi/dpl/pstl/algorithm_impl.h

+3 -3
Original file line number | Diff line number | Diff line change
@@ -2948,7 +2948,7 @@ __pattern_remove_if(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec,
29482948
// merge
29492949
//------------------------------------------------------------------------
29502950

2951-
template<std::random_access_iterator It1, std::random_access_iterator It2, std::random_access_iterator ItOut, typename _Comp>
2951+
template<typename It1, typename It2, typename ItOut, typename _Comp>
29522952
std::pair<It1, It2>
29532953
__brick_merge_2(It1 __it_1, It1 __it_1_e, It2 __it_2, It2 __it_2_e, ItOut __it_out, ItOut __it_out_e, _Comp __comp,
29542954
/* __is_vector = */ std::false_type)
@@ -3082,8 +3082,8 @@ __pattern_merge_2(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _It1 __i
30823082

30833083
//serial merge n elements, starting from input x and y, to [i, j) output range
30843084
auto __res = __brick_merge_2(__it_1 + __r, __it_1 + __n_1,
3085-
__it_2 + __c, __it_2 + __n_2,
3086-
__it_out + __i, __it_out + __j, __comp, _IsVector{});
3085+
__it_2 + __c, __it_2 + __n_2,
3086+
__it_out + __i, __it_out + __j, __comp, _IsVector{});
30873087

30883088
if(__j == __n_out)
30893089
{

include/oneapi/dpl/pstl/omp/parallel_for.h

+6 -5
Original file line number | Diff line number | Diff line change
@@ -29,10 +29,10 @@ namespace __omp_backend
2929

3030
template <class _Index, class _Fp>
3131
void
32-
__parallel_for_body(_Index __first, _Index __last, _Fp __f)
32+
__parallel_for_body(_Index __first, _Index __last, _Fp __f, std::size_t __grainsize)
3333
{
3434
// initial partition of the iteration space into chunks
35-
auto __policy = oneapi::dpl::__omp_backend::__chunk_partitioner(__first, __last);
35+
auto __policy = oneapi::dpl::__omp_backend::__chunk_partitioner(__first, __last, __grainsize);
3636

3737
// To avoid over-subscription we use taskloop for the nested parallelism
3838
_PSTL_PRAGMA(omp taskloop untied mergeable)
@@ -49,20 +49,21 @@ __parallel_for_body(_Index __first, _Index __last, _Fp __f)
4949

5050
template <class _ExecutionPolicy, class _Index, class _Fp>
5151
void
52-
__parallel_for(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f)
52+
__parallel_for(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f,
53+
std::size_t __grainsize = __default_chunk_size)
5354
{
5455
if (omp_in_parallel())
5556
{
5657
// we don't create a nested parallel region in an existing parallel
5758
// region: just create tasks
58-
oneapi::dpl::__omp_backend::__parallel_for_body(__first, __last, __f);
59+
oneapi::dpl::__omp_backend::__parallel_for_body(__first, __last, __f, __grainsize);
5960
}
6061
else
6162
{
6263
// in any case (nested or non-nested) one parallel region is created and
6364
// only one thread creates a set of tasks
6465
_PSTL_PRAGMA(omp parallel)
65-
_PSTL_PRAGMA(omp single nowait) { oneapi::dpl::__omp_backend::__parallel_for_body(__first, __last, __f); }
66+
_PSTL_PRAGMA(omp single nowait) { oneapi::dpl::__omp_backend::__parallel_for_body(__first, __last, __f, __grainsize); }
6667
}
6768
}
6869

include/oneapi/dpl/pstl/parallel_backend.h

+3
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,9 @@
3535
# endif
3636
#endif
3737

38+
//the parallel backend constants
39+
#define _ONEDPL_MERGE_CUT_OFF 2000
40+
3841
namespace oneapi
3942
{
4043
namespace dpl

include/oneapi/dpl/pstl/parallel_backend_serial.h

+1 -1
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ __cancel_execution(oneapi::dpl::__internal::__serial_backend_tag)
4545
template <class _ExecutionPolicy, class _Index, class _Fp>
4646
void
4747
__parallel_for(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last,
48-
_Fp __f)
48+
_Fp __f, std::size_t __grainsize = 1)
4949
{
5050
__f(__first, __last);
5151
}

0 commit comments

Comments
 (0)