diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index ab0b4ec93ba..370fb7d33d9 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -7,6 +7,7 @@ on: branches: - main - 'release/**' + - dev/mdvorski/ranges_algo_L2_set-origin_UXL paths: - '.github/**/*' - 'cmake/**/*' diff --git a/documentation/library_guide/parallel_api/parallel_range_algorithms.rst b/documentation/library_guide/parallel_api/parallel_range_algorithms.rst index 44ad07921af..7853af322e6 100644 --- a/documentation/library_guide/parallel_api/parallel_range_algorithms.rst +++ b/documentation/library_guide/parallel_api/parallel_range_algorithms.rst @@ -105,8 +105,13 @@ If ``ONEDPL_HAS_RANGE_ALGORITHMS`` is defined to ``202505L`` or a greater value, If ``ONEDPL_HAS_RANGE_ALGORITHMS`` is defined to ``202509L`` or a greater value, the following algorithms are provided: +* ``includes`` * ``reverse`` * ``reverse_copy`` +* ``set_difference`` +* ``set_intersection`` +* ``set_symmetric_difference`` +* ``set_union`` * ``unique`` * ``unique_copy`` * ``swap_ranges`` diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index a4112130d07..e17646feb89 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -3270,20 +3270,23 @@ __pattern_includes(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _ inline constexpr auto __set_algo_cut_off = 1000; template + class _OutputIterator, class _Compare, class _SizeFunction, class _SetOP, + class _Proj1 = oneapi::dpl::identity, class _Proj2 = oneapi::dpl::identity> _OutputIterator __parallel_set_op(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, - _OutputIterator __result, _Compare __comp, _SizeFunction __size_func, _SetOP __set_op) + _OutputIterator __result, _Compare __comp, 
_SizeFunction __size_func, _SetOP __set_op, + _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) { using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; - typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; - typedef typename ::std::iterator_traits<_OutputIterator>::value_type _T; + using _DifferenceType1 = typename std::iterator_traits<_RandomAccessIterator1>::difference_type; + using _DifferenceType2 = typename std::iterator_traits<_RandomAccessIterator2>::difference_type; + using _T = typename std::iterator_traits<_OutputIterator>::value_type; struct _SetRange { - _DifferenceType __pos, __len, __buf_pos; + _DifferenceType1 __pos, __len, __buf_pos; bool empty() const { @@ -3291,16 +3294,16 @@ __parallel_set_op(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomA } }; - const _DifferenceType __n1 = __last1 - __first1; - const _DifferenceType __n2 = __last2 - __first2; + const _DifferenceType1 __n1 = __last1 - __first1; + const _DifferenceType2 __n2 = __last2 - __first2; __par_backend::__buffer<_T> __buf(__size_func(__n1, __n2)); return __internal::__except_handler([&__exec, __n1, __first1, __last1, __first2, __last2, __result, __comp, - __size_func, __set_op, &__buf]() { + __size_func, __set_op, &__buf, __proj1, __proj2]() { auto __tmp_memory = __buf.get(); - _DifferenceType __m{}; - auto __scan = [=](_DifferenceType, _DifferenceType, const _SetRange& __s) { // Scan + _DifferenceType1 __m{}; + auto __scan = [=](_DifferenceType1, _DifferenceType1, const _SetRange& __s) { // Scan if (!__s.empty()) __brick_move_destroy<__parallel_tag<_IsVector>>{}(__tmp_memory + __s.__buf_pos, __tmp_memory + (__s.__buf_pos + __s.__len), @@ -3308,42 +3311,47 @@ __parallel_set_op(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomA }; __par_backend::__parallel_strict_scan( __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __n1, _SetRange{0, 0, 0}, //-1, 0}, - [=](_DifferenceType __i, _DifferenceType 
__len) { // Reduce + [=](_DifferenceType1 __i, _DifferenceType1 __len) { // Reduce //[__b; __e) - a subrange of the first sequence, to reduce _RandomAccessIterator1 __b = __first1 + __i; _RandomAccessIterator1 __e = __first1 + (__i + __len); //try searching for the first element which not equal to *__b if (__b != __first1) - __b = ::std::upper_bound(__b, __last1, *__b, __comp); + __b += __internal::__pstl_upper_bound(__b, _DifferenceType1{0}, __last1 - __b, + std::invoke(__proj1, *__b), __comp, __proj1); //try searching for the first element which not equal to *__e if (__e != __last1) - __e = ::std::upper_bound(__e, __last1, *__e, __comp); + __e += __internal::__pstl_upper_bound(__e, _DifferenceType1{0}, __last1 - __e, + std::invoke(__proj1, *__e), __comp, __proj1); //check is [__b; __e) empty if (__e - __b < 1) { _RandomAccessIterator2 __bb = __last2; if (__b != __last1) - __bb = ::std::lower_bound(__first2, __last2, *__b, __comp); + __bb = __first2 + __internal::__pstl_lower_bound(__first2, _DifferenceType2{0}, __last2 - __first2, + std::invoke(__proj1, *__b), __comp, __proj2); - const _DifferenceType __buf_pos = __size_func((__b - __first1), (__bb - __first2)); + const _DifferenceType1 __buf_pos = __size_func((__b - __first1), (__bb - __first2)); return _SetRange{0, 0, __buf_pos}; } //try searching for "corresponding" subrange [__bb; __ee) in the second sequence _RandomAccessIterator2 __bb = __first2; if (__b != __first1) - __bb = ::std::lower_bound(__first2, __last2, *__b, __comp); + __bb = __first2 + __internal::__pstl_lower_bound(__first2, _DifferenceType2{0}, __last2 - __first2, + std::invoke(__proj1, *__b), __comp, __proj2); _RandomAccessIterator2 __ee = __last2; if (__e != __last1) - __ee = ::std::lower_bound(__bb, __last2, *__e, __comp); + __ee = __bb + __internal::__pstl_lower_bound(__bb, _DifferenceType2{0}, __last2 - __bb, + std::invoke(__proj1, *__e), __comp, __proj2); - const _DifferenceType __buf_pos = __size_func((__b - __first1), (__bb - 
__first2)); + const _DifferenceType1 __buf_pos = __size_func((__b - __first1), (__bb - __first2)); auto __buffer_b = __tmp_memory + __buf_pos; - auto __res = __set_op(__b, __e, __bb, __ee, __buffer_b, __comp); + auto __res = __set_op(__b, __e, __bb, __ee, __buffer_b, __comp, __proj1, __proj2); return _SetRange{0, __res - __buffer_b, __buf_pos}; }, @@ -3364,15 +3372,18 @@ __parallel_set_op(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomA //a shared parallel pattern for '__pattern_set_union' and '__pattern_set_symmetric_difference' template + class _OutputIterator, class _Compare, class _SetUnionOp, class _Proj1 = oneapi::dpl::identity, + class _Proj2 = oneapi::dpl::identity> _OutputIterator __parallel_set_union_op(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, - _OutputIterator __result, _Compare __comp, _SetUnionOp __set_union_op) + _OutputIterator __result, _Compare __comp, _SetUnionOp __set_union_op, _Proj1 __proj1 = {}, + _Proj2 __proj2 = {}) { using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; - typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; + using _DifferenceType1 = typename std::iterator_traits<_RandomAccessIterator1>::difference_type; + using _DifferenceType2 = typename std::iterator_traits<_RandomAccessIterator2>::difference_type; const auto __n1 = __last1 - __first1; const auto __n2 = __last2 - __first2; @@ -3384,13 +3395,15 @@ __parallel_set_union_op(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __ex return __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result, __copy_range); - // {} {2}: parallel copying justmake second sequence + // {} {2}: parallel copying just second sequence if (__n1 == 0) return __internal::__pattern_walk2_brick(__tag, 
::std::forward<_ExecutionPolicy>(__exec), __first2, __last2, __result, __copy_range); // testing whether the sequences are intersected - _RandomAccessIterator1 __left_bound_seq_1 = ::std::lower_bound(__first1, __last1, *__first2, __comp); + _RandomAccessIterator1 __left_bound_seq_1 = + __first1 + __internal::__pstl_lower_bound(__first1, _DifferenceType1{0}, __last1 - __first1, + std::invoke(__proj2, *__first2), __comp, __proj1); if (__left_bound_seq_1 == __last1) { @@ -3407,7 +3420,9 @@ __parallel_set_union_op(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __ex } // testing whether the sequences are intersected - _RandomAccessIterator2 __left_bound_seq_2 = ::std::lower_bound(__first2, __last2, *__first1, __comp); + _RandomAccessIterator2 __left_bound_seq_2 = + __first2 + __internal::__pstl_lower_bound(__first2, _DifferenceType2{0}, __last2 - __first2, + std::invoke(__proj1, *__first1), __comp, __proj2); if (__left_bound_seq_2 == __last2) { @@ -3437,7 +3452,8 @@ __parallel_set_union_op(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __ex [=, &__exec, &__result] { __result = __internal::__parallel_set_op( __tag, __exec, __left_bound_seq_1, __last1, __first2, __last2, __result, __comp, - [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, __set_union_op); + [](_DifferenceType1 __n, _DifferenceType1 __m) { return __n + __m; }, __set_union_op, __proj1, + __proj2); }); return __result; } @@ -3457,14 +3473,15 @@ __parallel_set_union_op(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __ex [=, &__exec, &__result] { __result = __internal::__parallel_set_op( __tag, __exec, __first1, __last1, __left_bound_seq_2, __last2, __result, __comp, - [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, __set_union_op); + [](_DifferenceType1 __n, _DifferenceType1 __m) { return __n + __m; }, __set_union_op, __proj1, + __proj2); }); return __result; } return __internal::__parallel_set_op( __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, 
__last1, __first2, __last2, __result, __comp, - [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, __set_union_op); + [](_DifferenceType1 __n, _DifferenceType1 __m) { return __n + __m; }, __set_union_op, __proj1, __proj2); } //------------------------------------------------------------------------ @@ -3477,7 +3494,7 @@ __brick_set_union(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _Forwar _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp, /*__is_vector=*/::std::false_type) noexcept { - return ::std::set_union(__first1, __last1, __first2, __last2, __result, __comp); + return std::set_union(__first1, __last1, __first2, __last2, __result, __comp); } template @@ -3498,7 +3515,7 @@ __brick_set_union(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last /*__is_vector=*/::std::true_type) noexcept { _PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); - return ::std::set_union(__first1, __last1, __first2, __last2, __result, __comp); + return std::set_union(__first1, __last1, __first2, __last2, __result, __comp); } template __tag, _ExecutionPolicy&& __exec, // use serial algorithm if (__n1 + __n2 <= __set_algo_cut_off) - return ::std::set_union(__first1, __last1, __first2, __last2, __result, __comp); + return std::set_union(__first1, __last1, __first2, __last2, __result, __comp); - typedef typename ::std::iterator_traits<_OutputIterator>::value_type _Tp; + using _Tp = typename std::iterator_traits<_OutputIterator>::value_type; return __parallel_set_union_op( __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, - _RandomAccessIterator2 __last2, _Tp* __result, _Compare __comp) { + _RandomAccessIterator2 __last2, _Tp* __result, _Compare __comp, oneapi::dpl::identity, + oneapi::dpl::identity) { return 
oneapi::dpl::__utils::__set_union_construct(__first1, __last1, __first2, __last2, __result, __comp, __BrickCopyConstruct<_IsVector>()); }); @@ -3548,7 +3566,7 @@ __brick_set_intersection(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp, /*__is_vector=*/::std::false_type) noexcept { - return ::std::set_intersection(__first1, __last1, __first2, __last2, __result, __comp); + return std::set_intersection(__first1, __last1, __first2, __last2, __result, __comp); } template @@ -3559,7 +3577,7 @@ __brick_set_intersection(_RandomAccessIterator1 __first1, _RandomAccessIterator1 /*__is_vector=*/::std::true_type) noexcept { _PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); - return ::std::set_intersection(__first1, __last1, __first2, __last2, __result, __comp); + return std::set_intersection(__first1, __last1, __first2, __last2, __result, __comp); } template __tag, _ExecutionPolicy&& _ _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _RandomAccessIterator3 __result, _Compare __comp) { - typedef typename ::std::iterator_traits<_RandomAccessIterator3>::value_type _T; - typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; + using _T = typename std::iterator_traits<_RandomAccessIterator3>::value_type; + using _DifferenceType = typename std::iterator_traits<_RandomAccessIterator1>::difference_type; const auto __n1 = __last1 - __first1; const auto __n2 = __last2 - __first2; @@ -3593,13 +3611,13 @@ __pattern_set_intersection(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& _ return __result; // testing whether the sequences are intersected - _RandomAccessIterator1 __left_bound_seq_1 = ::std::lower_bound(__first1, __last1, *__first2, __comp); + _RandomAccessIterator1 __left_bound_seq_1 = std::lower_bound(__first1, __last1, *__first2, __comp); //{1} < {2}: seq 2 is wholly greater than 
seq 1, so, the intersection is empty if (__left_bound_seq_1 == __last1) return __result; // testing whether the sequences are intersected - _RandomAccessIterator2 __left_bound_seq_2 = ::std::lower_bound(__first2, __last2, *__first1, __comp); + _RandomAccessIterator2 __left_bound_seq_2 = std::lower_bound(__first2, __last2, *__first1, __comp); //{2} < {1}: seq 1 is wholly greater than seq 2, so, the intersection is empty if (__left_bound_seq_2 == __last2) return __result; @@ -3610,10 +3628,11 @@ __pattern_set_intersection(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& _ //we know proper offset due to [first1; left_bound_seq_1) < [first2; last2) return __internal::__except_handler([&]() { return __internal::__parallel_set_op( - __tag, ::std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, __last2, - __result, __comp, [](_DifferenceType __n, _DifferenceType __m) { return ::std::min(__n, __m); }, + __tag, std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, __last2, __result, + __comp, [](_DifferenceType __n, _DifferenceType __m) { return std::min(__n, __m); }, [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, - _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { + _RandomAccessIterator2 __last2, _T* __result, _Compare __comp, oneapi::dpl::identity, + oneapi::dpl::identity) { return oneapi::dpl::__utils::__set_intersection_construct( __first1, __last1, __first2, __last2, __result, __comp, oneapi::dpl::__internal::__op_uninitialized_copy<_ExecutionPolicy>{}, @@ -3628,10 +3647,11 @@ __pattern_set_intersection(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& _ //we know proper offset due to [first2; left_bound_seq_2) < [first1; last1) return __internal::__except_handler([&]() { __result = __internal::__parallel_set_op( - __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, __last2, - __result, __comp, 
[](_DifferenceType __n, _DifferenceType __m) { return ::std::min(__n, __m); }, + __tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, __last2, __result, + __comp, [](_DifferenceType __n, _DifferenceType __m) { return std::min(__n, __m); }, [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, - _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { + _RandomAccessIterator2 __last2, _T* __result, _Compare __comp, oneapi::dpl::identity, + oneapi::dpl::identity) { return oneapi::dpl::__utils::__set_intersection_construct( __first2, __last2, __first1, __last1, __result, __comp, oneapi::dpl::__internal::__op_uninitialized_copy<_ExecutionPolicy>{}, @@ -3642,7 +3662,7 @@ __pattern_set_intersection(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& _ } // [left_bound_seq_1; last1) and [left_bound_seq_2; last2) - use serial algorithm - return ::std::set_intersection(__left_bound_seq_1, __last1, __left_bound_seq_2, __last2, __result, __comp); + return std::set_intersection(__left_bound_seq_1, __last1, __left_bound_seq_2, __last2, __result, __comp); } //------------------------------------------------------------------------ @@ -3655,7 +3675,7 @@ __brick_set_difference(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _F _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp, /*__is_vector=*/::std::false_type) noexcept { - return ::std::set_difference(__first1, __last1, __first2, __last2, __result, __comp); + return std::set_difference(__first1, __last1, __first2, __last2, __result, __comp); } template @@ -3665,7 +3685,7 @@ __brick_set_difference(_RandomAccessIterator1 __first1, _RandomAccessIterator1 _ /*__is_vector=*/::std::true_type) noexcept { _PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); - return ::std::set_difference(__first1, __last1, __first2, __last2, __result, __comp); + return std::set_difference(__first1, __last1, 
__first2, __last2, __result, __comp); } template __tag, _ExecutionPolicy&& __e _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _RandomAccessIterator3 __result, _Compare __comp) { - typedef typename ::std::iterator_traits<_RandomAccessIterator3>::value_type _T; - typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; + using _T = typename std::iterator_traits<_RandomAccessIterator3>::value_type; + using _DifferenceType = typename std::iterator_traits<_RandomAccessIterator1>::difference_type; const auto __n1 = __last1 - __first1; const auto __n2 = __last2 - __first2; @@ -3700,35 +3720,36 @@ __pattern_set_difference(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __e // {1} \ {}: parallel copying just first sequence if (__n2 == 0) - return __pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result, + return __pattern_walk2_brick(__tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result, __internal::__brick_copy<__parallel_tag<_IsVector>>{}); // testing whether the sequences are intersected - _RandomAccessIterator1 __left_bound_seq_1 = ::std::lower_bound(__first1, __last1, *__first2, __comp); + _RandomAccessIterator1 __left_bound_seq_1 = std::lower_bound(__first1, __last1, *__first2, __comp); //{1} < {2}: seq 2 is wholly greater than seq 1, so, parallel copying just first sequence if (__left_bound_seq_1 == __last1) - return __pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result, + return __pattern_walk2_brick(__tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result, __internal::__brick_copy<__parallel_tag<_IsVector>>{}); // testing whether the sequences are intersected - _RandomAccessIterator2 __left_bound_seq_2 = ::std::lower_bound(__first2, __last2, *__first1, __comp); + _RandomAccessIterator2 __left_bound_seq_2 = std::lower_bound(__first2, __last2, 
*__first1, __comp); //{2} < {1}: seq 1 is wholly greater than seq 2, so, parallel copying just first sequence if (__left_bound_seq_2 == __last2) - return __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + return __internal::__pattern_walk2_brick(__tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result, __brick_copy<__parallel_tag<_IsVector>>{}); if (__n1 + __n2 > __set_algo_cut_off) return __parallel_set_op( - __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, + __tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, [](_DifferenceType __n, _DifferenceType) { return __n; }, [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, - _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { + _RandomAccessIterator2 __last2, _T* __result, _Compare __comp, oneapi::dpl::identity, + oneapi::dpl::identity) { return oneapi::dpl::__utils::__set_difference_construct(__first1, __last1, __first2, __last2, __result, __comp, __BrickCopyConstruct<_IsVector>()); }); // use serial algorithm - return ::std::set_difference(__first1, __last1, __first2, __last2, __result, __comp); + return std::set_difference(__first1, __last1, __first2, __last2, __result, __comp); } //------------------------------------------------------------------------ @@ -3741,7 +3762,7 @@ __brick_set_symmetric_difference(_ForwardIterator1 __first1, _ForwardIterator1 _ _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp, /*__is_vector=*/::std::false_type) noexcept { - return ::std::set_symmetric_difference(__first1, __last1, __first2, __last2, __result, __comp); + return std::set_symmetric_difference(__first1, __last1, __first2, __last2, __result, __comp); } template @@ -3752,7 +3773,7 @@ __brick_set_symmetric_difference(_RandomAccessIterator1 __first1, _RandomAccessI 
/*__is_vector=*/::std::true_type) noexcept { _PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); - return ::std::set_symmetric_difference(__first1, __last1, __first2, __last2, __result, __comp); + return std::set_symmetric_difference(__first1, __last1, __first2, __last2, __result, __comp); } template __tag, _ExecutionPo // use serial algorithm if (__n1 + __n2 <= __set_algo_cut_off) - return ::std::set_symmetric_difference(__first1, __last1, __first2, __last2, __result, __comp); + return std::set_symmetric_difference(__first1, __last1, __first2, __last2, __result, __comp); typedef typename ::std::iterator_traits<_RandomAccessIterator3>::value_type _T; return __internal::__except_handler([&]() { return __internal::__parallel_set_union_op( - __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, + __tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, - _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { + _RandomAccessIterator2 __last2, _T* __result, _Compare __comp, oneapi::dpl::identity, + oneapi::dpl::identity) { return oneapi::dpl::__utils::__set_symmetric_difference_construct( __first1, __last1, __first2, __last2, __result, __comp, __BrickCopyConstruct<_IsVector>()); }); @@ -4046,8 +4068,8 @@ __pattern_minmax_element(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _ typedef ::std::pair<_RandomAccessIterator, _RandomAccessIterator> _Result; return __par_backend::__parallel_reduce( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - /*identity*/ ::std::make_pair(__last, __last), + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, + /*identity*/ std::make_pair(__last, __last), [=, &__comp](_RandomAccessIterator __begin, _RandomAccessIterator __end, _Result __init) -> 
_Result { const _Result __subresult = __internal::__brick_minmax_element(__begin, __end, __comp, _IsVector{}); if (__init.first == __last) // = identity diff --git a/include/oneapi/dpl/pstl/algorithm_ranges_impl.h b/include/oneapi/dpl/pstl/algorithm_ranges_impl.h index 74aae1bc565..a7c5ed088a9 100644 --- a/include/oneapi/dpl/pstl/algorithm_ranges_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_ranges_impl.h @@ -24,8 +24,8 @@ # include # include -# include "algorithm_fwd.h" # include "execution_impl.h" +# include "algorithm_impl.h" namespace oneapi { @@ -643,6 +643,772 @@ __pattern_merge_ranges(_Tag __tag, _ExecutionPolicy&& __exec, _R1&& __r1, _R2&& return __return_type{__res1, __res2, __it_out + __n_out}; } +//--------------------------------------------------------------------------------------------------------------------- +// includes +//--------------------------------------------------------------------------------------------------------------------- + +template +bool +__brick_includes(_R1&& __r1, _R2&& __r2, _Comp __comp, _Proj1 __proj1, _Proj2 __proj2, + /*__is_vector=*/std::false_type) noexcept +{ + return std::ranges::includes(std::forward<_R1>(__r1), std::forward<_R2>(__r2), __comp, __proj1, __proj2); +} + +template +bool +__brick_includes(_R1&& __r1, _R2&& __r2, _Comp __comp, _Proj1 __proj1, _Proj2 __proj2, + /*__is_vector=*/std::true_type) noexcept +{ + _PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return std::ranges::includes(std::forward<_R1>(__r1), std::forward<_R2>(__r2), __comp, __proj1, __proj2); +} + +template +bool +__pattern_includes(_Tag __tag, _ExecutionPolicy&& __exec, _R1&& __r1, _R2&& __r2, _Comp __comp, _Proj1 __proj1, + _Proj2 __proj2) +{ + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __brick_includes(std::forward<_R1>(__r1), std::forward<_R2>(__r2), __comp, __proj1, __proj2, + typename _Tag::__is_vector{}); +} + +template +bool 
+__pattern_includes(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _R1&& __r1, _R2&& __r2, _Comp __comp, + _Proj1 __proj1, _Proj2 __proj2) +{ + using _RandomAccessIterator2 = std::ranges::iterator_t<_R2>; + + const auto __n1 = std::ranges::size(__r1); + const auto __n2 = std::ranges::size(__r2); + + // TODO: why is this code absent in __pattern_includes + __parallel_tag for iterators? + // use serial algorithm + //if (__n1 + __n2 <= oneapi::dpl::__internal::__set_algo_cut_off) + // return std::ranges::includes(std::forward<_R1>(__r1), std::forward<_R2>(__r2), __comp, __proj1, __proj2); + + auto __first1 = std::ranges::begin(__r1); + auto __last1 = __first1 + __n1; + auto __first2 = std::ranges::begin(__r2); + auto __last2 = __first2 + __n2; + + using _DifferenceType1 = typename std::iterator_traits::difference_type; + using _DifferenceType2 = typename std::iterator_traits::difference_type; + + if (__first2 == __last2) + return true; + + //optimization; {1} - the first sequence, {2} - the second sequence + //{1} is empty or size_of{2} > size_of{1} + if (__first1 == __last1 || __last2 - __first2 > __last1 - __first1 || + // {1}: [**********] or [**********] + // {2}: [***********] [***********] + std::invoke(__comp, std::invoke(__proj2, *__first2), std::invoke(__proj1, *__first1)) || + std::invoke(__comp, std::invoke(__proj1, *(__last1 - 1)), std::invoke(__proj2, *(__last2 - 1)))) + return false; + + __first1 += oneapi::dpl::__internal::__pstl_lower_bound(__first1, _DifferenceType1{0}, __last1 - __first1, + std::invoke(__proj2, *__first2), __comp, __proj1); + if (__first1 == __last1) + return false; + + if (__last2 - __first2 == 1) + return !std::invoke(__comp, std::invoke(__proj1, *__first1), std::invoke(__proj2, *__first2)) && + !std::invoke(__comp, std::invoke(__proj2, *__first2), std::invoke(__proj1, *__first1)); + + return !__internal::__parallel_or( + __tag, std::forward<_ExecutionPolicy>(__exec), __first2, __last2, + [__first1, __last1, __first2, 
__last2, __comp, __proj1, __proj2](_RandomAccessIterator2 __i, + _RandomAccessIterator2 __j) { + assert(__j > __i); + //assert(__j - __i > 1); + + //1. moving boundaries to "consume" subsequence of equal elements + auto __is_equal_sorted = [&__comp, __proj2](_RandomAccessIterator2 __a, + _RandomAccessIterator2 __b) -> bool { + //enough one call of __comp due to compared couple belongs to one sorted sequence + return !std::invoke(__comp, std::invoke(__proj2, *__a), std::invoke(__proj2, *__b)); + }; + + //1.1 left bound, case "aaa[aaaxyz...]" - searching "x" + if (__i > __first2 && __is_equal_sorted(__i - 1, __i)) + { + //whole subrange continues to have equal elements - return "no op" + if (__is_equal_sorted(__i, __j - 1)) + return false; + + __i += oneapi::dpl::__internal::__pstl_upper_bound(__i, _DifferenceType2{0}, __last2 - __i, + std::invoke(__proj2, *__i), __comp, __proj2); + } + + //1.2 right bound, case "[...aaa]aaaxyz" - searching "x" + if (__j < __last2 && __is_equal_sorted(__j - 1, __j)) + __j += oneapi::dpl::__internal::__pstl_upper_bound(__j, _DifferenceType2{0}, __last2 - __j, + std::invoke(__proj2, *__j), __comp, __proj2); + + //2. testing is __a subsequence of the second range included into the first range + auto __b = __first1 + + oneapi::dpl::__internal::__pstl_lower_bound(__first1, _DifferenceType1{0}, __last1 - __first1, + std::invoke(__proj2, *__i), __comp, __proj1); + + //assert(!__comp(*(__last1 - 1), *__b)); + //assert(!__comp(*(__j - 1), *__i)); + return !std::ranges::includes(__b, __last1, __i, __j, __comp, __proj1, __proj2); + }); +} + +//--------------------------------------------------------------------------------------------------------------------- +// set_union +//--------------------------------------------------------------------------------------------------------------------- + +// Bounded set union: performs set_union with output range capacity checking. +// Truncates result if output range is too small. 
+template +std::ranges::set_union_result, + std::ranges::borrowed_iterator_t<_R2>, + std::ranges::borrowed_iterator_t<_OutRange>> +__serial_set_union(_R1&& __r1, _R2&& __r2, _OutRange&& __r_out, _Comp __comp = {}, + _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) +{ + auto __it1 = std::ranges::begin(__r1); + auto __in1_sz = std::ranges::size(__r1); + auto __end1 = __it1 + __in1_sz; + + auto __it2 = std::ranges::begin(__r2); + auto __in2_sz = std::ranges::size(__r2); + auto __end2 = __it2 + __in2_sz; + + auto __out_it = std::ranges::begin(__r_out); + auto __out_sz = std::ranges::size(__r_out); + auto __out_end = __out_it + __out_sz; + + auto __merge_loop = [&__it1, __end1, &__it2, __end2, &__out_it, __out_end, + __comp, __proj1, __proj2] (bool __check_output_bounds) { + // TODO: check if (!__check_output_bounds || __out_it != __out_end) is optimized out + // when __check_output_bounds is false otherwise make it compile-time constant + while (__it1 != __end1 && __it2 != __end2 && (!__check_output_bounds || __out_it != __out_end)) + { + if (std::invoke(__comp, std::invoke(__proj1, *__it1), std::invoke(__proj2, *__it2))) + { + *__out_it = *__it1; + ++__it1; + } + else if (std::invoke(__comp, std::invoke(__proj2, *__it2), std::invoke(__proj1, *__it1))) + { + *__out_it = *__it2; + ++__it2; + } + else + { + *__out_it = *__it1; + ++__it1; + ++__it2; + } + ++__out_it; + } + }; + + if (__out_sz >= __in1_sz + __in2_sz) + { + // 1. Main set_union operation + __merge_loop(/*__check_output_bounds=*/false); + + // 2. Copying the residual elements if one of the input sequences is exhausted + auto __copy1 = std::ranges::copy(__it1, __end1, __out_it); + auto __copy2 = std::ranges::copy(__it2, __end2, __copy1.out); + return {__copy1.in, __copy2.in, __copy2.out}; + } + else + { + // 1. Main set_union operation + __merge_loop(/*__check_output_bounds=*/true); + + // 2. 
Copying the residual elements if one of the input sequences is exhausted + using _SizeCommon1 = std::common_type_t, std::ranges::range_size_t<_OutRange>>; + using _SizeCommon2 = std::common_type_t, std::ranges::range_size_t<_OutRange>>; + const _SizeCommon1 __copy_n1 = std::min<_SizeCommon1>(std::ranges::distance(__it1, __end1), + std::ranges::distance(__out_it, __out_end)); + auto __copy1 = std::ranges::copy_n(__it1, __copy_n1, __out_it); + const _SizeCommon2 __copy_n2 = std::min<_SizeCommon2>(std::ranges::distance(__it2, __end2), + std::ranges::distance(__copy1.out, __out_end)); + auto __copy2 = std::ranges::copy_n(__it2, __copy_n2, __copy1.out); + return {__copy1.in, __copy2.in, __copy2.out}; + } +} + +template +auto +__brick_set_union(_R1&& __r1, _R2&& __r2, _OutRange&& __out_r, _Comp __comp, _Proj1 __proj1, _Proj2 __proj2, + /*__is_vector=*/std::false_type) noexcept +{ + return __serial_set_union(std::forward<_R1>(__r1), std::forward<_R2>(__r2), std::forward<_OutRange>(__out_r), + __comp, __proj1, __proj2); +} + +template +auto +__brick_set_union(_R1&& __r1, _R2&& __r2, _OutRange&& __out_r, _Comp __comp, _Proj1 __proj1, _Proj2 __proj2, + /*__is_vector=*/std::true_type) noexcept +{ + _PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return __serial_set_union(std::forward<_R1>(__r1), std::forward<_R2>(__r2), std::forward<_OutRange>(__out_r), + __comp, __proj1, __proj2); +} + +template +using __pattern_set_union_return_t = + std::ranges::set_union_result, std::ranges::borrowed_iterator_t<_R2>, + std::ranges::borrowed_iterator_t<_OutRange>>; + +template +__pattern_set_union_return_t<_R1, _R2, _OutRange> +__pattern_set_union(_Tag __tag, _ExecutionPolicy&& __exec, _R1&& __r1, _R2&& __r2, _OutRange&& __out_r, _Comp __comp, + _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) +{ + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __brick_set_union(std::forward<_R1>(__r1), std::forward<_R2>(__r2), 
std::forward<_OutRange>(__out_r), __comp, + __proj1, __proj2, typename _Tag::__is_vector{}); +} + +template +__pattern_set_union_return_t<_R1, _R2, _OutRange> +__pattern_set_union(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _R1&& __r1, _R2&& __r2, + _OutRange&& __out_r, _Comp __comp, _Proj1 __proj1 = oneapi::dpl::identity{}, + _Proj2 __proj2 = oneapi::dpl::identity{}) +{ + using _RandomAccessIterator1 = std::ranges::iterator_t<_R1>; + using _RandomAccessIterator2 = std::ranges::iterator_t<_R2>; + using _Tp = std::ranges::range_value_t<_OutRange>; + + const auto __n1 = std::ranges::size(__r1); + const auto __n2 = std::ranges::size(__r2); + + auto __first1 = std::ranges::begin(__r1); + auto __last1 = __first1 + __n1; + auto __first2 = std::ranges::begin(__r2); + auto __last2 = __first2 + __n2; + auto __result = std::ranges::begin(__out_r); + + // use serial algorithm + if (__n1 + __n2 <= oneapi::dpl::__internal::__set_algo_cut_off) + return __serial_set_union(std::forward<_R1>(__r1), std::forward<_R2>(__r2), std::forward<_OutRange>(__out_r), + __comp, __proj1, __proj2); + + auto __out_last = oneapi::dpl::__internal::__parallel_set_union_op( + __tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, + [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _Tp* __result, _Comp __comp, _Proj1 __proj1, _Proj2 __proj2) { + return oneapi::dpl::__utils::__set_union_construct( + __first1, __last1, __first2, __last2, __result, __comp, + oneapi::dpl::__internal::__BrickCopyConstruct<_IsVector>(), __proj1, __proj2); + }, + __proj1, __proj2); + + return __pattern_set_union_return_t<_R1, _R2, _OutRange>{__first1 + __n1, __first2 + __n2, + __result + (__out_last - __result)}; +} + +//--------------------------------------------------------------------------------------------------------------------- +// set_intersection 
+//--------------------------------------------------------------------------------------------------------------------- + +// Bounded set intersection: performs set_intersection with output range capacity checking. +// Truncates result if output range is too small. + +template +std::ranges::set_intersection_result, std::ranges::borrowed_iterator_t<_R2>, + std::ranges::borrowed_iterator_t<_OutRange>> +__serial_set_intersection(std::ranges::iterator_t<_R1> __it1, std::ranges::iterator_t<_R1> __end1, + std::ranges::iterator_t<_R2> __it2, std::ranges::iterator_t<_R2> __end2, + std::ranges::iterator_t<_OutRange> __out_it, std::ranges::iterator_t<_OutRange> __out_end, + _Comp __comp = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) +{ + while (__it1 != __end1 && __it2 != __end2 && __out_it != __out_end) + { + if (std::invoke(__comp, std::invoke(__proj1, *__it1), std::invoke(__proj2, *__it2))) + { + ++__it1; + } + else if (std::invoke(__comp, std::invoke(__proj2, *__it2), std::invoke(__proj1, *__it1))) + { + ++__it2; + } + else + { + *__out_it = *__it1; + ++__it1; + ++__it2; + ++__out_it; + } + } + + if (__it1 == __end1 || __it2 == __end2) + return {__end1, __end2, __out_it}; + + return {__it1, __it2, __out_it}; +} + +template +std::ranges::set_intersection_result, std::ranges::borrowed_iterator_t<_R2>, + std::ranges::borrowed_iterator_t<_OutRange>> +__serial_set_intersection(_R1&& __r1, _R2&& __r2, _OutRange&& __out_r, _Comp __comp = {}, _Proj1 __proj1 = {}, + _Proj2 __proj2 = {}) +{ + auto __it1 = std::ranges::begin(__r1); + auto __end1 = __it1 + std::ranges::size(__r1); + + auto __it2 = std::ranges::begin(__r2); + auto __end2 = __it2 + std::ranges::size(__r2); + + auto __out_it = std::ranges::begin(__out_r); + auto __out_end = __out_it + std::ranges::size(__out_r); + + return __serial_set_intersection<_R1, _R2, _OutRange>(__it1, __end1, __it2, __end2, __out_it, __out_end, __comp, __proj1, __proj2); +} + +template +std::ranges::set_intersection_result, 
std::ranges::borrowed_iterator_t<_R2>, + std::ranges::borrowed_iterator_t<_OutRange>> +__brick_set_intersection(_R1&& __r1, _R2&& __r2, _OutRange&& __out_r, _Comp __comp, _Proj1 __proj1, _Proj2 __proj2, + /*__is_vector=*/std::false_type) noexcept +{ + return __serial_set_intersection(std::forward<_R1>(__r1), std::forward<_R2>(__r2), std::forward<_OutRange>(__out_r), + __comp, __proj1, __proj2); +} + +template +std::ranges::set_intersection_result, std::ranges::borrowed_iterator_t<_R2>, + std::ranges::borrowed_iterator_t<_OutRange>> +__brick_set_intersection(_R1&& __r1, _R2&& __r2, _OutRange&& __out_r, _Comp __comp, _Proj1 __proj1, _Proj2 __proj2, + /*__is_vector=*/std::true_type) noexcept +{ + _PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return __serial_set_intersection(std::forward<_R1>(__r1), std::forward<_R2>(__r2), std::forward<_OutRange>(__out_r), + __comp, __proj1, __proj2); +} + +template +using __pattern_set_intersection_return_t = + std::ranges::set_intersection_result, std::ranges::borrowed_iterator_t<_R2>, + std::ranges::borrowed_iterator_t<_OutRange>>; + +template +__pattern_set_intersection_return_t<_R1, _R2, _OutRange> +__pattern_set_intersection(_Tag __tag, _ExecutionPolicy&& __exec, _R1&& __r1, _R2&& __r2, _OutRange&& __out_r, + _Comp __comp, _Proj1 __proj1, _Proj2 __proj2) +{ + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __brick_set_intersection(std::forward<_R1>(__r1), std::forward<_R2>(__r2), std::forward<_OutRange>(__out_r), + __comp, __proj1, __proj2, typename _Tag::__is_vector{}); +} + +template +__pattern_set_intersection_return_t<_R1, _R2, _OutRange> +__pattern_set_intersection(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _R1&& __r1, _R2&& __r2, + _OutRange&& __out_r, _Comp __comp, _Proj1 __proj1, _Proj2 __proj2) +{ + using _RandomAccessIterator1 = std::ranges::iterator_t<_R1>; + using _RandomAccessIterator2 = std::ranges::iterator_t<_R2>; + 
using _Tp = std::ranges::range_value_t<_OutRange>; + + using _DifferenceType1 = typename std::iterator_traits<_RandomAccessIterator1>::difference_type; + using _DifferenceType2 = typename std::iterator_traits<_RandomAccessIterator2>::difference_type; + + const auto __n1 = std::ranges::size(__r1); + const auto __n2 = std::ranges::size(__r2); + + auto __first1 = std::ranges::begin(__r1); + auto __last1 = __first1 + __n1; + auto __first2 = std::ranges::begin(__r2); + auto __last2 = __first2 + __n2; + auto __result = std::ranges::begin(__out_r); + + // intersection is empty + if (__n1 == 0 || __n2 == 0) + return __pattern_set_intersection_return_t<_R1, _R2, _OutRange>{__last1, __last2, __result}; + + // testing whether the sequences are intersected + auto __left_bound_seq_1 = + __first1 + oneapi::dpl::__internal::__pstl_lower_bound(__first1, _DifferenceType1{0}, __last1 - __first1, + std::invoke(__proj2, *__first2), __comp, __proj1); + //{1} < {2}: seq 2 is wholly greater than seq 1, so, the intersection is empty + if (__left_bound_seq_1 == __last1) + return __pattern_set_intersection_return_t<_R1, _R2, _OutRange>{__last1, __last2, __result}; + + // testing whether the sequences are intersected + auto __left_bound_seq_2 = + __first2 + oneapi::dpl::__internal::__pstl_lower_bound(__first2, _DifferenceType2{0}, __last2 - __first2, + std::invoke(__proj1, *__first1), __comp, __proj2); + //{2} < {1}: seq 1 is wholly greater than seq 2, so, the intersection is empty + if (__left_bound_seq_2 == __last2) + return __pattern_set_intersection_return_t<_R1, _R2, _OutRange>{__last1, __last2, __result}; + + const auto __m1 = __last1 - __left_bound_seq_1 + __n2; + if (__m1 > oneapi::dpl::__internal::__set_algo_cut_off) + { + //we know proper offset due to [first1; left_bound_seq_1) < [first2; last2) + auto __out_last = oneapi::dpl::__internal::__parallel_set_union_op( + __tag, std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, __last2, __result, + __comp, + 
[](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _Tp* __result, _Comp __comp, _Proj1 __proj1, _Proj2 __proj2) { + return oneapi::dpl::__utils::__set_intersection_construct( + __first1, __last1, __first2, __last2, __result, __comp, + oneapi::dpl::__internal::__op_uninitialized_copy<_ExecutionPolicy>{}, + /*CopyFromFirstSet = */ std::true_type{}, __proj1, __proj2); + }, + __proj1, __proj2); + return __pattern_set_intersection_return_t<_R1, _R2, _OutRange>{__last1, __last2, __out_last}; + } + + const auto __m2 = __last2 - __left_bound_seq_2 + __n1; + if (__m2 > oneapi::dpl::__internal::__set_algo_cut_off) + { + //we know proper offset due to [first2; left_bound_seq_2) < [first1; last1) + auto __out_last = oneapi::dpl::__internal::__parallel_set_union_op( + __tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, __last2, __result, + __comp, + [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _Tp* __result, _Comp __comp, _Proj1 __proj1, _Proj2 __proj2) { + return oneapi::dpl::__utils::__set_intersection_construct( + __first1, __last1, __first2, __last2, __result, __comp, + oneapi::dpl::__internal::__op_uninitialized_copy<_ExecutionPolicy>{}, + /*CopyFromFirstSet = */ std::false_type{}, __proj1, __proj2); + }, + __proj1, __proj2); + return __pattern_set_intersection_return_t<_R1, _R2, _OutRange>{__last1, __last2, __out_last}; + } + + // [left_bound_seq_1; last1) and [left_bound_seq_2; last2) - use serial algorithm + return __serial_set_intersection<_R1, _R2, _OutRange>(__left_bound_seq_1, __last1, __left_bound_seq_2, __last2, + std::ranges::begin(__out_r), std::ranges::end(__out_r), + __comp, __proj1, __proj2); +} + +//--------------------------------------------------------------------------------------------------------------------- +// set_difference 
+//--------------------------------------------------------------------------------------------------------------------- + +template +std::ranges::set_difference_result, std::ranges::borrowed_iterator_t<_OutRange>> +__serial_set_difference(std::ranges::iterator_t<_R1> __it1, std::ranges::iterator_t<_R1> __end1, + std::ranges::iterator_t<_R2> __it2, std::ranges::iterator_t<_R2> __end2, + std::ranges::iterator_t<_OutRange> __out_it, std::ranges::iterator_t<_OutRange> __out_end, + _Comp __comp = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) +{ + while (__it1 != __end1 && __it2 != __end2 && __out_it != __out_end) + { + if (std::invoke(__comp, std::invoke(__proj1, *__it1), std::invoke(__proj2, *__it2))) + { + *__out_it = *__it1; + ++__it1; + ++__out_it; + } + else if (std::invoke(__comp, std::invoke(__proj2, *__it2), std::invoke(__proj1, *__it1))) + { + ++__it2; + } + else + { + ++__it1; + ++__it2; + } + } + + if (__out_it != __out_end) + return std::ranges::copy(__it1, __end1, __out_it); + + return {__it1, __out_it}; +} + +template +std::ranges::set_difference_result, std::ranges::borrowed_iterator_t<_OutRange>> +__serial_set_difference(_R1&& __r1, _R2&& __r2, _OutRange&& __out_r, _Comp __comp = {}, _Proj1 __proj1 = {}, + _Proj2 __proj2 = {}) +{ + auto __it1 = std::ranges::begin(__r1); + auto __end1 = __it1 + std::ranges::size(__r1); + + auto __it2 = std::ranges::begin(__r2); + auto __end2 = __it2 + std::ranges::size(__r2); + + auto __out_it = std::ranges::begin(__out_r); + auto __out_end = __out_it + std::ranges::size(__out_r); + + return __serial_set_difference<_R1, _R2, _OutRange>(__it1, __end1, __it2, __end2, __out_it, __out_end, __comp, __proj1, __proj2); +} + +template +auto +__brick_set_difference(_R1&& __r1, _R2&& __r2, _OutRange&& __out_r, _Comp __comp, _Proj1 __proj1, _Proj2 __proj2, + /*__is_vector=*/std::false_type) noexcept +{ + return __serial_set_difference(std::forward<_R1>(__r1), std::forward<_R2>(__r2), std::forward<_OutRange>(__out_r), + __comp, 
__proj1, __proj2); +} + +template +auto +__brick_set_difference(_R1&& __r1, _R2&& __r2, _OutRange&& __out_r, _Comp __comp, _Proj1 __proj1, _Proj2 __proj2, + /*__is_vector=*/std::true_type) noexcept +{ + _PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return __serial_set_difference(std::forward<_R1>(__r1), std::forward<_R2>(__r2), std::forward<_OutRange>(__out_r), + __comp, __proj1, __proj2); +} + +template +using __pattern_set_difference_return_t = + std::ranges::set_difference_result, + std::ranges::borrowed_iterator_t<_OutRange>>; + +template +__pattern_set_difference_return_t<_R1, _OutRange> +__pattern_set_difference(_Tag __tag, _ExecutionPolicy&& __exec, _R1&& __r1, _R2&& __r2, _OutRange&& __out_r, + _Comp __comp, _Proj1 __proj1, _Proj2 __proj2) +{ + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __brick_set_difference(std::forward<_R1>(__r1), std::forward<_R2>(__r2), std::forward<_OutRange>(__out_r), + __comp, __proj1, __proj2, typename _Tag::__is_vector{}); +} + +template +__pattern_set_difference_return_t<_R1, _OutRange> +__pattern_set_difference(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _R1&& __r1, _R2&& __r2, + _OutRange&& __out_r, _Comp __comp, _Proj1 __proj1, _Proj2 __proj2) +{ + using _RandomAccessIterator1 = std::ranges::iterator_t<_R1>; + using _RandomAccessIterator2 = std::ranges::iterator_t<_R2>; + using _Tp = std::ranges::range_value_t<_OutRange>; + + using _DifferenceType1 = typename std::iterator_traits<_RandomAccessIterator1>::difference_type; + using _DifferenceType2 = typename std::iterator_traits<_RandomAccessIterator2>::difference_type; + + const auto __n1 = std::ranges::size(__r1); + const auto __n2 = std::ranges::size(__r2); + + auto __first1 = std::ranges::begin(__r1); + auto __last1 = __first1 + __n1; + auto __first2 = std::ranges::begin(__r2); + auto __last2 = __first2 + __n2; + auto __result = std::ranges::begin(__out_r); + + // {} \ {2}: the 
difference is empty + if (__n1 == 0) + return __pattern_set_difference_return_t<_R1, _OutRange>{__first1, __result}; + + // {1} \ {}: parallel copying just first sequence + if (__n2 == 0) + { + auto __out_last = __pattern_walk2_brick(__tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __result, __internal::__brick_copy<__parallel_tag<_IsVector>>{}); + return __pattern_set_difference_return_t<_R1, _OutRange>{__last1, __out_last}; + } + + // testing whether the sequences are intersected + auto __left_bound_seq_1 = + __first1 + oneapi::dpl::__internal::__pstl_lower_bound(__first1, _DifferenceType1{0}, __last1 - __first1, + std::invoke(__proj2, *__first2), __comp, __proj1); + //{1} < {2}: seq 2 is wholly greater than seq 1, so, parallel copying just first sequence + if (__left_bound_seq_1 == __last1) + { + auto __out_last = __pattern_walk2_brick(__tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __result, __internal::__brick_copy<__parallel_tag<_IsVector>>{}); + return __pattern_set_difference_return_t<_R1, _OutRange>{__last1, __out_last}; + } + + // testing whether the sequences are intersected + auto __left_bound_seq_2 = + __first2 + oneapi::dpl::__internal::__pstl_lower_bound(__first2, _DifferenceType2{0}, __last2 - __first2, + std::invoke(__proj1, *__first1), __comp, __proj2); + //{2} < {1}: seq 1 is wholly greater than seq 2, so, parallel copying just first sequence + if (__left_bound_seq_2 == __last2) + { + auto __out_last = + __internal::__pattern_walk2_brick(__tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __result, __brick_copy<__parallel_tag<_IsVector>>{}); + return __pattern_set_difference_return_t<_R1, _OutRange>{__last1, __out_last}; + } + + if (__n1 + __n2 > oneapi::dpl::__internal::__set_algo_cut_off) + { + auto __out_last = oneapi::dpl::__internal::__parallel_set_union_op( + __tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, + 
[](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _Tp* __result, _Comp __comp, _Proj1 __proj1, _Proj2 __proj2) { + return oneapi::dpl::__utils::__set_difference_construct( + __first1, __last1, __first2, __last2, __result, __comp, + oneapi::dpl::__internal::__BrickCopyConstruct<_IsVector>(), __proj1, __proj2); + }, + __proj1, __proj2); + + return __pattern_set_difference_return_t<_R1, _OutRange>{__last1, __result + (__out_last - __result)}; + } + + // use serial algorithm + return __serial_set_difference(std::forward<_R1>(__r1), std::forward<_R2>(__r2), std::forward<_OutRange>(__out_r), + __comp, __proj1, __proj2); +} + +//--------------------------------------------------------------------------------------------------------------------- +// set_symmetric_difference +//--------------------------------------------------------------------------------------------------------------------- + +template +std::ranges::set_symmetric_difference_result, + std::ranges::borrowed_iterator_t<_R2>, + std::ranges::borrowed_iterator_t<_OutRange>> +__serial_set_symmetric_difference(std::ranges::iterator_t<_R1> __it1, std::ranges::iterator_t<_R1> __end1, + std::ranges::iterator_t<_R2> __it2, std::ranges::iterator_t<_R2> __end2, + std::ranges::iterator_t<_OutRange> __out_it, + std::ranges::iterator_t<_OutRange> __out_end, _Comp __comp = {}, _Proj1 __proj1 = {}, + _Proj2 __proj2 = {}) +{ + while (__it1 != __end1 && __it2 != __end2 /*&& __out_it != __out_end*/) // TODO commented till other implementations will be improved to check limited output range size + { + if (std::invoke(__comp, std::invoke(__proj1, *__it1), std::invoke(__proj2, *__it2))) + { + *__out_it = *__it1; + ++__it1; + ++__out_it; + } + else if (std::invoke(__comp, std::invoke(__proj2, *__it2), std::invoke(__proj1, *__it1))) + { + *__out_it = *__it2; + ++__it2; + ++__out_it; + } + else + { + ++__it1; + ++__it2; + } + } + + // TODO required 
to implement support of limmited output range + auto __copy1 = std::ranges::copy(__it1, __end1, __out_it); + auto __copy2 = std::ranges::copy(__it2, __end2, __copy1.out); + + return {__copy1.in, __copy2.in, __copy2.out}; +} + +template +std::ranges::set_symmetric_difference_result, + std::ranges::borrowed_iterator_t<_R2>, + std::ranges::borrowed_iterator_t<_OutRange>> +__serial_set_symmetric_difference(_R1&& __r1, _R2&& __r2, _OutRange&& __out_r, _Comp __comp = {}, _Proj1 __proj1 = {}, + _Proj2 __proj2 = {}) +{ + return __serial_set_symmetric_difference<_R1, _R2, _OutRange>( + std::ranges::begin(__r1), std::ranges::begin(__r1) + std::ranges::size(__r1), std::ranges::begin(__r2), + std::ranges::begin(__r2) + std::ranges::size(__r2), std::ranges::begin(__out_r), + std::ranges::begin(__out_r) + std::ranges::size(__out_r), __comp, __proj1, __proj2); +} + +template +std::ranges::set_symmetric_difference_result, + std::ranges::borrowed_iterator_t<_R2>, + std::ranges::borrowed_iterator_t<_OutRange>> +__brick_set_symmetric_difference(_R1&& __r1, _R2&& __r2, _OutRange&& __out_r, _Comp __comp, _Proj1 __proj1, + _Proj2 __proj2, + /*__is_vector=*/std::false_type) noexcept +{ + return __serial_set_symmetric_difference(std::forward<_R1>(__r1), std::forward<_R2>(__r2), + std::forward<_OutRange>(__out_r), __comp, __proj1, __proj2); +} + +template +std::ranges::set_symmetric_difference_result, + std::ranges::borrowed_iterator_t<_R2>, + std::ranges::borrowed_iterator_t<_OutRange>> +__brick_set_symmetric_difference(_R1&& __r1, _R2&& __r2, _OutRange&& __out_r, _Comp __comp, _Proj1 __proj1, + _Proj2 __proj2, + /*__is_vector=*/std::true_type) noexcept +{ + _PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return __serial_set_symmetric_difference(std::forward<_R1>(__r1), std::forward<_R2>(__r2), + std::forward<_OutRange>(__out_r), __comp, __proj1, __proj2); +} + +template +using __pattern_set_symmetric_difference_return_t = + 
std::ranges::set_symmetric_difference_result, + std::ranges::borrowed_iterator_t<_R2>, + std::ranges::borrowed_iterator_t<_OutRange>>; + +template +__pattern_set_symmetric_difference_return_t<_R1, _R2, _OutRange> +__pattern_set_symmetric_difference(_Tag __tag, _ExecutionPolicy&& __exec, _R1&& __r1, _R2&& __r2, _OutRange&& __out_r, + _Comp __comp, _Proj1 __proj1, _Proj2 __proj2) +{ + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __brick_set_symmetric_difference(std::forward<_R1>(__r1), std::forward<_R2>(__r2), + std::forward<_OutRange>(__out_r), __comp, __proj1, __proj2, + typename _Tag::__is_vector{}); +} + +template +__pattern_set_symmetric_difference_return_t<_R1, _R2, _OutRange> +__pattern_set_symmetric_difference(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _R1&& __r1, _R2&& __r2, + _OutRange&& __out_r, _Comp __comp, _Proj1 __proj1, _Proj2 __proj2) +{ + using _RandomAccessIterator1 = std::ranges::iterator_t<_R1>; + using _RandomAccessIterator2 = std::ranges::iterator_t<_R2>; + using _Tp = std::ranges::range_value_t<_OutRange>; + + const auto __n1 = std::ranges::size(__r1); + const auto __n2 = std::ranges::size(__r2); + + auto __first1 = std::ranges::begin(__r1); + auto __last1 = __first1 + __n1; + auto __first2 = std::ranges::begin(__r2); + auto __last2 = __first2 + __n2; + auto __result = std::ranges::begin(__out_r); + + // use serial algorithm + if (__n1 + __n2 <= oneapi::dpl::__internal::__set_algo_cut_off) + return __serial_set_symmetric_difference(std::forward<_R1>(__r1), std::forward<_R2>(__r2), + std::forward<_OutRange>(__out_r), __comp, __proj1, __proj2); + + auto __out_last = oneapi::dpl::__internal::__parallel_set_union_op( + __tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, + [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _Tp* __result, _Comp __comp, 
_Proj1 __proj1, _Proj2 __proj2) { + return oneapi::dpl::__utils::__set_symmetric_difference_construct( + __first1, __last1, __first2, __last2, __result, __comp, + oneapi::dpl::__internal::__BrickCopyConstruct<_IsVector>(), __proj1, __proj2); + }, + __proj1, __proj2); + + return __pattern_set_symmetric_difference_return_t<_R1, _R2, _OutRange>{__last1, __last2, __out_last}; +} + //--------------------------------------------------------------------------------------------------------------------- // __pattern_mismatch //--------------------------------------------------------------------------------------------------------------------- diff --git a/include/oneapi/dpl/pstl/glue_algorithm_ranges_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_ranges_impl.h index 2fad4ae655b..ebcecff3672 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_ranges_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_ranges_impl.h @@ -795,6 +795,153 @@ struct __merge_fn inline constexpr __internal::__merge_fn merge; +// [includes] +namespace __internal +{ +struct __includes_fn +{ + template , _Proj1>, + std::projected, _Proj2>> + _Comp = std::ranges::less> + requires oneapi::dpl::is_execution_policy_v> && + std::ranges::sized_range<_R1> && std::ranges::sized_range<_R2> + + bool + operator()(_ExecutionPolicy&& __exec, _R1&& __r1, _R2&& __r2, _Comp __comp = {}, _Proj1 __proj1 = {}, + _Proj2 __proj2 = {}) const + { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec); + return oneapi::dpl::__internal::__ranges::__pattern_includes( + __dispatch_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_R1>(__r1), std::forward<_R2>(__r2), + __comp, __proj1, __proj2); + } +}; //__includes_fn +} // namespace __internal + +inline constexpr __internal::__includes_fn includes; + +// [set.union] +namespace __internal +{ +struct __set_union_fn +{ + template + requires oneapi::dpl::is_execution_policy_v> && + std::ranges::sized_range<_R1> && std::ranges::sized_range<_R2> && + 
std::ranges::sized_range<_OutRange> && + std::mergeable, std::ranges::iterator_t<_R2>, + std::ranges::iterator_t<_OutRange>, _Comp, _Proj1, _Proj2> + + std::ranges::set_union_result, std::ranges::borrowed_iterator_t<_R2>, + std::ranges::borrowed_iterator_t<_OutRange>> + operator()(_ExecutionPolicy&& __exec, _R1&& __r1, _R2&& __r2, _OutRange&& __out_r, _Comp __comp = {}, + _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const + { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec); + return oneapi::dpl::__internal::__ranges::__pattern_set_union( + __dispatch_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_R1>(__r1), std::forward<_R2>(__r2), + std::forward<_OutRange>(__out_r), __comp, __proj1, __proj2); + } +}; //__set_union_fn +} // namespace __internal + +inline constexpr __internal::__set_union_fn set_union; + +// [set.intersection] + +namespace __internal +{ +struct __set_intersection_fn +{ + template + requires oneapi::dpl::is_execution_policy_v> && + std::ranges::sized_range<_R1> && std::ranges::sized_range<_R2> && + std::ranges::sized_range<_OutRange> && + std::mergeable, std::ranges::iterator_t<_R2>, + std::ranges::iterator_t<_OutRange>, _Comp, _Proj1, _Proj2> + + std::ranges::set_intersection_result, std::ranges::borrowed_iterator_t<_R2>, + std::ranges::borrowed_iterator_t<_OutRange>> + operator()(_ExecutionPolicy&& __exec, _R1&& __r1, _R2&& __r2, _OutRange&& __out_r, _Comp __comp = {}, + _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const + { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec); + return oneapi::dpl::__internal::__ranges::__pattern_set_intersection( + __dispatch_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_R1>(__r1), std::forward<_R2>(__r2), + std::forward<_OutRange>(__out_r), __comp, __proj1, __proj2); + } +}; //__set_intersection_fn +} // namespace __internal + +inline constexpr __internal::__set_intersection_fn set_intersection; + +// [set.difference] + +namespace 
__internal +{ +struct __set_difference_fn +{ + template + requires oneapi::dpl::is_execution_policy_v> && + std::ranges::sized_range<_R1> && std::ranges::sized_range<_R2> && + std::ranges::sized_range<_OutRange> && + std::mergeable, std::ranges::iterator_t<_R2>, + std::ranges::iterator_t<_OutRange>, _Comp, _Proj1, _Proj2> + + std::ranges::set_difference_result, + std::ranges::borrowed_iterator_t<_OutRange>> + operator()(_ExecutionPolicy&& __exec, _R1&& __r1, _R2&& __r2, _OutRange&& __out_r, _Comp __comp = {}, + _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const + { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec); + return oneapi::dpl::__internal::__ranges::__pattern_set_difference( + __dispatch_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_R1>(__r1), std::forward<_R2>(__r2), + std::forward<_OutRange>(__out_r), __comp, __proj1, __proj2); + } +}; //__set_difference_fn +} // namespace __internal + +inline constexpr __internal::__set_difference_fn set_difference; + +// [set.symmetric.difference] + +namespace __internal +{ +struct __set_symmetric_difference_fn +{ + template + requires oneapi::dpl::is_execution_policy_v> && + std::ranges::sized_range<_R1> && std::ranges::sized_range<_R2> && + std::ranges::sized_range<_OutRange> && + std::mergeable, std::ranges::iterator_t<_R2>, + std::ranges::iterator_t<_OutRange>, _Comp, _Proj1, _Proj2> + + std::ranges::set_symmetric_difference_result, + std::ranges::borrowed_iterator_t<_R2>, + std::ranges::borrowed_iterator_t<_OutRange>> + operator()(_ExecutionPolicy&& __exec, _R1&& __r1, _R2&& __r2, _OutRange&& __out_r, _Comp __comp = {}, + _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const + { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec); + return oneapi::dpl::__internal::__ranges::__pattern_set_symmetric_difference( + __dispatch_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_R1>(__r1), std::forward<_R2>(__r2), + std::forward<_OutRange>(__out_r), 
__comp, __proj1, __proj2); + } +}; //__set_symmetric_difference_fn +} // namespace __internal + +inline constexpr __internal::__set_symmetric_difference_fn set_symmetric_difference; + // [alg.fill] namespace __internal diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 2974f6e6532..071607c38a8 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -1475,7 +1475,7 @@ __pattern_includes(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Forwar if (__n1 == 0 || __n2 > __n1) return false; - using __brick_include_type = unseq_backend::__brick_includes<_Compare, decltype(__n1), decltype(__n2)>; + using __brick_includes_t = unseq_backend::__brick_includes; using _TagType = __par_backend_hetero::__parallel_or_tag; using __size_calc = oneapi::dpl::__ranges::__first_size_calc; @@ -1484,7 +1484,7 @@ __pattern_includes(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Forwar auto __buf2 = __keep(__first2, __last2); return !oneapi::dpl::__par_backend_hetero::__parallel_find_or(_BackendTag{}, std::forward<_ExecutionPolicy>(__exec), - __brick_include_type{__comp, __n1, __n2}, _TagType{}, + __brick_includes_t{__n2, __n1, __comp}, _TagType{}, __size_calc{}, __buf2.all_view(), __buf1.all_view()); } diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h index c1b9f2b36a2..412378888cd 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h @@ -655,8 +655,8 @@ __pattern_copy_if(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1& template auto -__pattern_copy_if_ranges(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _InRange&& __in_r, _OutRange&& __out_r, - _Pred __pred, _Proj __proj) 
+__pattern_copy_if_ranges(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _InRange&& __in_r, + _OutRange&& __out_r, _Pred __pred, _Proj __proj) { oneapi::dpl::__internal::__unary_op<_Pred, _Proj> __pred_1{__pred, __proj}; @@ -953,6 +953,357 @@ __pattern_merge_ranges(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exe return __return_t{std::ranges::begin(__r1) + __res.first, std::ranges::begin(__r2) + __res.second, std::ranges::begin(__out_r) + __n_out}; } + +template +bool +__pattern_includes(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _R1&& __r1, _R2&& __r2, _Comp __comp, + _Proj1 __proj1, _Proj2 __proj2) +{ + //according to the spec + if (std::ranges::empty(__r2)) + return true; + + const auto __n1 = std::ranges::size(__r1); + const auto __n2 = std::ranges::size(__r2); + + //optimization; {1} - the first sequence, {2} - the second sequence + //{1} is empty or size_of{2} > size_of{1} + if (std::ranges::empty(__r1) || __n2 > __n1) + return false; + + using __brick_includes_t = unseq_backend::__brick_includes; + using _TagType = __par_backend_hetero::__parallel_or_tag; + using __size_calc = oneapi::dpl::__ranges::__first_size_calc; + + return !oneapi::dpl::__par_backend_hetero::__parallel_find_or( + _BackendTag{}, std::forward<_ExecutionPolicy>(__exec), __brick_includes_t{__n2, __n1, __comp, __proj2, __proj1}, + _TagType{}, __size_calc{}, oneapi::dpl::__ranges::views::all_read(__r2), + oneapi::dpl::__ranges::views::all_read(__r1)); +} + +//Dummy names to avoid kernel problems +template +struct __set_union_copy_case_1; + +template +struct __set_union_copy_case_2; + +template +struct __set_union_scan_then_propagate; + +template +std::ranges::set_union_result, std::ranges::borrowed_iterator_t<_R2>, + std::ranges::borrowed_iterator_t<_OutRange>> +__pattern_set_union(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _R1&& __r1, _R2&& __r2, + _OutRange&& __out_r, _Comp __comp, _Proj1 __proj1, _Proj2 __proj2) +{ + const auto 
__first1 = std::ranges::begin(__r1); + const auto __first2 = std::ranges::begin(__r2); + const auto __result = std::ranges::begin(__out_r); + + if (__r1.empty() && __r2.empty()) + return {__first1, __first2, __result}; + + const auto __sz1 = std::ranges::size(__r1); + const auto __sz2 = std::ranges::size(__r2); + + //{1} is empty + if (__r1.empty()) + { + const auto __idx = oneapi::dpl::__internal::__ranges::__pattern_walk_n( + __tag, + oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_union_copy_case_1>( + std::forward<_ExecutionPolicy>(__exec)), + oneapi::dpl::__internal::__brick_copy<__hetero_tag<_BackendTag>>{}, + oneapi::dpl::__ranges::views::all_read(std::forward<_R2>(__r2)), + oneapi::dpl::__ranges::views::all_write(std::forward<_OutRange>(__out_r))); + + return {__first1, __first2 + __sz2, __result + __idx}; + } + + //{2} is empty + if (__r2.empty()) + { + const auto __idx = oneapi::dpl::__internal::__ranges::__pattern_walk_n( + __tag, + oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_union_copy_case_2>( + std::forward<_ExecutionPolicy>(__exec)), + oneapi::dpl::__internal::__brick_copy<__hetero_tag<_BackendTag>>{}, + oneapi::dpl::__ranges::views::all_read(std::forward<_R1>(__r1)), + oneapi::dpl::__ranges::views::all_write(std::forward<_OutRange>(__out_r))); + + return {__first1 + __sz1, __first2, __result + __idx}; + } + + if (__par_backend_hetero::__can_set_op_write_from_set_b(_BackendTag{}, __exec)) + { + const auto __idx = oneapi::dpl::__par_backend_hetero::__parallel_set_op( + _BackendTag{}, std::forward<_ExecutionPolicy>(__exec), + oneapi::dpl::__ranges::views::all_read(std::forward<_R1>(__r1)), + oneapi::dpl::__ranges::views::all_read(std::forward<_R2>(__r2)), + oneapi::dpl::__ranges::views::all_write(std::forward<_OutRange>(__out_r)), __comp, + unseq_backend::_UnionTag(), __proj1, __proj2) + .get(); + + return {__first1 + __sz1, __first2 + __sz2, __result + __idx}; + } + + using _ValueType = 
oneapi::dpl::__internal::__value_t<_R2>; + + // temporary buffer to store intermediate result + const auto __n2 = __r2.size(); + oneapi::dpl::__par_backend_hetero::__buffer<_ValueType> __diff(__n2); + auto __buf = oneapi::dpl::__ranges::views::all(__diff.get_buffer()); + + //1. Calc difference {2} \ {1} + const auto __n_diff = + oneapi::dpl::__par_backend_hetero::__parallel_set_op( + _BackendTag{}, + oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_union_scan_then_propagate>(__exec), + oneapi::dpl::__ranges::views::all_read(std::forward<_R2>(__r2)), + oneapi::dpl::__ranges::views::all_read(std::forward<_R1>(__r1)), __buf, __comp, + unseq_backend::_DifferenceTag(), __proj2, __proj1) + .get(); + + //2. Merge {1} and the difference + const auto __res = oneapi::dpl::__internal::__ranges::__pattern_merge( + __tag, + oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_union_copy_case_2>( + std::forward<_ExecutionPolicy>(__exec)), + oneapi::dpl::__ranges::views::all_read(std::forward<_R1>(__r1)), + oneapi::dpl::__ranges::take_view_simple(__buf, __n_diff), + oneapi::dpl::__ranges::views::all_write(std::forward<_OutRange>(__out_r)), __comp, __proj1, __proj2); + + const auto __idx = __res.first + __res.second; + return {__first1 + __sz1, __first2 + __sz2, __result + __idx}; +} + +template +struct __set_intersection_scan_then_propagate; + +template +std::ranges::set_intersection_result, std::ranges::borrowed_iterator_t<_R2>, + std::ranges::borrowed_iterator_t<_OutRange>> +__pattern_set_intersection(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _R1&& __r1, _R2&& __r2, + _OutRange&& __out_r, _Comp __comp, _Proj1 __proj1, _Proj2 __proj2) +{ + const auto __first1 = std::ranges::begin(__r1); + const auto __first2 = std::ranges::begin(__r2); + const auto __result = std::ranges::begin(__out_r); + + // intersection is empty + if (__r1.empty() || __r2.empty()) + return {__first1 + std::ranges::size(__r1), __first2 + std::ranges::size(__r2), 
__result}; + + const auto __sz1 = std::ranges::size(__r1); + const auto __sz2 = std::ranges::size(__r2); + + if (__par_backend_hetero::__can_set_op_write_from_set_b(_BackendTag{}, __exec)) + { + const auto __idx = oneapi::dpl::__par_backend_hetero::__parallel_set_op( + _BackendTag{}, std::forward<_ExecutionPolicy>(__exec), + oneapi::dpl::__ranges::views::all_read(std::forward<_R1>(__r1)), + oneapi::dpl::__ranges::views::all_read(std::forward<_R2>(__r2)), + oneapi::dpl::__ranges::views::all_write(std::forward<_OutRange>(__out_r)), __comp, + unseq_backend::_IntersectionTag(), __proj1, __proj2) + .get(); + + return {__first1 + __sz1, __first2 + __sz2, __result + __idx}; + } + + const auto __idx = + oneapi::dpl::__par_backend_hetero::__parallel_set_op( + _BackendTag{}, + oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_intersection_scan_then_propagate>( + std::forward<_ExecutionPolicy>(__exec)), + oneapi::dpl::__ranges::views::all_read(std::forward<_R1>(__r1)), + oneapi::dpl::__ranges::views::all_read(std::forward<_R2>(__r2)), + oneapi::dpl::__ranges::views::all_write(std::forward<_OutRange>(__out_r)), __comp, + unseq_backend::_IntersectionTag(), __proj1, __proj2) + .get(); + + return {__first1 + __sz1, __first2 + __sz2, __result + __idx}; +} + +//Dummy names to avoid kernel problems +template +struct __set_difference_copy_case_1; + +template +struct __set_difference_scan_then_propagate; + +template +std::ranges::set_difference_result, std::ranges::borrowed_iterator_t<_OutRange>> +__pattern_set_difference(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _R1&& __r1, _R2&& __r2, + _OutRange&& __out_r, _Comp __comp, _Proj1 __proj1, _Proj2 __proj2) +{ + const auto __first1 = std::ranges::begin(__r1); + const auto __result = std::ranges::begin(__out_r); + + // {} \ {2}: the difference is empty + if (__r1.empty()) + return {__first1, __result}; + + const auto __sz1 = std::ranges::size(__r1); + + // {1} \ {}: the difference is {1} + if (__r2.empty()) + 
{ + const auto __idx = oneapi::dpl::__internal::__ranges::__pattern_walk_n( + __tag, + oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_difference_copy_case_1>( + std::forward<_ExecutionPolicy>(__exec)), + oneapi::dpl::__internal::__brick_copy<__hetero_tag<_BackendTag>>{}, + oneapi::dpl::__ranges::views::all_read(std::forward<_R1>(__r1)), + oneapi::dpl::__ranges::views::all_write(std::forward<_OutRange>(__out_r))); + + return {__first1 + __sz1, __result + __idx}; + } + + if (__par_backend_hetero::__can_set_op_write_from_set_b(_BackendTag{}, __exec)) + { + const auto __idx = + oneapi::dpl::__par_backend_hetero::__parallel_set_op( + _BackendTag{}, + oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_difference_scan_then_propagate>( + std::forward<_ExecutionPolicy>(__exec)), + oneapi::dpl::__ranges::views::all_read(std::forward<_R1>(__r1)), + oneapi::dpl::__ranges::views::all_read(std::forward<_R2>(__r2)), + oneapi::dpl::__ranges::views::all_write(std::forward<_OutRange>(__out_r)), __comp, + unseq_backend::_DifferenceTag(), __proj1, __proj2) + .get(); + + return {__first1 + __sz1, __result + __idx}; + } + + const auto __idx = __par_backend_hetero::__parallel_set_op( + _BackendTag{}, std::forward<_ExecutionPolicy>(__exec), + oneapi::dpl::__ranges::views::all_read(std::forward<_R1>(__r1)), + oneapi::dpl::__ranges::views::all_read(std::forward<_R2>(__r2)), + oneapi::dpl::__ranges::views::all_write(std::forward<_OutRange>(__out_r)), __comp, + unseq_backend::_DifferenceTag(), __proj1, __proj2) + .get(); + + return {__first1 + __sz1, __result + __idx}; +} + +//Dummy names to avoid kernel problems +template +struct __set_symmetric_difference_copy_case_1; + +template +struct __set_symmetric_difference_copy_case_2; + +template +struct __set_symmetric_difference_phase_1; + +template +struct __set_symmetric_difference_phase_2; + +template +std::ranges::set_symmetric_difference_result, + std::ranges::borrowed_iterator_t<_R2>, + 
std::ranges::borrowed_iterator_t<_OutRange>> +__pattern_set_symmetric_difference(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _R1&& __r1, _R2&& __r2, + _OutRange&& __out_r, _Comp __comp, _Proj1 __proj1, _Proj2 __proj2) +{ + const auto __first1 = std::ranges::begin(__r1); + const auto __first2 = std::ranges::begin(__r2); + const auto __result = std::ranges::begin(__out_r); + + if (__r1.empty() && __r2.empty()) + return {__first1, __first2, __result}; + + const auto __n1 = std::ranges::size(__r1); + const auto __n2 = std::ranges::size(__r2); + + //{1} is empty + if (__r1.empty()) + { + const auto __idx = oneapi::dpl::__internal::__ranges::__pattern_walk_n( + __tag, + oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_symmetric_difference_copy_case_1>( + std::forward<_ExecutionPolicy>(__exec)), + oneapi::dpl::__internal::__brick_copy<__hetero_tag<_BackendTag>>{}, + oneapi::dpl::__ranges::views::all_read(std::forward<_R2>(__r2)), + oneapi::dpl::__ranges::views::all_write(std::forward<_OutRange>(__out_r))); + + return {__first1, __first2 + __n2, __result + __idx}; + } + + //{2} is empty + if (__r2.empty()) + { + const auto __idx = oneapi::dpl::__internal::__ranges::__pattern_walk_n( + __tag, + oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_symmetric_difference_copy_case_2>( + std::forward<_ExecutionPolicy>(__exec)), + oneapi::dpl::__internal::__brick_copy<__hetero_tag<_BackendTag>>{}, + oneapi::dpl::__ranges::views::all_read(std::forward<_R1>(__r1)), + oneapi::dpl::__ranges::views::all_write(std::forward<_OutRange>(__out_r))); + + return {__first1 + __n1, __first2, __result + __idx}; + } + + if (__par_backend_hetero::__can_set_op_write_from_set_b(_BackendTag{}, __exec)) + { + const auto __idx = + oneapi::dpl::__par_backend_hetero::__parallel_set_op( + _BackendTag{}, + oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_difference_scan_then_propagate>( + std::forward<_ExecutionPolicy>(__exec)), + 
oneapi::dpl::__ranges::views::all_read(std::forward<_R1>(__r1)), + oneapi::dpl::__ranges::views::all_read(std::forward<_R2>(__r2)), + oneapi::dpl::__ranges::views::all_write(std::forward<_OutRange>(__out_r)), __comp, + unseq_backend::_SymmetricDifferenceTag(), __proj1, __proj2) + .get(); + + return {__first1 + __n1, __first2 + __n2, __result + __idx}; + } + + using _ValueType1 = oneapi::dpl::__internal::__value_t<_R1>; + using _ValueType2 = oneapi::dpl::__internal::__value_t<_R2>; + + // temporary buffers to store intermediate result + oneapi::dpl::__par_backend_hetero::__buffer<_ValueType1> __diff_1(__n1); + auto __buf_1 = oneapi::dpl::__ranges::views::all(__diff_1.get_buffer()); + oneapi::dpl::__par_backend_hetero::__buffer<_ValueType2> __diff_2(__n2); + auto __buf_2 = oneapi::dpl::__ranges::views::all(__diff_2.get_buffer()); + + //1. Calc difference {1} \ {2} + const auto __n_diff_1 = + oneapi::dpl::__par_backend_hetero::__parallel_set_op( + _BackendTag{}, + oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_symmetric_difference_phase_1>(__exec), + oneapi::dpl::__ranges::views::all_read(std::forward<_R1>(__r1)), + oneapi::dpl::__ranges::views::all_read(std::forward<_R2>(__r2)), __buf_1, __comp, + unseq_backend::_DifferenceTag(), __proj1, __proj2) + .get(); + + //2. Calc difference {2} \ {1} + const auto __n_diff_2 = + oneapi::dpl::__par_backend_hetero::__parallel_set_op( + _BackendTag{}, + oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_symmetric_difference_phase_2>(__exec), + oneapi::dpl::__ranges::views::all_read(std::forward<_R2>(__r2)), + oneapi::dpl::__ranges::views::all_read(std::forward<_R1>(__r1)), __buf_2, __comp, + unseq_backend::_DifferenceTag(), __proj2, __proj1) + .get(); + + //3. 
Merge the differences + oneapi::dpl::__internal::__ranges::__pattern_merge( + __tag, std::forward<_ExecutionPolicy>(__exec), oneapi::dpl::__ranges::take_view_simple(__buf_1, __n_diff_1), + oneapi::dpl::__ranges::take_view_simple(__buf_2, __n_diff_2), oneapi::dpl::__ranges::views::all_write(__out_r), + __comp, __proj1, __proj2); + + return {__first1 + __n1, __first2 + __n2, __result + __n_diff_1 + __n_diff_2}; +} + #endif //_ONEDPL_CPP20_RANGES_PRESENT //------------------------------------------------------------------------ diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 962ec26a5b8..ad3620e39c7 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -1047,10 +1047,10 @@ __parallel_set_reduce_then_scan_set_a_write(sycl::queue& __q, _Range1&& __rng1, // balanced path template + typename _SetTag, typename _Proj1 = oneapi::dpl::identity, typename _Proj2 = oneapi::dpl::identity> __future>> __parallel_set_reduce_then_scan(sycl::queue& __q, _Range1&& __rng1, _Range2&& __rng2, _Range3&& __result, - _Compare __comp, _SetTag) + _Compare __comp, _SetTag, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) { constexpr std::uint16_t __diagonal_spacing = 32; @@ -1063,10 +1063,11 @@ __parallel_set_reduce_then_scan(sycl::queue& __q, _Range1&& __rng1, _Range2&& __ using _ReduceOp = std::plus<_Size>; using _BoundsProvider = oneapi::dpl::__par_backend_hetero::__get_bounds_partitioned; - using _GenReduceInput = - oneapi::dpl::__par_backend_hetero::__gen_set_balanced_path<_SetOperation, _BoundsProvider, _Compare>; + using _GenReduceInput = oneapi::dpl::__par_backend_hetero::__gen_set_balanced_path<_SetOperation, _BoundsProvider, + _Compare, _Proj1, _Proj2>; using _GenScanInput = - oneapi::dpl::__par_backend_hetero::__gen_set_op_from_known_balanced_path<_SetOperation, _TempData, _Compare>; + 
oneapi::dpl::__par_backend_hetero::__gen_set_op_from_known_balanced_path<_SetOperation, _TempData, _Compare, + _Proj1, _Proj2>; using _ScanInputTransform = oneapi::dpl::__par_backend_hetero::__get_zeroth_element; using _WriteOp = oneapi::dpl::__par_backend_hetero::__write_multiple_to_id; @@ -1089,7 +1090,7 @@ __parallel_set_reduce_then_scan(sycl::queue& __q, _Range1&& __rng1, _Range2&& __ _GenReduceInput __gen_reduce_input{_SetOperation{}, __diagonal_spacing, _BoundsProvider{__diagonal_spacing, __partition_size, __partition_threshold}, - __comp}; + __comp, __proj1, __proj2}; constexpr std::uint32_t __bytes_per_work_item_iter = __average_input_ele_size * (__diagonal_spacing + 1) + sizeof(_TemporaryType); @@ -1107,16 +1108,16 @@ __parallel_set_reduce_then_scan(sycl::queue& __q, _Range1&& __rng1, _Range2&& __ } return __parallel_transform_reduce_then_scan<__bytes_per_work_item_iter, _CustomName>( __q, __num_diagonals, std::move(__in_in_tmp_rng), std::forward<_Range3>(__result), __gen_reduce_input, - _ReduceOp{}, _GenScanInput{_SetOperation{}, __diagonal_spacing, __comp}, _ScanInputTransform{}, _WriteOp{}, - oneapi::dpl::unseq_backend::__no_init_value<_Size>{}, /*_Inclusive=*/std::true_type{}, - /*__is_unique_pattern=*/std::false_type{}, __partition_event); + _ReduceOp{}, _GenScanInput{_SetOperation{}, __diagonal_spacing, __comp, __proj1, __proj2}, + _ScanInputTransform{}, _WriteOp{}, oneapi::dpl::unseq_backend::__no_init_value<_Size>{}, + /*_Inclusive=*/std::true_type{}, /*__is_unique_pattern=*/std::false_type{}, __partition_event); } template + typename _IsOpDifference, typename _Proj1 = oneapi::dpl::identity, typename _Proj2 = oneapi::dpl::identity> __future>> __parallel_set_scan(sycl::queue& __q, _Range1&& __rng1, _Range2&& __rng2, _Range3&& __result, _Compare __comp, - _IsOpDifference) + _IsOpDifference, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) { using _Size1 = oneapi::dpl::__internal::__difference_t<_Range1>; using _Size2 = 
oneapi::dpl::__internal::__difference_t<_Range2>; @@ -1137,7 +1138,8 @@ __parallel_set_scan(sycl::queue& __q, _Range1&& __rng1, _Range2&& __rng2, _Range _DataAcc __get_data_op; unseq_backend::__copy_by_mask<_ReduceOp, oneapi::dpl::__internal::__pstl_assign, /*inclusive*/ std::true_type, 2> __copy_by_mask_op; - unseq_backend::__brick_set_op<_Compare, _Size1, _Size2, _IsOpDifference> __create_mask_op{__comp, __n1, __n2}; + unseq_backend::__brick_set_op<_Size1, _Size2, _IsOpDifference, _Compare, _Proj1, _Proj2> __create_mask_op{ + __n1, __n2, __comp, __proj1, __proj2}; // temporary buffer to store boolean mask oneapi::dpl::__par_backend_hetero::__buffer __mask_buf(__n1); @@ -1161,10 +1163,11 @@ __parallel_set_scan(sycl::queue& __q, _Range1&& __rng1, _Range2&& __rng2, _Range } template + typename _SetTag, typename _Proj1 = oneapi::dpl::identity, typename _Proj2 = oneapi::dpl::identity> __future>> __parallel_set_op(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, - _Range2&& __rng2, _Range3&& __result, _Compare __comp, _SetTag __set_tag) + _Range2&& __rng2, _Range3&& __result, _Compare __comp, _SetTag __set_tag, _Proj1 __proj1 = {}, + _Proj2 __proj2 = {}) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; @@ -1172,14 +1175,14 @@ __parallel_set_op(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolic if constexpr (_SetTag::__can_write_from_rng2_v) { - return __parallel_set_reduce_then_scan<_CustomName>(__q_local, std::forward<_Range1>(__rng1), - std::forward<_Range2>(__rng2), - std::forward<_Range3>(__result), __comp, __set_tag); + return __parallel_set_reduce_then_scan<_CustomName>( + __q_local, std::forward<_Range1>(__rng1), std::forward<_Range2>(__rng2), std::forward<_Range3>(__result), + __comp, __set_tag, __proj1, __proj2); } else { return __parallel_set_scan<_CustomName>(__q_local, std::forward<_Range1>(__rng1), std::forward<_Range2>(__rng2), - std::forward<_Range3>(__result), 
__comp, __set_tag); + std::forward<_Range3>(__result), __comp, __set_tag, __proj1, __proj2); } } diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_for.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_for.h index c8562659850..d329a6dcd35 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_for.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_for.h @@ -83,8 +83,7 @@ struct __parallel_for_small_submitter<__internal::__optional_kernel_name<_Name.. __future operator()(sycl::queue& __q, _Fp __brick, _Index __count, _Ranges&&... __rngs) const { - assert(std::min({std::make_unsigned_t...>>( - __rngs.size())...}) > 0); + assert(oneapi::dpl::__ranges::__min_size_calc{}(__rngs...) > 0); assert(__count > 0); _PRINT_INFO_IN_DEBUG_MODE(__q); @@ -235,8 +234,7 @@ __parallel_for(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __has_pfor_brick_members_v<_Fp>, "The brick provided to __parallel_for must define const / constexpr static bool members __can_vectorize and " "__can_process_multiple_iters which must be evaluated at compile time."); - assert(std::min({std::make_unsigned_t...>>( - __rngs.size())...}) > 0); + assert(oneapi::dpl::__ranges::__min_size_calc{}(__rngs...) > 0); assert(__count > 0); using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index dc2fa0b6dfc..c36a829c876 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -60,8 +60,7 @@ struct __parallel_for_fpga_submitter<__internal::__optional_kernel_name<_Name... __future operator()(sycl::queue& __q, _Fp __brick, _Index __count, _Ranges&&... 
__rngs) const { - assert(std::min({std::make_unsigned_t...>>( - __rngs.size())...}) > 0); + assert(oneapi::dpl::__ranges::__min_size_calc{}(__rngs...) > 0); assert(__count > 0); _PRINT_INFO_IN_DEBUG_MODE(__q); diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge.h index 5911141f4c0..9162632c2a3 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge.h @@ -194,7 +194,8 @@ __serial_merge(const _Rng1& __rng1, const _Rng2& __rng2, _Rng3& __rng3, const _I { // TODO required to understand why the usual if-else is slower then ternary operator if (!__rng1_idx_less_n1 || (__rng1_idx_less_n1 && __rng2_idx_less_n2 && - __comp(__proj2(__rng2[__rng2_idx]), __proj1(__rng1[__rng1_idx])))) + std::invoke(__comp, std::invoke(__proj2, __rng2[__rng2_idx]), + std::invoke(__proj1, __rng1[__rng1_idx])))) __rng3[__rng3_idx] = __rng2[__rng2_idx++]; else __rng3[__rng3_idx] = __rng1[__rng1_idx++]; diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce_then_scan.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce_then_scan.h index b5b64bfee5a..5f42e0e5bcb 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce_then_scan.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce_then_scan.h @@ -431,11 +431,13 @@ struct __gen_set_mask // Returns by reference: iterations consumed, and the number of elements copied to temp output. 
template + typename _InRng2, typename _SizeType, typename _TempOutput, typename _Compare, + typename _Proj1 = oneapi::dpl::identity, typename _Proj2 = oneapi::dpl::identity> void __set_generic_operation_iteration(const _InRng1& __in_rng1, const _InRng2& __in_rng2, std::size_t& __idx1, std::size_t& __idx2, const _SizeType __num_eles_min, _TempOutput& __temp_out, - _SizeType& __idx, std::uint16_t& __count, const _Compare __comp) + _SizeType& __idx, std::uint16_t& __count, const _Compare __comp, _Proj1 __proj1 = {}, + _Proj2 __proj2 = {}) { using _ValueTypeRng1 = typename oneapi::dpl::__internal::__value_t<_InRng1>; using _ValueTypeRng2 = typename oneapi::dpl::__internal::__value_t<_InRng2>; @@ -474,7 +476,7 @@ __set_generic_operation_iteration(const _InRng1& __in_rng1, const _InRng2& __in_ const _ValueTypeRng1& __ele_rng1 = __in_rng1[__idx1]; const _ValueTypeRng2& __ele_rng2 = __in_rng2[__idx2]; - if (__comp(__ele_rng1, __ele_rng2)) + if (std::invoke(__comp, std::invoke(__proj1, __ele_rng1), std::invoke(__proj2, __ele_rng2))) { if constexpr (_CopyDiffSetA) { @@ -484,7 +486,7 @@ __set_generic_operation_iteration(const _InRng1& __in_rng1, const _InRng2& __in_ ++__idx1; ++__idx; } - else if (__comp(__ele_rng2, __ele_rng1)) + else if (std::invoke(__comp, std::invoke(__proj2, __ele_rng2), std::invoke(__proj1, __ele_rng1))) { if constexpr (_CopyDiffSetB) { @@ -512,10 +514,12 @@ __set_generic_operation_iteration(const _InRng1& __in_rng1, const _InRng2& __in_ template struct __set_generic_operation { - template + template std::uint16_t operator()(const _InRng1& __in_rng1, const _InRng2& __in_rng2, std::size_t __idx1, std::size_t __idx2, - const _SizeType __num_eles_min, _TempOutput& __temp_out, const _Compare __comp) const + const _SizeType __num_eles_min, _TempOutput& __temp_out, const _Compare __comp, _Proj1 __proj1 = {}, + _Proj2 __proj2 = {}) const { std::uint16_t __count = 0; @@ -529,7 +533,8 @@ struct __set_generic_operation { // no bounds checking 
__set_generic_operation_iteration<_CopyMatch, _CopyDiffSetA, _CopyDiffSetB, false>( - __in_rng1, __in_rng2, __idx1, __idx2, __num_eles_min, __temp_out, __idx, __count, __comp); + __in_rng1, __in_rng2, __idx1, __idx2, __num_eles_min, __temp_out, __idx, __count, __comp, __proj1, + __proj2); } } else @@ -538,7 +543,8 @@ struct __set_generic_operation { //bounds check all __set_generic_operation_iteration<_CopyMatch, _CopyDiffSetA, _CopyDiffSetB, true>( - __in_rng1, __in_rng2, __idx1, __idx2, __num_eles_min, __temp_out, __idx, __count, __comp); + __in_rng1, __in_rng2, __idx1, __idx2, __num_eles_min, __temp_out, __idx, __count, __comp, __proj1, + __proj2); } } return __count; @@ -672,7 +678,8 @@ struct __get_bounds_simple // Reduce then scan building block for set balanced path which is used in the reduction kernel to calculate the // balanced path intersection, store it to temporary data with "star" status, then count the number of elements to write // to the output for the reduction operation. -template +template struct __gen_set_balanced_path { using TempData = __noop_temp_data; @@ -696,9 +703,9 @@ struct __gen_set_balanced_path return std::make_tuple(__merge_path_rng1, __merge_path_rng2, false); } - const auto __ele_val = __rng1[__merge_path_rng1 - 1]; + const auto __ele_val_proj = std::invoke(__proj1, __rng1[__merge_path_rng1 - 1]); - if (__comp(__ele_val, __rng2[__merge_path_rng2])) + if (std::invoke(__comp, __ele_val_proj, std::invoke(__proj2, __rng2[__merge_path_rng2]))) { // There is no chance that the balanced path differs from the merge path here, because the previous element of // rng1 does not match the next element of rng2. We can just return the merge path. 
@@ -707,10 +714,10 @@ struct __gen_set_balanced_path // find first element of repeating sequence in the first set of the previous element _Index __rng1_repeat_start = oneapi::dpl::__internal::__biased_lower_bound( - __rng1, __rng1_begin, __merge_path_rng1, __ele_val, __comp); + __rng1, __rng1_begin, __merge_path_rng1, __ele_val_proj, __comp, __proj1); // find first element of repeating sequence in the second set of the next element _Index __rng2_repeat_start = oneapi::dpl::__internal::__biased_lower_bound( - __rng2, __rng2_begin, __merge_path_rng2, __ele_val, __comp); + __rng2, __rng2_begin, __merge_path_rng2, __ele_val_proj, __comp, __proj2); _Index __rng1_repeats = __merge_path_rng1 - __rng1_repeat_start; _Index __rng2_repeats_bck = __merge_path_rng2 - __rng2_repeat_start; @@ -729,7 +736,7 @@ struct __gen_set_balanced_path _Index __fwd_search_bound = std::min(__merge_path_rng2 + __fwd_search_count, __rng2_end); _Index __balanced_path_intersection_rng2 = oneapi::dpl::__internal::__pstl_upper_bound( - __rng2, __merge_path_rng2, __fwd_search_bound, __ele_val, __comp); + __rng2, __merge_path_rng2, __fwd_search_bound, __ele_val_proj, __comp, __proj2); // Calculate the number of matchable "future" repeats in the second set _Index __matchable_forward_ele_rng2 = __balanced_path_intersection_rng2 - __merge_path_rng2; @@ -766,7 +773,7 @@ struct __gen_set_balanced_path auto [__rng1_lower, __rng1_upper, __rng2_lower, __rng2_upper] = __get_bounds_local(__in_rng, __id); //find merge path intersection auto [__rng1_pos, __rng2_pos] = oneapi::dpl::__par_backend_hetero::__find_start_point( - __rng1, __rng1_lower, __rng1_upper, __rng2, __rng2_lower, __rng2_upper, __i_elem, __comp); + __rng1, __rng1_lower, __rng1_upper, __rng2, __rng2_lower, __rng2_upper, __i_elem, __comp, __proj1, __proj2); //Find balanced path for diagonal start auto [__rng1_balanced_pos, __rng2_balanced_pos, __star] = __find_balanced_path_start_point( @@ -840,19 +847,22 @@ struct __gen_set_balanced_path 
__rng1.size() + __rng2.size() - _IndexT{__id * __diagonal_spacing - 1}); std::uint16_t __count = __set_op_count(__rng1, __rng2, __rng1_balanced_pos, __rng2_balanced_pos, - __eles_to_process, __temp_data, __comp); + __eles_to_process, __temp_data, __comp, __proj1, __proj2); return __count; } _SetOpCount __set_op_count; std::uint16_t __diagonal_spacing; _BoundsProvider __get_bounds; _Compare __comp; + _Proj1 __proj1; + _Proj2 __proj2; }; // Reduce then scan building block for set balanced path which is used in the scan kernel to decode the stored balanced // path intersection, perform the serial set operation for the diagonal, counting the number of elements and writing // the output to temporary data in registers to be ready for the scan and write operations to follow. -template +template struct __gen_set_op_from_known_balanced_path { using TempData = _TempData; @@ -881,13 +891,16 @@ struct __gen_set_op_from_known_balanced_path static_cast(std::min(static_cast<_SizeType>(__diagonal_spacing - __star_offset), static_cast<_SizeType>(__rng1.size() + __rng2.size() - __i_elem + 1))); - std::uint16_t __count = - __set_op_count(__rng1, __rng2, __rng1_idx, __rng2_idx, __eles_to_process, __output_data, __comp); + std::uint16_t __count = __set_op_count(__rng1, __rng2, __rng1_idx, __rng2_idx, __eles_to_process, __output_data, + __comp, __proj1, __proj2); + return std::make_tuple(std::uint32_t{__count}, __count); } _SetOpCount __set_op_count; std::uint16_t __diagonal_spacing; _Compare __comp; + _Proj1 __proj1; + _Proj2 __proj2; }; // kernel for balanced path to partition the input into tiles by calculating balanced path on diagonals of tile bounds diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h index e0481de1fb2..18b2fffb057 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h @@ -486,10 +486,10 @@ struct __red_by_seg_op; template struct 
__scan_by_seg_op; -template +template struct __gen_set_balanced_path; -template +template struct __gen_set_op_from_known_balanced_path; template +template struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::__par_backend_hetero::__gen_set_balanced_path, - _SetOpCount, _BoundsProvider, _Compare)> - : oneapi::dpl::__internal::__are_all_device_copyable<_SetOpCount, _BoundsProvider, _Compare> + _SetOpCount, _BoundsProvider, _Compare, _Proj1, _Proj2)> + : oneapi::dpl::__internal::__are_all_device_copyable<_SetOpCount, _BoundsProvider, _Compare, _Proj1, _Proj2> { }; -template +template struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR( - oneapi::dpl::__par_backend_hetero::__gen_set_op_from_known_balanced_path, _SetOpCount, _TempData, _Compare)> - : oneapi::dpl::__internal::__are_all_device_copyable<_SetOpCount, _Compare> + oneapi::dpl::__par_backend_hetero::__gen_set_op_from_known_balanced_path, _SetOpCount, _TempData, _Compare, _Proj1, + _Proj2)> : oneapi::dpl::__internal::__are_all_device_copyable<_SetOpCount, _Compare, _Proj1, _Proj2> { }; @@ -721,10 +721,11 @@ template struct __scan; -template +template struct __brick_includes; -template +template class __brick_set_op; template @@ -838,17 +839,18 @@ struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::unseq_backen { }; -template -struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::unseq_backend::__brick_includes, _Compare, _Size1, - _Size2)> - : oneapi::dpl::__internal::__are_all_device_copyable<_Compare, _Size1, _Size2> +template +struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::unseq_backend::__brick_includes, _Size1, _Size2, + _Compare, _Proj1, _Proj2)> + : oneapi::dpl::__internal::__are_all_device_copyable<_Size1, _Size2, _Compare, _Proj1, _Proj2> { }; -template -struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::unseq_backend::__brick_set_op, _Compare, _Size1, - _Size2, _IsOpDifference)> - : 
oneapi::dpl::__internal::__are_all_device_copyable<_Compare, _Size1, _Size2> +template +struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::unseq_backend::__brick_set_op, _Size1, _Size2, + _IsOpDifference, _Compare, _Proj1, _Proj2)> + : oneapi::dpl::__internal::__are_all_device_copyable<_Size1, _Size2, _Compare, _Proj1, _Proj2> { }; diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h index e858bbe5c75..e91293d2ac3 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h @@ -987,54 +987,54 @@ struct __scan // __brick_includes //------------------------------------------------------------------------ -template +template struct __brick_includes { + _Size1 __size1; + _Size2 __size2; _Compare __comp; - _Size1 __na; - _Size2 __nb; + _Proj1 __proj1; + _Proj2 __proj2; - __brick_includes(_Compare __c, _Size1 __n1, _Size2 __n2) : __comp(__c), __na(__n1), __nb(__n2) {} + __brick_includes(_Size1 __size1, _Size2 __size2, _Compare __comp, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) + : __size1(__size1), __size2(__size2), __comp(__comp), __proj1(__proj1), __proj2(__proj2) {} - template + template bool - operator()(_ItemId __idx, const _Acc1& __b_acc, const _Acc2& __a_acc) const + operator()(_ItemId __idx, const __Rng1& __rng1, const __Rng2& __rng2) const { - using ::std::get; - - auto __a = __a_acc; - auto __b = __b_acc; - - auto __a_beg = _Size1(0); - auto __a_end = __na; - - auto __b_beg = _Size2(0); - auto __b_end = __nb; + using std::get; // testing __comp(*__first2, *__first1) or __comp(*(__last1 - 1), *(__last2 - 1)) - if ((__idx == 0 && __comp(__b[__b_beg + 0], __a[__a_beg + 0])) || - (__idx == __nb - 1 && __comp(__a[__a_end - 1], __b[__b_end - 1]))) - return true; //__a doesn't include __b + if ((__idx == 0 && std::invoke(__comp, std::invoke(__proj1, __rng1[0]), std::invoke(__proj2, __rng2[0]))) || + 
(__idx == __size1 - 1 && + std::invoke(__comp, std::invoke(__proj2, __rng2[__size2 - 1]), std::invoke(__proj1, __rng1[__size1 - 1])))) + return true; //__rng2 doesn't include __rng1 + + const auto __idx_b = 0 + __idx; + const auto __val_b = __rng1[__idx_b]; - const auto __idx_b = __b_beg + __idx; - const auto __val_b = __b[__idx_b]; - auto __res = __internal::__pstl_lower_bound(__a, __a_beg, __a_end, __val_b, __comp); + auto __res = + __internal::__pstl_lower_bound(__rng2, _Size1{0}, __size2, std::invoke(__proj1, __val_b), __comp, __proj2); - // {a} < {b} or __val_b != __a[__res] - if (__res == __a_end || __comp(__val_b, __a[__res])) - return true; //__a doesn't include __b + // {a} < {b} or __val_b != __rng2[__res] + if (__res == __size2 || std::invoke(__comp, std::invoke(__proj1, __val_b), std::invoke(__proj2, __rng2[__res]))) + return true; //__rng2 doesn't include __rng1 - auto __val_a = __a[__res]; + auto __val_a = __rng2[__res]; //searching number of duplication - const auto __count_a = __internal::__pstl_right_bound(__a, __res, __a_end, __val_a, __comp) - - __internal::__pstl_left_bound(__a, __a_beg, __res, __val_a, __comp); + const auto __count_a = + __internal::__pstl_right_bound(__rng2, __res, __size2, std::invoke(__proj2, __val_a), __comp, __proj2) - + __internal::__pstl_left_bound(__rng2, _Size1{0}, __res, std::invoke(__proj2, __val_a), __comp, __proj2); - const auto __count_b = __internal::__pstl_right_bound(__b, _Size2(__idx_b), __b_end, __val_b, __comp) - - __idx_b + __idx_b - - __internal::__pstl_left_bound(__b, __b_beg, _Size2(__idx_b), __val_b, __comp); + const auto __count_b = __internal::__pstl_right_bound(__rng1, _Size2(__idx_b), __size1, + std::invoke(__proj1, __val_b), __comp, __proj1) - + __internal::__pstl_left_bound(__rng1, _Size2{0}, _Size2(__idx_b), + std::invoke(__proj1, __val_b), __comp, __proj1); - return __count_b > __count_a; //false means __a includes __b + return __count_b > __count_a; //false means __rng2 includes __rng1 } }; @@ 
-1249,23 +1249,27 @@ struct _SymmetricDifferenceTag : public std::true_type static constexpr bool __can_write_from_rng2_v = _IsOneShot::value; }; -template +template class __brick_set_op { - _Compare __comp; _Size1 __na; _Size2 __nb; + _Compare __comp; + _Proj1 __proj1; + _Proj2 __proj2; public: - __brick_set_op(_Compare __c, _Size1 __n1, _Size2 __n2) : __comp(__c), __na(__n1), __nb(__n2) {} + __brick_set_op(_Size1 __na, _Size2 __nb, _Compare __comp, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) + : __na(__na), __nb(__nb), __comp(__comp), __proj1(__proj1), __proj2(__proj2) {} template bool operator()(_ItemId __idx, const _Acc& __inout_acc) const { using ::std::get; - auto __a = get<0>(__inout_acc.tuple()); // first sequence - auto __b = get<1>(__inout_acc.tuple()); // second sequence + auto __a = get<0>(__inout_acc.tuple()); // first sequence: applied __proj1 + auto __b = get<1>(__inout_acc.tuple()); // second sequence: applied __proj2 auto __c = get<2>(__inout_acc.tuple()); // mask buffer auto __a_beg = _Size1(0); @@ -1275,10 +1279,12 @@ class __brick_set_op const auto __idx_a = __idx; auto __val_a = __a[__a_beg + __idx_a]; - auto __res = __internal::__pstl_lower_bound(__b, _Size2(0), __nb, __val_a, __comp); + auto __res = + __internal::__pstl_lower_bound(__b, _Size2(0), __nb, std::invoke(__proj1, __val_a), __comp, __proj2); bool bres = _IsOpDifference(); //initialization in true in case of difference operation; false - intersection. 
- if (__res == __nb || __comp(__val_a, __b[__b_beg + __res])) + if (__res == __nb || + std::invoke(__comp, std::invoke(__proj1, __val_a), std::invoke(__proj2, __b[__b_beg + __res]))) { // there is no __val_a in __b, so __b in the difference {__a}/{__b}; } @@ -1292,12 +1298,13 @@ class __brick_set_op //Intersection operation logic: if number of duplication in __a on left side from __idx <= total number of //duplication in __b than a mask is 1 - const _Size1 __count_a_left = - __idx_a - __internal::__pstl_left_bound(__a, _Size1(0), _Size1(__idx_a), __val_a, __comp) + 1; + const _Size1 __count_a_left = __idx_a - __internal::__pstl_left_bound(__a, + _Size1(0), _Size1(__idx_a), std::invoke(__proj1, __val_a), __comp, __proj1) + 1; - const _Size2 __count_b = __internal::__pstl_right_bound(__b, _Size2(__res), __nb, __val_b, __comp) - __res + - __res - - __internal::__pstl_left_bound(__b, _Size2(0), _Size2(__res), __val_b, __comp); + const _Size2 __count_b = __internal::__pstl_right_bound(__b, _Size2(__res), __nb, + std::invoke(__proj2, __val_b), __comp, __proj2) - + __internal::__pstl_left_bound(__b, _Size2(0), _Size2(__res), + std::invoke(__proj2, __val_b), __comp, __proj2); if constexpr (_IsOpDifference::value) bres = __count_a_left > __count_b; /*difference*/ diff --git a/include/oneapi/dpl/pstl/histogram_binhash_utils.h b/include/oneapi/dpl/pstl/histogram_binhash_utils.h index 609bae7be4a..aabb863d760 100644 --- a/include/oneapi/dpl/pstl/histogram_binhash_utils.h +++ b/include/oneapi/dpl/pstl/histogram_binhash_utils.h @@ -87,12 +87,10 @@ template ::std::int32_t __custom_boundary_get_bin_helper(_Acc __acc, ::std::int32_t __size, _T2 __value, _T3 __min, _T3 __max) { - ::std::int32_t ret = -1; + std::int32_t ret = -1; if (__value >= __min && __value < __max) { - ret = - oneapi::dpl::__internal::__pstl_upper_bound(__acc, ::std::int32_t{0}, __size, __value, ::std::less<_T2>{}) - - 1; + ret = oneapi::dpl::__internal::__pstl_upper_bound(__acc, std::int32_t{0}, __size, 
__value, std::less<_T2>{}) - 1; } return ret; } diff --git a/include/oneapi/dpl/pstl/onedpl_config.h b/include/oneapi/dpl/pstl/onedpl_config.h index 68e7cba1a72..caf0c1114db 100644 --- a/include/oneapi/dpl/pstl/onedpl_config.h +++ b/include/oneapi/dpl/pstl/onedpl_config.h @@ -308,6 +308,17 @@ # define _ONEDPL_STD_BIT_FLOOR_BROKEN 0 #endif +// There is a bug in libc++, with 21 being the latest major release at the time of writing this comment. +// 23 is set to avoid frequent bump-ups. +// See: https://github.com/llvm/llvm-project/blob/6096d35ea93c75f648a253a00775b4d74915c819/libcxx/include/__algorithm/ranges_set_union.h#L94 +// This line does not take into account that the iterator-based implementation may arbitrarily call comp(a, b) or comp(b, a) +// TODO: report it or contribute. +#if defined(_LIBCPP_VERSION) && _LIBCPP_VERSION <= 230000 +# define _ONEDPL_LIBCPP_RANGE_SET_BROKEN 1 +#else +# define _ONEDPL_LIBCPP_RANGE_SET_BROKEN 0 +#endif + // The implementation of std::ranges algorithms in MS C++ standard library is done via C++ functions. 
#if defined(_MSC_VER) && (_MSC_VER < 1939) # define _ONEDPL_STD_RANGES_ALGO_CPP_FUN 1 diff --git a/include/oneapi/dpl/pstl/parallel_backend_utils.h b/include/oneapi/dpl/pstl/parallel_backend_utils.h index 2b4dda64121..bfc424b6c6a 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_utils.h +++ b/include/oneapi/dpl/pstl/parallel_backend_utils.h @@ -26,6 +26,7 @@ #include #include "utils.h" #include "memory_fwd.h" +#include "functional_impl.h" // for oneapi::dpl::identity namespace oneapi { @@ -217,11 +218,12 @@ struct __serial_move_merge }; template + typename _CopyConstructRange, typename _Proj1 = oneapi::dpl::identity, + typename _Proj2 = oneapi::dpl::identity> _OutputIterator __set_union_construct(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp, - _CopyConstructRange __cc_range) + _CopyConstructRange __cc_range, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) { using _Tp = typename ::std::iterator_traits<_OutputIterator>::value_type; @@ -229,7 +231,7 @@ __set_union_construct(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _Fo { if (__first2 == __last2) return __cc_range(__first1, __last1, __result); - if (__comp(*__first2, *__first1)) + if (std::invoke(__comp, std::invoke(__proj2, *__first2), std::invoke(__proj1, *__first1))) { ::new (::std::addressof(*__result)) _Tp(*__first2); ++__first2; @@ -237,7 +239,7 @@ __set_union_construct(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _Fo else { ::new (::std::addressof(*__result)) _Tp(*__first1); - if (!__comp(*__first1, *__first2)) + if (!std::invoke(__comp, std::invoke(__proj1, *__first1), std::invoke(__proj2, *__first2))) ++__first2; ++__first1; } @@ -246,17 +248,18 @@ __set_union_construct(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _Fo } template + typename _CopyFunc, typename _CopyFromFirstSet, typename _Proj1 = oneapi::dpl::identity, + typename _Proj2 = oneapi::dpl::identity> 
_OutputIterator __set_intersection_construct(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp, _CopyFunc _copy, - _CopyFromFirstSet) + _CopyFromFirstSet, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) { while (__first1 != __last1 && __first2 != __last2) { - if (__comp(*__first1, *__first2)) + if (std::invoke(__comp, std::invoke(__proj1, *__first1), std::invoke(__proj2, *__first2))) ++__first1; - else if (__comp(*__first2, *__first1)) + else if (std::invoke(__comp, std::invoke(__proj2, *__first2), std::invoke(__proj1, *__first1))) ++__first2; else { @@ -274,11 +277,12 @@ __set_intersection_construct(_ForwardIterator1 __first1, _ForwardIterator1 __las } template + typename _CopyConstructRange, typename _Proj1 = oneapi::dpl::identity, + typename _Proj2 = oneapi::dpl::identity> _OutputIterator __set_difference_construct(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp, - _CopyConstructRange __cc_range) + _CopyConstructRange __cc_range, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) { using _Tp = typename ::std::iterator_traits<_OutputIterator>::value_type; @@ -287,7 +291,7 @@ __set_difference_construct(_ForwardIterator1 __first1, _ForwardIterator1 __last1 if (__first2 == __last2) return __cc_range(__first1, __last1, __result); - if (__comp(*__first1, *__first2)) + if (std::invoke(__comp, std::invoke(__proj1, *__first1), std::invoke(__proj2, *__first2))) { ::new (::std::addressof(*__result)) _Tp(*__first1); ++__result; @@ -295,19 +299,21 @@ __set_difference_construct(_ForwardIterator1 __first1, _ForwardIterator1 __last1 } else { - if (!__comp(*__first2, *__first1)) + if (!std::invoke(__comp, std::invoke(__proj2, *__first2), std::invoke(__proj1, *__first1))) ++__first1; ++__first2; } } return __result; } + template + typename _CopyConstructRange, typename _Proj1 = 
oneapi::dpl::identity, + typename _Proj2 = oneapi::dpl::identity> _OutputIterator __set_symmetric_difference_construct(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp, - _CopyConstructRange __cc_range) + _CopyConstructRange __cc_range, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) { using _Tp = typename ::std::iterator_traits<_OutputIterator>::value_type; @@ -316,7 +322,7 @@ __set_symmetric_difference_construct(_ForwardIterator1 __first1, _ForwardIterato if (__first2 == __last2) return __cc_range(__first1, __last1, __result); - if (__comp(*__first1, *__first2)) + if (std::invoke(__comp, std::invoke(__proj1, *__first1), std::invoke(__proj2, *__first2))) { ::new (::std::addressof(*__result)) _Tp(*__first1); ++__result; @@ -324,7 +330,7 @@ __set_symmetric_difference_construct(_ForwardIterator1 __first1, _ForwardIterato } else { - if (__comp(*__first2, *__first1)) + if (std::invoke(__comp, std::invoke(__proj2, *__first2), std::invoke(__proj1, *__first1))) { ::new (::std::addressof(*__result)) _Tp(*__first2); ++__result; diff --git a/include/oneapi/dpl/pstl/utils.h b/include/oneapi/dpl/pstl/utils.h index 2c8e8033055..6178c1e2e6b 100644 --- a/include/oneapi/dpl/pstl/utils.h +++ b/include/oneapi/dpl/pstl/utils.h @@ -50,6 +50,8 @@ # include // for std::equality_comparable_with #endif +#include "functional_impl.h" + namespace oneapi { namespace dpl @@ -92,7 +94,7 @@ class __not_pred bool operator()(_Args&&... 
__args) const { - return !_M_pred(::std::forward<_Args>(__args)...); + return !std::invoke(_M_pred, std::forward<_Args>(__args)...); } }; @@ -108,7 +110,7 @@ class __reorder_pred bool operator()(_FTp&& __a, _STp&& __b) const { - return _M_pred(::std::forward<_STp>(__b), ::std::forward<_FTp>(__a)); + return std::invoke(_M_pred, std::forward<_STp>(__b), std::forward<_FTp>(__a)); } }; @@ -627,22 +629,22 @@ __dpl_signbit(const _T& __x) return (__x & __mask) != 0; } -template -_Size1 -__pstl_lower_bound(_Acc __acc, _Size1 __first, _Size1 __last, const _Value& __value, _Compare __comp) +template +_Size +__pstl_lower_bound(_Acc __acc, _Size __first, _Size __last, const _Value& __value, _Compare __comp, _Proj __proj = {}) { auto __n = __last - __first; auto __cur = __n; - _Size1 __it; + _Size __idx; while (__n > 0) { - __it = __first; + __idx = __first; __cur = __n / 2; - __it += __cur; - if (__comp(__acc[__it], __value)) + __idx += __cur; + if (std::invoke(__comp, std::invoke(__proj, __acc[__idx]), __value)) { __n -= __cur + 1; - __first = ++__it; + __first = ++__idx; } else __n = __cur; @@ -650,30 +652,33 @@ __pstl_lower_bound(_Acc __acc, _Size1 __first, _Size1 __last, const _Value& __va return __first; } -template -_Size1 -__pstl_upper_bound(_Acc __acc, _Size1 __first, _Size1 __last, const _Value& __value, _Compare __comp) +template +_Size +__pstl_upper_bound(_Acc __acc, _Size __first, _Size __last, const _Value& __value, _Compare __comp, _Proj __proj = {}) { - return __pstl_lower_bound(__acc, __first, __last, __value, - oneapi::dpl::__internal::__not_pred>{ - oneapi::dpl::__internal::__reorder_pred<_Compare>{__comp}}); + __reorder_pred<_Compare> __reordered_comp{__comp}; + __not_pred __negation_reordered_comp{__reordered_comp}; + + return __pstl_lower_bound(__acc, __first, __last, __value, __negation_reordered_comp, __proj); } // Searching for the first element strongly greater than a passed value - right bound -template +template _Index -__pstl_right_bound(_Buffer& 
__a, _Index __first, _Index __last, const _Value& __val, _Compare __comp) +__pstl_right_bound(_Buffer& __a, _Index __first, _Index __last, const _Value& __val, _Compare __comp, _Proj __proj = {}) { - return __pstl_upper_bound(__a, __first, __last, __val, __comp); + return __pstl_upper_bound(__a, __first, __last, __val, __comp, __proj); } // Performs a "biased" binary search targets the split point close to one edge of the range. // When __bias_last==true, it searches first near the last element, otherwise it searches first near the first element. // After each iteration which fails to capture the element in the small side, it reduces the "bias", eventually // resulting in a standard binary search. -template +template _Size1 -__biased_lower_bound(_Acc __acc, _Size1 __first, _Size1 __last, const _Value& __value, _Compare __comp) +__biased_lower_bound(_Acc __acc, _Size1 __first, _Size1 __last, const _Value& __value, _Compare __comp, + _Proj __proj = {}) { auto __n = __last - __first; std::int8_t __shift_right_div = 10; // divide by 2^10 = 1024 @@ -689,7 +694,7 @@ __biased_lower_bound(_Acc __acc, _Size1 __first, _Size1 __last, const _Value& __ __cur_idx = __biased_step; __it = __first + __cur_idx; - if (__comp(__acc[__it], __value)) + if (std::invoke(__comp, std::invoke(__proj, __acc[__it]), __value)) { __first = __it + 1; } @@ -704,18 +709,21 @@ __biased_lower_bound(_Acc __acc, _Size1 __first, _Size1 __last, const _Value& __ if (__n > 0) { //end up fully at binary search - return oneapi::dpl::__internal::__pstl_lower_bound(__acc, __first, __last, __value, __comp); + return oneapi::dpl::__internal::__pstl_lower_bound(__acc, __first, __last, __value, __comp, __proj); } return __first; } -template +template _Size1 -__biased_upper_bound(_Acc __acc, _Size1 __first, _Size1 __last, const _Value& __value, _Compare __comp) +__biased_upper_bound(_Acc __acc, _Size1 __first, _Size1 __last, const _Value& __value, _Compare __comp, + _Proj __proj = {}) { - return 
__biased_lower_bound<__bias_last>( - __acc, __first, __last, __value, - oneapi::dpl::__internal::__not_pred{oneapi::dpl::__internal::__reorder_pred<_Compare>{__comp}}); + __reorder_pred<_Compare> __reordered_comp{__comp}; + __not_pred __negation_reordered_comp{__reordered_comp}; + + return __biased_lower_bound<__bias_last>(__acc, __first, __last, __value, __negation_reordered_comp, __proj); } template @@ -779,14 +787,16 @@ struct _ReverseCounter }; // Reverse searching for the first element strongly less than a passed value - left bound -template +template _Index -__pstl_left_bound(_Buffer& __a, _Index __first, _Index __last, const _Value& __val, _Compare __comp) +__pstl_left_bound(_Buffer& __a, _Index __first, _Index __last, const _Value& __val, _Compare __comp, _Proj __proj = {}) { auto __beg = _ReverseCounter<_Index, _Buffer>{__last - 1}; auto __end = _ReverseCounter<_Index, _Buffer>{__first - 1}; - return __pstl_upper_bound(__a, __beg, __end, __val, __reorder_pred<_Compare>{__comp}); + __not_pred __negation_comp{__comp}; + + return __pstl_lower_bound(__a, __beg, __end, __val, __negation_comp, __proj); } // Lower bound implementation based on Shar's algorithm for binary search. diff --git a/test/parallel_api/ranges/std_ranges_includes.pass.cpp b/test/parallel_api/ranges/std_ranges_includes.pass.cpp new file mode 100644 index 00000000000..888863c45d2 --- /dev/null +++ b/test/parallel_api/ranges/std_ranges_includes.pass.cpp @@ -0,0 +1,121 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Copyright (C) Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// This file incorporates work covered by the following copyright and permission +// notice: +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// +//===----------------------------------------------------------------------===// + +#include "std_ranges_test.h" + +#if _ENABLE_STD_RANGES_TESTING +struct A1 +{ + int a1; +}; + +struct A2 +{ + int a2; +}; + +void test_mixed_types_host() +{ + std::vector vec_a1 = {{1}, {2}, {3}}; + std::vector vec_a2 = {{2}, {3}}; + + auto proj_a1 = [](const A1& a) { return a.a1; }; + auto proj_a2 = [](const A2& a) { return a.a2; }; + + bool exp_res = std::ranges::includes(vec_a1, vec_a2, std::ranges::less{}, proj_a1, proj_a2); + + bool seq_res = oneapi::dpl::ranges::includes(oneapi::dpl::execution::seq, vec_a1, vec_a2, std::ranges::less{}, proj_a1, proj_a2); + EXPECT_EQ(seq_res, exp_res, "wrong result with seq policy"); + + bool unseq_res = oneapi::dpl::ranges::includes(oneapi::dpl::execution::unseq, vec_a1, vec_a2, std::ranges::less{}, proj_a1, proj_a2); + EXPECT_EQ(unseq_res, exp_res, "wrong result with unseq policy"); + + bool par_res = oneapi::dpl::ranges::includes(oneapi::dpl::execution::par, vec_a1, vec_a2, std::ranges::less{}, proj_a1, proj_a2); + EXPECT_EQ(par_res, exp_res, "wrong result with par policy"); + + bool par_unseq_res = oneapi::dpl::ranges::includes(oneapi::dpl::execution::par_unseq, vec_a1, vec_a2, std::ranges::less{}, proj_a1, proj_a2); + EXPECT_EQ(par_unseq_res, exp_res, "wrong result with par_unseq policy"); +} + +#if TEST_DPCPP_BACKEND_PRESENT +void test_mixed_types_device() +{ + auto policy = TestUtils::get_dpcpp_test_policy(); + sycl::queue q = policy.queue(); + if (q.get_device().has(sycl::aspect::usm_shared_allocations)) + { + A1* d_a1 = sycl::malloc_shared(3, q); + A2* d_a2 = sycl::malloc_shared(2, q); + + d_a1[0] = {1}; + d_a1[1] = {2}; + d_a1[2] = {3}; + + d_a2[0] = {2}; + d_a2[1] = {3}; + + std::ranges::subrange a1_range(d_a1, d_a1 + 3); + std::ranges::subrange a2_range(d_a2, d_a2 + 2); + + auto proj_a1 = [](const A1& a) { return a.a1; }; + auto proj_a2 = [](const A2& a) { return a.a2; }; + + bool exp_res = std::ranges::includes(a1_range, 
a2_range, std::ranges::less{}, proj_a1, proj_a2); + + bool dev_res = oneapi::dpl::ranges::includes(oneapi::dpl::execution::make_device_policy(q), + a1_range, a2_range, std::ranges::less{}, proj_a1, proj_a2); + EXPECT_EQ(dev_res, exp_res, "wrong result with device policy"); + + sycl::free(d_a1, q); + sycl::free(d_a2, q); + } +} +#endif // TEST_DPCPP_BACKEND_PRESENT +#endif //_ENABLE_STD_RANGES_TESTING + +int +main() +{ +#if _ENABLE_STD_RANGES_TESTING + using namespace test_std_ranges; + namespace dpl_ranges = oneapi::dpl::ranges; + + auto includes_checker = TEST_PREPARE_CALLABLE(std::ranges::includes); + + test_range_algo<0, int, data_in_in>{big_sz}(dpl_ranges::includes, includes_checker); + test_range_algo<1, int, data_in_in>{ }(dpl_ranges::includes, includes_checker, std::ranges::less{}); + test_range_algo<2, int, data_in_in>{ }(dpl_ranges::includes, includes_checker, std::ranges::less{}, proj); + test_range_algo<3 , int, data_in_in>{ }(dpl_ranges::includes, includes_checker, std::ranges::less{}, proj, proj); + + // Check with different projections, + // but when includes returns true - to make sure that the projections are applied correctly. + // The first sequence is [0, 3, 6, ...], the second is [0, 1, 2, ...], + // but the second is transformed to [0, 3, 6, ...] by its projection. 
+ auto x1 = [](auto&& v) { return v; }; + auto x3 = [](auto&& v) { return v * 3; }; + test_range_algo<4, int, data_in_in, decltype(x3), decltype(x1)>{medium_size}(dpl_ranges::includes, includes_checker, std::ranges::less{}, x1, x3); + + test_range_algo<5, P2, data_in_in>{}(dpl_ranges::includes, includes_checker, std::ranges::less{}, &P2::x, &P2::x); + test_range_algo<6, P2, data_in_in>{}(dpl_ranges::includes, includes_checker, std::ranges::less{}, &P2::proj, &P2::proj); + + // Check if projections are applied to the right sequences and trigger a compile-time error if not + test_mixed_types_host(); +#if TEST_DPCPP_BACKEND_PRESENT + test_mixed_types_device(); +#endif +#endif //_ENABLE_STD_RANGES_TESTING + + return TestUtils::done(_ENABLE_STD_RANGES_TESTING); +} diff --git a/test/parallel_api/ranges/std_ranges_merge.pass.cpp b/test/parallel_api/ranges/std_ranges_merge.pass.cpp index be5226b2abb..bb384feaf48 100644 --- a/test/parallel_api/ranges/std_ranges_merge.pass.cpp +++ b/test/parallel_api/ranges/std_ranges_merge.pass.cpp @@ -48,7 +48,7 @@ struct merge_checker_fn if(it_out == it_out_e) return ret_type{it_1, it_2, it_out}; } - + if(it_1 == it_1_e) { for(; it_2 != it_2_e && it_out != it_out_e; ++it_2, (void) ++it_out) @@ -65,20 +65,20 @@ struct merge_checker_fn } merge_checker; #endif //_ENABLE_STD_RANGES_TESTING -std::int32_t +int main() { #if _ENABLE_STD_RANGES_TESTING using namespace test_std_ranges; namespace dpl_ranges = oneapi::dpl::ranges; - test_range_algo<0, int, data_in_in_out_lim>{big_sz}(dpl_ranges::merge, merge_checker, std::ranges::less{}, std::identity{}, std::identity{}); + test_range_algo<0, int, data_in_in_out_lim, mul1_t, div3_t>{big_sz}(dpl_ranges::merge, merge_checker, std::ranges::less{}, std::identity{}, std::identity{}); - test_range_algo<1, int, data_in_in_out_lim>{}(dpl_ranges::merge, merge_checker, std::ranges::less{}, proj); - test_range_algo<2, P2, data_in_in_out_lim>{}(dpl_ranges::merge, merge_checker, std::ranges::less{}, &P2::x, 
&P2::x); - test_range_algo<3, P2, data_in_in_out_lim>{}(dpl_ranges::merge, merge_checker, std::ranges::less{}, &P2::proj, &P2::proj); + test_range_algo<1, int, data_in_in_out_lim, mul1_t, div3_t>{}(dpl_ranges::merge, merge_checker, std::ranges::less{}, proj); + test_range_algo<2, P2, data_in_in_out_lim, mul1_t, div3_t>{}(dpl_ranges::merge, merge_checker, std::ranges::less{}, &P2::x, &P2::x); + test_range_algo<3, P2, data_in_in_out_lim, mul1_t, div3_t>{}(dpl_ranges::merge, merge_checker, std::ranges::less{}, &P2::proj, &P2::proj); - test_range_algo<7, int, data_in_in_out_lim>{}(dpl_ranges::merge, merge_checker); + test_range_algo<7, int, data_in_in_out_lim, mul1_t, div3_t>{}(dpl_ranges::merge, merge_checker); #endif //_ENABLE_STD_RANGES_TESTING return TestUtils::done(_ENABLE_STD_RANGES_TESTING); diff --git a/test/parallel_api/ranges/std_ranges_set_difference.pass.cpp b/test/parallel_api/ranges/std_ranges_set_difference.pass.cpp new file mode 100644 index 00000000000..3122029a7ff --- /dev/null +++ b/test/parallel_api/ranges/std_ranges_set_difference.pass.cpp @@ -0,0 +1,147 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Copyright (C) Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// This file incorporates work covered by the following copyright and permission +// notice: +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// +//===----------------------------------------------------------------------===// + +#include "std_ranges_test.h" + +#if _ENABLE_STD_RANGES_TESTING +namespace test_std_ranges +{ +template<> +inline int out_size_with_empty_in2>(int in1_size) +{ + return in1_size; +} +} + +struct A +{ + int a; + operator int() const { return a; } +}; + +struct B +{ + int b; + operator int() const { return b; } +}; + +void test_mixed_types_host() +{ + std::vector r1 = {{1}, {2}, {5}}; + std::vector r2 = {{0}, {2}, {2}, {3}}; + + std::vector out_expected = {1, 5}; + + std::vector out_seq(out_expected.size(), 0xCD); + std::vector out_par(out_expected.size(), 0xCD); + std::vector out_unseq(out_expected.size(), 0xCD); + std::vector out_par_unseq(out_expected.size(), 0xCD); + + auto proj_a = [](const A& a) { return a.a; }; + auto proj_b = [](const B& b) { return b.b; }; + + oneapi::dpl::ranges::set_difference( + oneapi::dpl::execution::seq, r1, r2, out_seq, std::ranges::less{}, proj_a, proj_b); + oneapi::dpl::ranges::set_difference( + oneapi::dpl::execution::par, r1, r2, out_par, std::ranges::less{}, proj_a, proj_b); + oneapi::dpl::ranges::set_difference( + oneapi::dpl::execution::unseq, r1, r2, out_unseq, std::ranges::less{}, proj_a, proj_b); + oneapi::dpl::ranges::set_difference( + oneapi::dpl::execution::par_unseq, r1, r2, out_par_unseq, std::ranges::less{}, proj_a, proj_b); + + EXPECT_EQ_RANGES(out_expected, out_seq, "wrong result with seq policy"); + EXPECT_EQ_RANGES(out_expected, out_par, "wrong result with par policy"); + EXPECT_EQ_RANGES(out_expected, out_unseq, "wrong result with unseq policy"); + EXPECT_EQ_RANGES(out_expected, out_par_unseq, "wrong result with par_unseq policy"); +} + +#if TEST_DPCPP_BACKEND_PRESENT +void test_mixed_types_device() +{ + auto policy = TestUtils::get_dpcpp_test_policy(); + sycl::queue q = policy.queue(); + if (q.get_device().has(sycl::aspect::usm_shared_allocations)) + { + using r1_alloc_t = sycl::usm_allocator; + using r2_alloc_t = 
sycl::usm_allocator; + using r_out_alloc_t = sycl::usm_allocator; + + std::vector v1({{1}, {2}, {5}}, r1_alloc_t(q)); + std::vector v2({{0}, {2}, {2}, {3}}, r2_alloc_t(q)); + std::vector out_expected = {1, 5}; + + std::vector out(out_expected.size(), 0xCD, r_out_alloc_t(q)); + + // Wrap vector with a USM allocator into the subrange because it is not device copyable + std::ranges::subrange r1(v1.data(), v1.data() + v1.size()); + std::ranges::subrange r2(v2.data(), v2.data() + v2.size()); + std::ranges::subrange r_out(out.data(), out.data() + out.size()); + + auto proj_a = [](const A& a) { return a.a; }; + auto proj_b = [](const B& b) { return b.b; }; + + oneapi::dpl::ranges::set_difference(policy, r1, r2, r_out, std::ranges::less{}, proj_a, proj_b); + EXPECT_EQ_RANGES(out_expected, out, "wrong result with device policy"); + } +} +#endif // TEST_DPCPP_BACKEND_PRESENT +#endif // _ENABLE_STD_RANGES_TESTING + +int +main() +{ + bool bProcessed = false; + +#if _ENABLE_STD_RANGES_TESTING + using namespace test_std_ranges; + namespace dpl_ranges = oneapi::dpl::ranges; + + // TODO: use data_in_in_out_lim when set_difference supports + // output range not-sufficiently large to hold all the processed elements + + // TODO: implement individual tests solely for seq policy + auto checker = [](auto&&... 
args) + { + return oneapi::dpl::ranges::set_difference(oneapi::dpl::execution::seq, + std::forward(args)...); + }; + + test_range_algo<0, int, data_in_in_out, div3_t, mul1_t>{big_sz}(dpl_ranges::set_difference, checker); + test_range_algo<1, int, data_in_in_out, div3_t, mul1_t>{big_sz}(dpl_ranges::set_difference, checker,std::ranges::less{}, proj); + + // Testing the cut-off with the serial implementation (less than __set_algo_cut_off) + test_range_algo<2, int, data_in_in_out, div3_t, mul1_t>{100}(dpl_ranges::set_difference, checker, std::ranges::less{}, proj, proj); + + test_range_algo<3, P2, data_in_in_out, div3_t, mul1_t>{}(dpl_ranges::set_difference, checker, std::ranges::less{}, &P2::x, &P2::x); + test_range_algo<4, P2, data_in_in_out, div3_t, mul1_t>{}(dpl_ranges::set_difference, checker, std::ranges::less{}, &P2::proj, &P2::proj); + + // Testing no intersection + auto large_shift = [](auto&& v) { return v + 5000; }; + using ls_t = decltype(large_shift); + test_range_algo<5, int, data_in_in_out, mul1_t, ls_t>{1000}(dpl_ranges::set_difference, checker); + test_range_algo<6, int, data_in_in_out, ls_t, mul1_t>{1000}(dpl_ranges::set_difference, checker); + + // Check if projections are applied to the right sequences and trigger a compile-time error if not + test_mixed_types_host(); +#if TEST_DPCPP_BACKEND_PRESENT + test_mixed_types_device(); +#endif + + bProcessed = true; + +#endif //_ENABLE_STD_RANGES_TESTING + + return TestUtils::done(bProcessed); +} diff --git a/test/parallel_api/ranges/std_ranges_set_intersection.pass.cpp b/test/parallel_api/ranges/std_ranges_set_intersection.pass.cpp new file mode 100644 index 00000000000..97a100f99d4 --- /dev/null +++ b/test/parallel_api/ranges/std_ranges_set_intersection.pass.cpp @@ -0,0 +1,143 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Copyright (C) Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// This file 
incorporates work covered by the following copyright and permission +// notice: +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// +//===----------------------------------------------------------------------===// + +#include "std_ranges_test.h" + +#if _ENABLE_STD_RANGES_TESTING +struct A +{ + int a; + operator int() const { return a; } +}; + +struct B +{ + int b; + operator int() const { return b; } +}; + +void test_mixed_types_host() +{ + std::vector r1 = {{1}, {2}, {3}}; + std::vector r2 = {{0}, {2}, {2}, {3}}; + + std::vector out_expected = {2, 3}; + + std::vector out_seq(out_expected.size(), 0xCD); + std::vector out_par(out_expected.size(), 0xCD); + std::vector out_unseq(out_expected.size(), 0xCD); + std::vector out_par_unseq(out_expected.size(), 0xCD); + + auto proj_a = [](const A& a) { return a.a; }; + auto proj_b = [](const B& b) { return b.b; }; + + oneapi::dpl::ranges::set_intersection( + oneapi::dpl::execution::seq, r1, r2, out_seq, std::ranges::less{}, proj_a, proj_b); + oneapi::dpl::ranges::set_intersection( + oneapi::dpl::execution::par, r1, r2, out_par, std::ranges::less{}, proj_a, proj_b); + oneapi::dpl::ranges::set_intersection( + oneapi::dpl::execution::unseq, r1, r2, out_unseq, std::ranges::less{}, proj_a, proj_b); + oneapi::dpl::ranges::set_intersection( + oneapi::dpl::execution::par_unseq, r1, r2, out_par_unseq, std::ranges::less{}, proj_a, proj_b); + + EXPECT_EQ_RANGES(out_expected, out_seq, "wrong result with seq policy"); + EXPECT_EQ_RANGES(out_expected, out_par, "wrong result with par policy"); + EXPECT_EQ_RANGES(out_expected, out_unseq, "wrong result with unseq policy"); + EXPECT_EQ_RANGES(out_expected, out_par_unseq, "wrong result with par_unseq policy"); +} + +#if TEST_DPCPP_BACKEND_PRESENT +void test_mixed_types_device() +{ + auto policy = TestUtils::get_dpcpp_test_policy(); + sycl::queue q = policy.queue(); + if 
(q.get_device().has(sycl::aspect::usm_shared_allocations)) + { + using r1_alloc_t = sycl::usm_allocator; + using r2_alloc_t = sycl::usm_allocator; + using r_out_alloc_t = sycl::usm_allocator; + + std::vector v1({{1}, {2}, {3}}, r1_alloc_t(q)); + std::vector v2({{0}, {2}, {2}, {3}}, r2_alloc_t(q)); + std::vector out_expected = {2, 3}; + + std::vector out(out_expected.size(), 0xCD, r_out_alloc_t(q)); + + // Wrap vector with a USM allocator into the subrange because it is not device copyable + std::ranges::subrange r1(v1.data(), v1.data() + v1.size()); + std::ranges::subrange r2(v2.data(), v2.data() + v2.size()); + std::ranges::subrange r_out(out.data(), out.data() + out.size()); + + auto proj_a = [](const A& a) { return a.a; }; + auto proj_b = [](const B& b) { return b.b; }; + + oneapi::dpl::ranges::set_intersection(policy, r1, r2, r_out, std::ranges::less{}, proj_a, proj_b); + EXPECT_EQ_RANGES(out_expected, out, "wrong result with device policy"); + } +} +#endif // TEST_DPCPP_BACKEND_PRESENT +#endif // _ENABLE_STD_RANGES_TESTING + +int +main() +{ + bool bProcessed = false; + +#if _ENABLE_STD_RANGES_TESTING + using namespace test_std_ranges; + namespace dpl_ranges = oneapi::dpl::ranges; + + // TODO: use data_in_in_out_lim when set_intersection supports + // output range not-sufficiently large to hold all the processed elements + + // TODO: implement individual tests solely for seq policy + auto set_intersection_checker = [](auto&&... 
args) + { + return oneapi::dpl::ranges::set_intersection(oneapi::dpl::execution::seq, + std::forward(args)...); + }; + + test_range_algo<0, int, data_in_in_out, mul1_t, div3_t>{big_sz}(dpl_ranges::set_intersection, set_intersection_checker); + test_range_algo<1, int, data_in_in_out, mul1_t, div3_t>{big_sz}(dpl_ranges::set_intersection, set_intersection_checker, std::ranges::less{}, proj); + + // Testing the cut-off with the serial implementation (less than __set_algo_cut_off) + test_range_algo<2, int, data_in_in_out, mul1_t, div3_t>{100}(dpl_ranges::set_intersection, set_intersection_checker, std::ranges::less{}, proj, proj); + + test_range_algo<3, P2, data_in_in_out, mul1_t, div3_t>{}(dpl_ranges::set_intersection, set_intersection_checker, std::ranges::less{}, &P2::x, &P2::x); + test_range_algo<4, P2, data_in_in_out, mul1_t, div3_t>{}(dpl_ranges::set_intersection, set_intersection_checker, std::ranges::less{}, &P2::proj, &P2::proj); + + // Testing partial intersection less than __set_algo_cut_off + auto medium_shift = [](auto&& v) { return v + 400; }; + using ms_t = decltype(medium_shift); + test_range_algo<5, int, data_in_in_out, mul1_t, ms_t>{600}(dpl_ranges::set_intersection, set_intersection_checker); + + // Testing no intersection + auto large_shift = [](auto&& v) { return v + 5000; }; + using ls_t = decltype(large_shift); + test_range_algo<6, int, data_in_in_out, mul1_t, ls_t>{1000}(dpl_ranges::set_intersection, set_intersection_checker); + test_range_algo<7, int, data_in_in_out, ls_t, mul1_t>{1000}(dpl_ranges::set_intersection, set_intersection_checker); + + // Check if projections are applied to the right sequences and trigger a compile-time error if not + test_mixed_types_host(); +#if TEST_DPCPP_BACKEND_PRESENT + test_mixed_types_device(); +#endif + + bProcessed = true; + +#endif //_ENABLE_STD_RANGES_TESTING + + return TestUtils::done(bProcessed); +} diff --git a/test/parallel_api/ranges/std_ranges_set_symmetric_difference.pass.cpp 
b/test/parallel_api/ranges/std_ranges_set_symmetric_difference.pass.cpp new file mode 100644 index 00000000000..a4127960279 --- /dev/null +++ b/test/parallel_api/ranges/std_ranges_set_symmetric_difference.pass.cpp @@ -0,0 +1,146 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Copyright (C) Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// This file incorporates work covered by the following copyright and permission +// notice: +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// +//===----------------------------------------------------------------------===// + +#include "std_ranges_test.h" + +#if _ENABLE_STD_RANGES_TESTING +namespace test_std_ranges +{ +template<> +int out_size_with_empty_in2>(int in1_size) // with an empty second input the output size equals the size of the first input +{ + return in1_size; +} +template<> +int out_size_with_empty_in1>(int in2_size) // with an empty first input the output size equals the size of the second input +{ + return in2_size; +} +} + +struct A // element type handled by proj_a in the mixed-type tests; implicitly converts to int +{ + int a; + operator int() const { return a; } +}; + +struct B // element type handled by proj_b in the mixed-type tests; implicitly converts to int +{ + int b; + operator int() const { return b; } +}; + +void test_mixed_types_host() // calls set_symmetric_difference with the seq, par, unseq and par_unseq policies, passing proj_a and proj_b as the two projections, and compares every output with out_expected +{ + std::vector r1 = {{1}, {2}, {5}}; + std::vector r2 = {{0}, {2}, {2}, {3}}; + + std::vector out_expected = {0, 1, 2, 3, 5}; + + std::vector out_seq(out_expected.size(), 0xCD); + std::vector out_par(out_expected.size(), 0xCD); + std::vector out_unseq(out_expected.size(), 0xCD); + std::vector out_par_unseq(out_expected.size(), 0xCD); + + auto proj_a = [](const A& a) { return a.a; }; + auto proj_b = [](const B& b) { return b.b; }; + + oneapi::dpl::ranges::set_symmetric_difference( + oneapi::dpl::execution::seq, r1, r2, out_seq, std::ranges::less{}, proj_a, proj_b); + oneapi::dpl::ranges::set_symmetric_difference( + oneapi::dpl::execution::par, r1, r2, out_par, std::ranges::less{}, proj_a, proj_b); + oneapi::dpl::ranges::set_symmetric_difference( + oneapi::dpl::execution::unseq, r1, r2, out_unseq, 
std::ranges::less{}, proj_a, proj_b); + oneapi::dpl::ranges::set_symmetric_difference( + oneapi::dpl::execution::par_unseq, r1, r2, out_par_unseq, std::ranges::less{}, proj_a, proj_b); + + EXPECT_EQ_RANGES(out_expected, out_seq, "wrong result with seq policy"); + EXPECT_EQ_RANGES(out_expected, out_par, "wrong result with par policy"); + EXPECT_EQ_RANGES(out_expected, out_unseq, "wrong result with unseq policy"); + EXPECT_EQ_RANGES(out_expected, out_par_unseq, "wrong result with par_unseq policy"); +} + +#if TEST_DPCPP_BACKEND_PRESENT +void test_mixed_types_device() +{ + auto policy = TestUtils::get_dpcpp_test_policy(); + sycl::queue q = policy.queue(); + if (q.get_device().has(sycl::aspect::usm_shared_allocations)) + { + using r1_alloc_t = sycl::usm_allocator; + using r2_alloc_t = sycl::usm_allocator; + using r_out_alloc_t = sycl::usm_allocator; + + std::vector v1({{1}, {2}, {5}}, r1_alloc_t(q)); + std::vector v2({{0}, {2}, {2}, {3}}, r2_alloc_t(q)); + std::vector out_expected = {0, 1, 2, 3, 5}; + + std::vector out(out_expected.size(), 0xCD, r_out_alloc_t(q)); + + // Wrap each vector with a USM allocator into a subrange because such a vector is not device copyable + std::ranges::subrange r1(v1.data(), v1.data() + v1.size()); + std::ranges::subrange r2(v2.data(), v2.data() + v2.size()); + std::ranges::subrange r_out(out.data(), out.data() + out.size()); + + auto proj_a = [](const A& a) { return a.a; }; + auto proj_b = [](const B& b) { return b.b; }; + + oneapi::dpl::ranges::set_symmetric_difference(policy, r1, r2, r_out, std::ranges::less{}, proj_a, proj_b); + EXPECT_EQ_RANGES(out_expected, out, "wrong result with device policy"); + } +} +#endif // TEST_DPCPP_BACKEND_PRESENT +#endif // _ENABLE_STD_RANGES_TESTING + +int +main() // Test driver for oneapi::dpl::ranges::set_symmetric_difference +{ + bool bProcessed = false; // set to true after the tests below have run (only when _ENABLE_STD_RANGES_TESTING is on) + +#if _ENABLE_STD_RANGES_TESTING + using namespace test_std_ranges; + namespace dpl_ranges = oneapi::dpl::ranges; + + // TODO: use data_in_in_out_lim when set_symmetric_difference supports an + // output range that is not sufficiently 
large to hold all the processed elements + + // TODO: implement individual tests solely for seq policy; the seq policy call below is used as the checker passed to test_range_algo + auto checker = [](auto&&... args) + { + return oneapi::dpl::ranges::set_symmetric_difference(oneapi::dpl::execution::seq, + std::forward(args)...); + }; + + test_range_algo<0, int, data_in_in_out, div3_t, mul1_t>{big_sz}(dpl_ranges::set_symmetric_difference, checker); + test_range_algo<1, int, data_in_in_out, mul1_t, div3_t>{big_sz}(dpl_ranges::set_symmetric_difference, checker,std::ranges::less{}, proj); + + // Testing the cut-off with the serial implementation (the size of 100 is less than __set_algo_cut_off) + test_range_algo<2, int, data_in_in_out, mul1_t, mul1_t>{100}(dpl_ranges::set_symmetric_difference, checker, std::ranges::less{}, proj, proj); + + test_range_algo<3, P2, data_in_in_out, mul1_t, div3_t>{}(dpl_ranges::set_symmetric_difference, checker, std::ranges::less{}, &P2::x, &P2::x); + test_range_algo<4, P2, data_in_in_out, mul1_t, div3_t>{}(dpl_ranges::set_symmetric_difference, checker, std::ranges::less{}, &P2::proj, &P2::proj); + + // Check if projections are applied to the right sequences and trigger a compile-time error if not + test_mixed_types_host(); +#if TEST_DPCPP_BACKEND_PRESENT + test_mixed_types_device(); +#endif + + bProcessed = true; + +#endif //_ENABLE_STD_RANGES_TESTING + + return TestUtils::done(bProcessed); +} diff --git a/test/parallel_api/ranges/std_ranges_set_union.pass.cpp b/test/parallel_api/ranges/std_ranges_set_union.pass.cpp new file mode 100644 index 00000000000..e55145b4018 --- /dev/null +++ b/test/parallel_api/ranges/std_ranges_set_union.pass.cpp @@ -0,0 +1,253 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Copyright (C) Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// This file incorporates work covered by the following copyright and permission +// notice: +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// +//===----------------------------------------------------------------------===// + +#include "std_ranges_test.h" + +#if _ENABLE_STD_RANGES_TESTING +namespace test_std_ranges +{ +template<> +inline int out_size_with_empty_in2>(int in1_size) // with an empty second input the output size equals the size of the first input +{ + return in1_size; +} +template<> +inline int out_size_with_empty_in1>(int in2_size) // with an empty first input the output size equals the size of the second input +{ + return in2_size; +} +} + +struct A // element type handled by proj_a in the mixed-type tests; implicitly converts to int +{ + int a; + operator int() const { return a; } +}; + +struct B // element type handled by proj_b in the mixed-type tests; implicitly converts to int +{ + int b; + operator int() const { return b; } +}; + +void test_serial_set_union() // exercises set_union with the seq policy only: full output, truncated output, shortened inputs, empty inputs, key-value pairs with projections, and descending order +{ + std::vector v1 = {1, 2, 3, 3, 3, 4, 5}; + std::vector v2 = {0, 2, 2, 3, 3, 7}; + std::vector out(10, 0xCD); + std::vector out_expected = {0, 1, 2, 2, 3, 3, 3, 4, 5, 7}; + { + // Smaller r2 (6 elements versus 7 in r1), default comparator and projections + auto res = oneapi::dpl::ranges::set_union(oneapi::dpl::execution::seq, v1, v2, out); + EXPECT_EQ_RANGES(out_expected, out, "wrong result with seq policy"); + EXPECT_EQ(std::ranges::size(v1), res.in1 - std::ranges::begin(v1), "wrong res.in1"); + EXPECT_EQ(std::ranges::size(v2), res.in2 - std::ranges::begin(v2), "wrong res.in2"); + EXPECT_EQ(std::ranges::size(out), res.out - std::ranges::begin(out), "wrong res.out"); + } + { + // Insufficient output range capacity (5 of the 10 expected elements), explicit predicate + std::ranges::fill(out, 0xCD); + const int out_n = 5; + auto out_subrange = std::ranges::subrange(out.data(), out.data() + out_n); + auto out_expected_subrange = std::ranges::subrange(out_expected.data(), out_expected.data() + out_n); + auto res = oneapi::dpl::ranges::set_union(oneapi::dpl::execution::seq, v1, v2, out_subrange, std::ranges::less{}); + EXPECT_EQ_RANGES(out_expected_subrange, out_subrange, + "wrong result with seq policy, case with insufficient out range capacity"); + EXPECT_EQ(3, res.in1 - std::ranges::begin(v1), "wrong res.in1"); // expected stop positions: 3 elements of v1 and 4 of v2 consumed once the 5 output slots are full + EXPECT_EQ(4, res.in2 - std::ranges::begin(v2), "wrong res.in2"); + EXPECT_EQ(out_n, res.out - std::ranges::begin(out_subrange), "wrong res.out"); + } + { + // Smaller r1, 
predicate + first projection + std::ranges::fill(out, 0xCD); + const int in1_n = 2; + const int in2_n = 4; + auto res = oneapi::dpl::ranges::set_union(oneapi::dpl::execution::seq, + std::ranges::take_view(v1, in1_n), + std::ranges::take_view(v2, in2_n), out, + std::ranges::less{}, [](auto x) { return x; }); + const int exp_out_n = 5; + auto out_expected_subrange = std::ranges::subrange(out_expected.data(), out_expected.data() + exp_out_n); + auto out_subrange = std::ranges::subrange(std::ranges::begin(out), res.out); + EXPECT_EQ_RANGES(out_expected_subrange, out_subrange, "wrong result with seq policy, smaller r1"); + } + { + // Empty sequences + std::vector v1_empty; + std::vector v2_empty; + auto res = oneapi::dpl::ranges::set_union(oneapi::dpl::execution::seq, v1_empty, v2_empty, out); + EXPECT_EQ(0, res.in1 - std::ranges::begin(v1_empty), "wrong res.in1"); + EXPECT_EQ(0, res.in2 - std::ranges::begin(v2_empty), "wrong res.in2"); + EXPECT_EQ(0, res.out - std::ranges::begin(out), "wrong res.out"); + } + { + // Empty r1 + std::ranges::fill(out, 0xCD); + std::vector v1_empty; + auto res = oneapi::dpl::ranges::set_union(oneapi::dpl::execution::seq, v1_empty, v2, out); + auto out_subrange_expected = std::views::all(v2); + auto out_subrange = std::ranges::subrange(std::ranges::begin(out), res.out); + EXPECT_EQ_RANGES(out_subrange_expected, out_subrange, "wrong result with seq policy, empty r1"); + EXPECT_EQ(0, res.in1 - std::ranges::begin(v1_empty), "wrong res.in1"); + EXPECT_EQ(std::ranges::size(v2), res.in2 - std::ranges::begin(v2), "wrong res.in2"); + EXPECT_EQ(std::ranges::size(v2), res.out - std::ranges::begin(out), "wrong res.out"); + } + { + // Empty r2 + std::ranges::fill(out, 0xCD); + std::vector v2_empty; + auto res = oneapi::dpl::ranges::set_union(oneapi::dpl::execution::seq, v1, v2_empty, out); + auto out_subrange_expected = std::views::all(v1); + auto out_subrange = std::ranges::subrange(std::ranges::begin(out), res.out); + 
EXPECT_EQ_RANGES(out_subrange_expected, out_subrange, "wrong result with seq policy, empty r2"); + EXPECT_EQ(std::ranges::size(v1), res.in1 - std::ranges::begin(v1), "wrong res.in1"); + EXPECT_EQ(0, res.in2 - std::ranges::begin(v2_empty), "wrong res.in2"); + EXPECT_EQ(std::ranges::size(v1), res.out - std::ranges::begin(out), "wrong res.out"); + } + { + // - When a pair matches, an element from the first sequence is copied + // - Order is preserved + // - Predicate + two projections + // TODO: use zip_view with c++23 onwards + std::vector> kv1 = {{1, 1}, {3, 11}, {3, 12}, {4, 1}}; + std::vector> kv2 = {{0, 2}, {3, 2}, {3, 2}, {3, 21}}; + std::vector> kv_out(6, {0xCD, 0xCD}); + std::vector k_out_expected = {0, 1, 3, 3, 3, 4}; + std::vector v_out_expected = {2, 1, 11, 12, 21, 1}; + std::vector k_out(std::ranges::size(kv_out)); + std::vector v_out(std::ranges::size(kv_out)); + + auto proj = [](const std::pair& p) { return p.first; }; + oneapi::dpl::ranges::set_union(oneapi::dpl::execution::seq, kv1, kv2, kv_out, std::ranges::less{}, proj, proj); + + std::ranges::transform(kv_out, k_out.begin(), [](const auto& p) { return p.first; }); + std::ranges::transform(kv_out, v_out.begin(), [](const auto& p) { return p.second; }); + EXPECT_EQ_RANGES(k_out_expected, k_out, "wrong result with seq policy, wrong keys"); + EXPECT_EQ_RANGES(v_out_expected, v_out, "wrong result with seq policy, wrong values"); + } + { + // Reverse order: the inputs are sorted in descending order and compared with std::ranges::greater + std::vector v3 = {3, 2, 1}; + std::vector v4 = {2, 1, 0}; + std::vector out2(4, 0xCD); + std::vector out2_expected = {3, 2, 1, 0}; + oneapi::dpl::ranges::set_union(oneapi::dpl::execution::seq, v3, v4, out2, std::ranges::greater{}); + EXPECT_EQ_RANGES(out2_expected, out2, "wrong result with seq policy"); + } + // Different projections/types are tested in test_mixed_types_host + + // TODO: test type requirements +} + +void test_mixed_types_host() // calls set_union with the seq, par, unseq and par_unseq policies, passing proj_a and proj_b as the two projections, and compares every output with out_expected +{ + std::vector r1 = {{1}, {2}, {3}}; + std::vector r2 = {{0}, {2}, {2}, {3}}; + + std::vector out_expected 
= {0, 1, 2, 2, 3}; + + std::vector out_seq(5, 0xCD); + std::vector out_par(5, 0xCD); + std::vector out_unseq(5, 0xCD); + std::vector out_par_unseq(5, 0xCD); + + auto proj_a = [](const A& a) { return a.a; }; + auto proj_b = [](const B& b) { return b.b; }; + + oneapi::dpl::ranges::set_union( + oneapi::dpl::execution::seq, r1, r2, out_seq, std::ranges::less{}, proj_a, proj_b); + oneapi::dpl::ranges::set_union( + oneapi::dpl::execution::par, r1, r2, out_par, std::ranges::less{}, proj_a, proj_b); + oneapi::dpl::ranges::set_union( + oneapi::dpl::execution::unseq, r1, r2, out_unseq, std::ranges::less{}, proj_a, proj_b); + oneapi::dpl::ranges::set_union( + oneapi::dpl::execution::par_unseq, r1, r2, out_par_unseq, std::ranges::less{}, proj_a, proj_b); + + EXPECT_EQ_RANGES(out_expected, out_seq, "wrong result with seq policy"); + EXPECT_EQ_RANGES(out_expected, out_par, "wrong result with par policy"); + EXPECT_EQ_RANGES(out_expected, out_unseq, "wrong result with unseq policy"); + EXPECT_EQ_RANGES(out_expected, out_par_unseq, "wrong result with par_unseq policy"); +} + +#if TEST_DPCPP_BACKEND_PRESENT +void test_mixed_types_device() +{ + auto policy = TestUtils::get_dpcpp_test_policy(); + sycl::queue q = policy.queue(); + if (q.get_device().has(sycl::aspect::usm_shared_allocations)) + { + using r1_alloc_t = sycl::usm_allocator; + using r2_alloc_t = sycl::usm_allocator; + using r_out_alloc_t = sycl::usm_allocator; + + std::vector v1({{1}, {2}, {3}}, r1_alloc_t(q)); + std::vector v2({{0}, {2}, {2}, {3}}, r2_alloc_t(q)); + std::vector out(5, 0xCD, r_out_alloc_t(q)); + std::vector out_expected = {0, 1, 2, 2, 3}; + + // Wrap each vector with a USM allocator into a subrange because such a vector is not device copyable + std::ranges::subrange r1(v1.data(), v1.data() + 3); // 3, 4 and 5 are the element counts of v1, v2 and out + std::ranges::subrange r2(v2.data(), v2.data() + 4); + std::ranges::subrange r_out(out.data(), out.data() + 5); + + auto proj_a = [](const A& a) { return a.a; }; + auto proj_b = [](const B& b) { return b.b; }; + + 
oneapi::dpl::ranges::set_union(policy, r1, r2, r_out, std::ranges::less{}, proj_a, proj_b); + EXPECT_EQ_RANGES(out_expected, out, "wrong result with device policy"); + } +} +#endif // TEST_DPCPP_BACKEND_PRESENT +#endif // _ENABLE_STD_RANGES_TESTING + +int +main() // Test driver for oneapi::dpl::ranges::set_union +{ + bool bProcessed = false; // set to true after the tests below have run (only when _ENABLE_STD_RANGES_TESTING is on) + +#if _ENABLE_STD_RANGES_TESTING + using namespace test_std_ranges; + namespace dpl_ranges = oneapi::dpl::ranges; + + test_serial_set_union(); + auto set_union_checker = [](auto&&... args) + { + return oneapi::dpl::ranges::set_union(oneapi::dpl::execution::seq, + std::forward(args)...); + }; // checker passed to test_range_algo: the same algorithm called with the seq policy + + // TODO: use data_in_in_out_lim when set_union supports an + // output range that is not sufficiently large to hold all the processed elements + + test_range_algo<0, int, data_in_in_out, mul1_t, div3_t>{big_sz}(dpl_ranges::set_union, set_union_checker); + test_range_algo<1, int, data_in_in_out, mul1_t, div3_t>{big_sz}(dpl_ranges::set_union, set_union_checker, std::ranges::less{}, proj); + + // Testing the cut-off with the serial implementation (the size of 100 is less than __set_algo_cut_off) + test_range_algo<2, int, data_in_in_out, mul1_t, div3_t>{100}(dpl_ranges::set_union, set_union_checker, std::ranges::less{}, proj, proj); + + test_range_algo<3, P2, data_in_in_out, mul1_t, div3_t>{}(dpl_ranges::set_union, set_union_checker, std::ranges::less{}, &P2::x, &P2::x); + test_range_algo<4, P2, data_in_in_out, mul1_t, div3_t>{}(dpl_ranges::set_union, set_union_checker, std::ranges::less{}, &P2::proj, &P2::proj); + + test_mixed_types_host(); +#if TEST_DPCPP_BACKEND_PRESENT + test_mixed_types_device(); +#endif // TEST_DPCPP_BACKEND_PRESENT + + bProcessed = true; + +#endif //_ENABLE_STD_RANGES_TESTING + + return TestUtils::done(bProcessed); +} diff --git a/test/parallel_api/ranges/std_ranges_test.h b/test/parallel_api/ranges/std_ranges_test.h index 39f5e5224a2..7a2410b930f 100644 --- a/test/parallel_api/ranges/std_ranges_test.h +++ b/test/parallel_api/ranges/std_ranges_test.h @@ -89,6 +89,11 @@ auto pred1 = [](auto&& 
val) -> bool { return val > 0; }; auto pred2 = [](auto&& val) -> bool { return val == 4; }; auto pred3 = [](auto&& val) -> bool { return val < 0; }; +auto mul1 = [](auto&& v) { return v; }; +using mul1_t = decltype(mul1); +auto div3 = [](auto&& v) { return v / 3; }; +using div3_t = decltype(div3); + struct P2 { P2() {} @@ -171,6 +176,16 @@ constexpr int trivial_size{0}; template constexpr int calc_res_size(int n, int) { return n; } +// Size of the out range to use when the in1 range is empty; 0 by default, i.e. the out range is empty as well +// Can be specialized with an algorithm type if the behaviour is different, e.g. see set_union test. +template +int out_size_with_empty_in1(int) { return 0; }; + +// Size of the out range to use when the in2 range is empty; 0 by default, i.e. the out range is empty as well +// Can be specialized with an algorithm type if the behaviour is different, e.g. see set_union test. +template +int out_size_with_empty_in2(int) { return 0; }; + auto data_gen2_default = [](auto i) { return i % 5 ? i : 0;}; auto data_gen_zero = [](auto) { return 0;}; @@ -490,15 +505,6 @@ struct test test_dangling_pointers<2, 300>(exec, algo, std::forward(args)...); } - struct TransformOp - { - template - auto operator()(T i) const - { - return i / 3; - } - }; - template void process_data_in_in_out(int max_n, int n_in1, int n_in2, int n_out, Policy&& exec, Algo algo, Checker& checker, @@ -507,7 +513,7 @@ struct test static_assert(mode == data_in_in_out || mode == data_in_in_out_lim); Container cont_in1(exec, n_in1, DataGen1{}); - Container cont_in2(exec, n_in2, TransformOp{}); + Container cont_in2(exec, n_in2, DataGen2{}); Container cont_out(exec, n_out, data_gen_zero); Container cont_exp(exec, n_out, data_gen_zero); @@ -557,6 +563,8 @@ struct test //test cases with empty sequence(s) process_data_in_in_out(max_n, 0, 0, 0, CLONE_TEST_POLICY(exec), algo, checker, args...); + process_data_in_in_out(max_n, 0, r_size, out_size_with_empty_in1(r_size), CLONE_TEST_POLICY(exec), algo, checker, args...); + process_data_in_in_out(max_n, r_size, 0, 
out_size_with_empty_in2(r_size), CLONE_TEST_POLICY(exec), algo, checker, args...); } template