@@ -2948,6 +2948,40 @@ __pattern_remove_if(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec,
2948
2948
// merge
2949
2949
// ------------------------------------------------------------------------
2950
2950
2951
// Serial, output-bounded merge.
//
// Merges the ranges [__it_1, __it_1_e) and [__it_2, __it_2_e) into [__it_out, __it_out_e) and stops as soon
// as either both inputs are exhausted or the output range is full.
//
// An element of the first range is emitted only when __comp(*__it_1, *__it_2) is true; otherwise the element
// of the second range is emitted. Consequently, for equivalent elements the one from the SECOND range is
// written first.
//
// Only forward-iterator operations (!=, ++, *) are used, so the template parameters are left unconstrained.
//
// Returns the pair of input positions at which merging stopped, i.e. the first elements of each input
// range that were NOT written to the output.
template <typename _It1, typename _It2, typename _ItOut, typename _Comp>
std::pair<_It1, _It2>
__brick_merge(_It1 __it_1, _It1 __it_1_e, _It2 __it_2, _It2 __it_2_e, _ItOut __it_out, _ItOut __it_out_e,
              _Comp __comp)
{
    // Main phase: both inputs still have elements. The output bound is tested BEFORE every write so that
    // an empty (or already full) output range is never written to.
    while (__it_1 != __it_1_e && __it_2 != __it_2_e && __it_out != __it_out_e)
    {
        if (__comp(*__it_1, *__it_2))
        {
            *__it_out = *__it_1;
            ++__it_1;
        }
        else
        {
            *__it_out = *__it_2;
            ++__it_2;
        }
        ++__it_out;
    }

    // Tail phase: when the loop above ends, the output is full or at least one input is exhausted,
    // so at most one of the two copy loops below does any work.
    for (; __it_1 != __it_1_e && __it_out != __it_out_e; ++__it_1, ++__it_out)
        *__it_out = *__it_1;

    for (; __it_2 != __it_2_e && __it_out != __it_out_e; ++__it_2, ++__it_out)
        *__it_out = *__it_2;

    return {__it_1, __it_2};
}
2984
+
2951
2985
template <class _ForwardIterator1 , class _ForwardIterator2 , class _OutputIterator , class _Compare >
2952
2986
_OutputIterator
2953
2987
__brick_merge (_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
@@ -2980,13 +3014,106 @@ __pattern_merge(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIt
2980
3014
typename _Tag::__is_vector{});
2981
3015
}
2982
3016
3017
// Returns an iterator to the first element of [first, last) for which comp(element, value) is false,
// or last if there is no such element (including when the range is empty).
//
// The range is expected to be partitioned with respect to comp(element, value), as for std::lower_bound,
// to which this function forwards. It performs no I/O and never dereferences the returned iterator,
// so a result equal to last is safe.
template <class ForwardIt, class T = typename std::iterator_traits<ForwardIt>::value_type, class Compare>
ForwardIt
lower_bound_2(ForwardIt first, ForwardIt last, const T& value, Compare comp)
{
    return std::lower_bound(first, last, value, comp);
}
3045
+
3046
+ template <typename _IsVector, typename _ExecutionPolicy, typename _It1, typename _Index1, typename _It2,
3047
+ typename _Index2, typename _OutIt, typename _Index3, typename _Comp>
3048
+ std::pair<_It1, _It2>
3049
+ __pattern_merge_2 (__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _It1 __it_1, _Index1 __n_1, _It2 __it_2,
3050
+ _Index2 __n_2, _OutIt __it_out, _Index3 __n_out, _Comp __comp)
3051
+ {
3052
+ using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag;
3053
+
3054
+ _It1 __it_res_1;
3055
+ _It2 __it_res_2;
3056
+
3057
+ __internal::__except_handler ([&]() {
3058
+ __par_backend::__parallel_for (__backend_tag{}, std::forward<_ExecutionPolicy>(__exec), _Index3 (0 ), __n_out,
3059
+ [=, &__it_res_1, &__it_res_2](_Index3 __i, _Index3 __j)
3060
+ {
3061
+ // a start merging point on the merge path; for each thread
3062
+ _Index1 __r = 0 ; // row index
3063
+ _Index2 __c = 0 ; // column index
3064
+
3065
+ if (__i > 0 )
3066
+ {
3067
+ // calc merge path intersection:
3068
+ const _Index3 __d_size = std::abs (std::max<_Index2>(0 , __i - __n_2) - (std::min<_Index1>(__i, __n_1) - 1 )) + 1 ;
3069
+
3070
+ auto __get_row = [__i, __n_1](auto __d) { return std::min<_Index1>(__i, __n_1) - __d - 1 ; };
3071
+ auto __get_column = [__i, __n_1](auto __d) { return std::max<_Index1>(0 , __i - __n_1 - 1 ) + __d + (__i / (__n_1 + 1 ) > 0 ? 1 : 0 ); };
3072
+
3073
+ oneapi::dpl::counting_iterator<_Index3> __it_d (0 );
3074
+
3075
+ auto __res_d = *std::lower_bound (__it_d, __it_d + __d_size, 1 ,
3076
+ [&](auto __d, auto __val) {
3077
+ auto __r = __get_row (__d);
3078
+ auto __c = __get_column (__d);
3079
+
3080
+ oneapi::dpl::__internal::__compare<_Comp, std::identity> __cmp{__comp, std::identity{}};
3081
+ const auto __res = (__cmp (__it_1[__r], __it_2[__c]) ? 1 : 0 );
3082
+
3083
+ return __res < __val;
3084
+ }
3085
+ );
3086
+
3087
+ // intersection point
3088
+ __r = __get_row (__res_d);
3089
+ __c = __get_column (__res_d);
3090
+ ++__r; // to get a merge matrix ceil, lying on the current diagonal
3091
+ }
3092
+
3093
+ // serial merge n elements, starting from input x and y, to [i, j) output range
3094
+ auto __res = __brick_merge (__it_1 + __r, __it_1 + __n_1,
3095
+ __it_2 + __c, __it_2 + __n_2,
3096
+ __it_out + __i, __it_out + __j, __comp);
3097
+
3098
+ if (__j == __n_out)
3099
+ {
3100
+ __it_res_1 = __res.first ;
3101
+ __it_res_2 = __res.second ;
3102
+ }
3103
+ }, /* _ONEDPL_MERGE_CUT_OFF*/ 10 );
3104
+ });
3105
+
3106
+ return {__it_res_1, __it_res_2};
3107
+ }
3108
+
2983
3109
template <class _IsVector , class _ExecutionPolicy , class _RandomAccessIterator1 , class _RandomAccessIterator2 ,
2984
3110
class _RandomAccessIterator3 , class _Compare >
2985
3111
_RandomAccessIterator3
2986
- __pattern_merge (__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1,
3112
+ __pattern_merge (__parallel_tag<_IsVector> __tag , _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1,
2987
3113
_RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2,
2988
3114
_RandomAccessIterator3 __d_first, _Compare __comp)
2989
3115
{
3116
+ #if 0
2990
3117
using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag;
2991
3118
2992
3119
return __internal::__except_handler([&]() {
@@ -2999,6 +3126,13 @@ __pattern_merge(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAcc
2999
3126
});
3000
3127
return __d_first + (__last1 - __first1) + (__last2 - __first2);
3001
3128
});
3129
+ #else
3130
+ auto __n_1 = __last1 - __first1;
3131
+ auto __n_2 = __last2 - __first2;
3132
+ auto __n_3 = __n_1 + __n_2;
3133
+ __pattern_merge_2 (__tag, std::forward<_ExecutionPolicy>(__exec), __first1, __n_1, __first2, __n_2, __d_first, __n_3, __comp);
3134
+ return __d_first + __n_3;
3135
+ #endif
3002
3136
}
3003
3137
3004
3138
// ------------------------------------------------------------------------
0 commit comments