@@ -256,7 +256,10 @@ class RangesInDataPartsBuilder
256
256
return ;
257
257
}
258
258
259
- ranges_in_data_parts[it->second ].ranges .push_back (mark_range);
259
+ if (ranges_in_data_parts[it->second ].ranges .back ().end == mark_range.begin )
260
+ ranges_in_data_parts[it->second ].ranges .back ().end = mark_range.end ;
261
+ else
262
+ ranges_in_data_parts[it->second ].ranges .push_back (mark_range);
260
263
}
261
264
262
265
RangesInDataParts & getCurrentRangesInDataParts ()
@@ -289,6 +292,10 @@ struct PartsRangesIterator
289
292
290
293
if (event == other.event )
291
294
{
295
+ if (!selected && other.selected )
296
+ return true ;
297
+ if (selected && !other.selected )
298
+ return false ;
292
299
if (part_index == other.part_index )
293
300
{
294
301
// / Within the same part we should process events in order of mark numbers,
@@ -347,6 +354,7 @@ struct PartsRangesIterator
347
354
MarkRange range;
348
355
size_t part_index;
349
356
EventType event;
357
+ bool selected; // / Whether this range was selected or rejected in skip index filtering
350
358
};
351
359
352
360
struct PartRangeIndex
@@ -475,7 +483,8 @@ SplitPartsRangesResult splitPartsRanges(RangesInDataParts ranges_in_data_parts,
475
483
in_reverse_order,
476
484
range,
477
485
part_index,
478
- PartsRangesIterator::EventType::RangeStart});
486
+ PartsRangesIterator::EventType::RangeStart,
487
+ false });
479
488
480
489
const bool value_is_defined_at_end_mark = range.end < index_granularity->getMarksCount ();
481
490
if (!value_is_defined_at_end_mark)
@@ -486,7 +495,8 @@ SplitPartsRangesResult splitPartsRanges(RangesInDataParts ranges_in_data_parts,
486
495
in_reverse_order,
487
496
range,
488
497
part_index,
489
- PartsRangesIterator::EventType::RangeEnd});
498
+ PartsRangesIterator::EventType::RangeEnd,
499
+ false });
490
500
}
491
501
}
492
502
@@ -713,7 +723,8 @@ SplitPartsByRanges splitIntersectingPartsRangesIntoLayers(
713
723
in_reverse_order,
714
724
range,
715
725
part_index,
716
- PartsRangesIterator::EventType::RangeStart};
726
+ PartsRangesIterator::EventType::RangeStart,
727
+ false };
717
728
PartRangeIndex parts_range_start_index (parts_range_start);
718
729
parts_ranges_queue.push ({std::move (parts_range_start), std::move (parts_range_start_index)});
719
730
@@ -726,7 +737,8 @@ SplitPartsByRanges splitIntersectingPartsRangesIntoLayers(
726
737
in_reverse_order,
727
738
range,
728
739
part_index,
729
- PartsRangesIterator::EventType::RangeEnd};
740
+ PartsRangesIterator::EventType::RangeEnd,
741
+ false };
730
742
PartRangeIndex parts_range_end_index (parts_range_end);
731
743
parts_ranges_queue.push ({std::move (parts_range_end), std::move (parts_range_end_index)});
732
744
}
@@ -911,6 +923,130 @@ static ASTs buildFilters(const KeyDescription & primary_key, const std::vector<V
911
923
return filters;
912
924
}
913
925
926
+ RangesInDataParts findPKRangesForFinalAfterSkipIndexImpl (RangesInDataParts & ranges_in_data_parts, bool cannot_sort_primary_key, const LoggerPtr & logger)
927
+ {
928
+ IndexAccess index_access (ranges_in_data_parts);
929
+ std::vector<PartsRangesIterator> selected_ranges;
930
+ std::vector<PartsRangesIterator> rejected_ranges;
931
+
932
+ RangesInDataPartsBuilder result (ranges_in_data_parts);
933
+
934
+ auto skip_and_return_all_part_ranges = [&]()
935
+ {
936
+ RangesInDataParts all_part_ranges (std::move (ranges_in_data_parts));
937
+ for (auto & all_part_range : all_part_ranges)
938
+ {
939
+ const auto & index_granularity = all_part_range.data_part ->index_granularity ;
940
+ all_part_range.ranges = MarkRanges{{MarkRange{0 , index_granularity->getMarksCountWithoutFinal ()}}};
941
+ }
942
+ return all_part_ranges;
943
+ };
944
+
945
+ if (cannot_sort_primary_key) // / just expand to all parts + ranges
946
+ {
947
+ return skip_and_return_all_part_ranges ();
948
+ }
949
+
950
+ for (size_t part_index = 0 ; part_index < ranges_in_data_parts.size (); ++part_index)
951
+ {
952
+ const auto & index_granularity = ranges_in_data_parts[part_index].data_part ->index_granularity ;
953
+ std::vector<bool > is_selected_range (index_granularity->getMarksCountWithoutFinal (), false );
954
+ for (const auto & range : ranges_in_data_parts[part_index].ranges )
955
+ {
956
+ const bool value_is_defined_at_end_mark = range.end < index_granularity->getMarksCount ();
957
+ if (!value_is_defined_at_end_mark)
958
+ {
959
+ return skip_and_return_all_part_ranges ();
960
+ }
961
+
962
+ selected_ranges.push_back (
963
+ {index_access.getValue (part_index, range.begin ), false , range, part_index, PartsRangesIterator::EventType::RangeStart, true });
964
+ for (auto i = range.begin ; i < range.end ;i++)
965
+ is_selected_range[i] = true ;
966
+ }
967
+
968
+ for (size_t range_begin = 0 ; range_begin < is_selected_range.size (); range_begin++)
969
+ {
970
+ const bool value_is_defined_at_end_mark = ((range_begin + 1 ) < index_granularity->getMarksCount ());
971
+ if (!value_is_defined_at_end_mark)
972
+ {
973
+ return skip_and_return_all_part_ranges ();
974
+ }
975
+
976
+ if (is_selected_range[range_begin])
977
+ continue ;
978
+ MarkRange rejected_range (range_begin, range_begin + 1 );
979
+ rejected_ranges.push_back (
980
+ {index_access.getValue (part_index, rejected_range.begin ), false , rejected_range, part_index, PartsRangesIterator::EventType::RangeStart, false });
981
+ }
982
+ }
983
+
984
+ ::sort (selected_ranges.begin(), selected_ranges.end());
985
+
986
+ ::sort (rejected_ranges.begin(), rejected_ranges.end());
987
+
988
+ LOG_TRACE (logger, " findPKRangesForFinalAfterSkipIndex : sorting phase complete" );
989
+
990
+ std::vector<PartsRangesIterator>::iterator selected_ranges_iter = selected_ranges.begin ();
991
+ std::vector<PartsRangesIterator>::iterator rejected_ranges_iter = rejected_ranges.begin ();
992
+ size_t more_ranges_added = 0 ;
993
+
994
+ while (selected_ranges_iter != selected_ranges.end () && rejected_ranges_iter != rejected_ranges.end ())
995
+ {
996
+ auto selected_range_start = selected_ranges_iter->value ;
997
+ auto selected_range_end = index_access.getValue (selected_ranges_iter->part_index , selected_ranges_iter->range .end );
998
+ auto rejected_range_start = rejected_ranges_iter->value ;
999
+
1000
+ int result1 = compareValues (rejected_range_start, selected_range_start, false );
1001
+ int result2 = compareValues (rejected_range_start, selected_range_end, false );
1002
+
1003
+ if (result1 == 0 || result2 == 0 || (result1 > 0 && result2 < 0 )) // / rejected_range_start inside [selected_range]
1004
+ {
1005
+ result.addRange (rejected_ranges_iter->part_index , rejected_ranges_iter->range );
1006
+ rejected_ranges_iter++;
1007
+ more_ranges_added++;
1008
+ }
1009
+ else if (result1 > 0 ) // / rejected_range_start beyond [selected_range]
1010
+ {
1011
+ result.addRange (selected_ranges_iter->part_index , selected_ranges_iter->range );
1012
+ selected_ranges_iter++;
1013
+ }
1014
+ else
1015
+ {
1016
+ auto rejected_range_end = index_access.getValue (rejected_ranges_iter->part_index , rejected_ranges_iter->range .end );
1017
+ int result3 = compareValues (rejected_range_end, selected_range_start, false );
1018
+ int result4 = compareValues (rejected_range_end, selected_range_end, false );
1019
+ // / rejected_range_end inside [selected range] OR [rejected range] encompasses [selected range]
1020
+ if (result3 == 0 || result4 == 0 || (result3 > 0 && result4 < 0 ) || (result1 < 0 && result4 > 0 ))
1021
+ {
1022
+ result.addRange (rejected_ranges_iter->part_index , rejected_ranges_iter->range );
1023
+ more_ranges_added++;
1024
+ }
1025
+ rejected_ranges_iter++;
1026
+ }
1027
+ }
1028
+
1029
+ while (selected_ranges_iter != selected_ranges.end ())
1030
+ {
1031
+ result.addRange (selected_ranges_iter->part_index , selected_ranges_iter->range );
1032
+ selected_ranges_iter++;
1033
+ }
1034
+
1035
+ auto result_final_ranges = result.getCurrentRangesInDataParts ();
1036
+ std::stable_sort (
1037
+ result_final_ranges.begin (),
1038
+ result_final_ranges.end (),
1039
+ [](const auto & lhs, const auto & rhs) { return lhs.part_index_in_query < rhs.part_index_in_query ; });
1040
+ for (auto & result_final_range : result_final_ranges)
1041
+ {
1042
+ std::sort (result_final_range.ranges .begin (), result_final_range.ranges .end ());
1043
+ }
1044
+
1045
+ LOG_TRACE (logger, " findPKRangesForFinalAfterSkipIndex : processed {} parts, initially selected {} ranges & rejected {}, more {} ranges added" , ranges_in_data_parts.size (), selected_ranges.size (), rejected_ranges.size (), more_ranges_added);
1046
+
1047
+ return result_final_ranges;
1048
+ }
1049
+
914
1050
static void reorderColumns (ActionsDAG & dag, const Block & header, const std::string & filter_column)
915
1051
{
916
1052
std::unordered_map<std::string_view, const ActionsDAG::Node *> inputs_map;
@@ -1056,4 +1192,18 @@ Pipes readByLayers(
1056
1192
return merging_pipes;
1057
1193
}
1058
1194
1195
+ RangesInDataParts findPKRangesForFinalAfterSkipIndex (
1196
+ const KeyDescription & primary_key,
1197
+ const KeyDescription & sorting_key,
1198
+ RangesInDataParts & ranges_in_data_parts,
1199
+ const LoggerPtr & logger)
1200
+ {
1201
+ bool cannot_sort_primary_key = false ;
1202
+ if (!isSafePrimaryKey (primary_key) || !sorting_key.reverse_flags .empty ())
1203
+ {
1204
+ LOG_TRACE (logger, " Primary key is not sortable, expanding PK range to entire due to exact_mode." );
1205
+ cannot_sort_primary_key = true ;
1206
+ }
1207
+ return findPKRangesForFinalAfterSkipIndexImpl (ranges_in_data_parts, cannot_sort_primary_key, logger);
1208
+ }
1059
1209
}
0 commit comments