Skip to content

Commit 0abde0f

Browse files
committed
[BugFix] Fix topn with large limit offset regression (backport #56590)
Signed-off-by: stdpain <[email protected]> Signed-off-by: stdpain <[email protected]>
1 parent 34973ab commit 0abde0f

6 files changed

+61
-34
lines changed

be/src/exec/chunks_sorter.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "runtime/current_thread.h"
2525
#include "runtime/runtime_state.h"
2626
#include "util/orlp/pdqsort.h"
27+
#include "util/runtime_profile.h"
2728
#include "util/stopwatch.hpp"
2829

2930
namespace starrocks {
@@ -58,6 +59,7 @@ void ChunksSorter::setup_runtime(RuntimeState* state, RuntimeProfile* profile, M
5859
_sort_timer = ADD_TIMER(profile, "SortingTime");
5960
_merge_timer = ADD_TIMER(profile, "MergingTime");
6061
_output_timer = ADD_TIMER(profile, "OutputTime");
62+
_sort_cnt = ADD_COUNTER(profile, "SortingCnt", TUnit::UNIT);
6163
profile->add_info_string("SortKeys", _sort_keys);
6264
profile->add_info_string("SortType", _is_topn ? "TopN" : "All");
6365
}

be/src/exec/chunks_sorter.h

+1
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ class ChunksSorter {
154154
RuntimeProfile::Counter* _sort_timer = nullptr;
155155
RuntimeProfile::Counter* _merge_timer = nullptr;
156156
RuntimeProfile::Counter* _output_timer = nullptr;
157+
RuntimeProfile::Counter* _sort_cnt = nullptr;
157158

158159
size_t _revocable_mem_bytes = 0;
159160
spill::SpillStrategy _spill_strategy = spill::SpillStrategy::NO_SPILL;

be/src/exec/chunks_sorter_topn.cpp

+29-23
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "runtime/runtime_state.h"
2828
#include "types/logical_type_infra.h"
2929
#include "util/orlp/pdqsort.h"
30+
#include "util/runtime_profile.h"
3031
#include "util/stopwatch.hpp"
3132

3233
namespace starrocks {
@@ -68,6 +69,8 @@ ChunksSorterTopn::ChunksSorterTopn(RuntimeState* state, const std::vector<ExprCo
6869
_topn_type(topn_type) {
6970
DCHECK_GT(_get_number_of_rows_to_sort(), 0) << "output rows can't be empty";
7071
DCHECK(_topn_type == TTopNType::ROW_NUMBER || _offset == 0);
72+
_init_buffered_chunks = tunning_buffered_chunks(_get_number_of_rows_to_sort());
73+
_buffered_chunks_capacity = _init_buffered_chunks;
7174
auto& raw_chunks = _raw_chunks.chunks;
7275
// avoid too large buffer chunks
7376
raw_chunks.reserve(std::min<size_t>(max_buffered_chunks, 256));
@@ -88,43 +91,35 @@ Status ChunksSorterTopn::update(RuntimeState* state, const ChunkPtr& chunk) {
8891
}
8992
auto& raw_chunks = _raw_chunks.chunks;
9093
size_t chunk_number = raw_chunks.size();
94+
size_t prev_chunk_memusage = 0;
9195
if (chunk_number <= 0) {
9296
raw_chunks.push_back(chunk);
9397
chunk_number++;
9498
} else if (raw_chunks[chunk_number - 1]->num_rows() + chunk->num_rows() > _state->chunk_size()) {
9599
raw_chunks.push_back(chunk);
96100
chunk_number++;
97101
} else {
102+
prev_chunk_memusage = raw_chunks[chunk_number - 1]->memory_usage();
98103
// Old planner will not remove duplicated sort column.
99104
// columns in chunk may have same column ptr
100105
// append_safe will check size of all columns in dest chunk
101106
// to ensure same column will not apppend repeatedly.
102107
raw_chunks[chunk_number - 1]->append_safe(*chunk);
103108
}
109+
_raw_chunks.update_mem_usage(raw_chunks[chunk_number - 1]->memory_usage() - prev_chunk_memusage);
104110
_raw_chunks.size_of_rows += chunk->num_rows();
105111

106112
// Avoid TOPN from using too much memory.
107-
bool exceed_mem_limit = _raw_chunks.mem_usage() > _max_buffered_bytes;
113+
bool exceed_mem_limit = _raw_chunks.mem_usage > _max_buffered_bytes;
108114
if (exceed_mem_limit) {
109115
return _sort_chunks(state);
110116
}
111117

112-
// Try to accumulate more chunks.
113-
size_t rows_to_sort = _get_number_of_rows_to_sort();
114-
if (_merged_runs.num_rows() + _raw_chunks.size_of_rows < rows_to_sort) {
115-
return Status::OK();
116-
}
117-
118-
// We have accumulated rows_to_sort rows to build merged runs.
119-
if (_merged_runs.num_rows() <= rows_to_sort) {
120-
return _sort_chunks(state);
121-
}
122-
123-
// When number of Chunks exceeds _limit or _max_buffered_chunks, run sort and then part of
118+
// When number of Chunks exceeds _buffered_chunks_capacity or rows greater than _max_buffered_rows , run sort and then part of
124119
// cached chunks can be dropped, so it can reduce the memory usage.
125120
// TopN caches _limit or _max_buffered_chunks primitive chunks,
126121
// performs sorting once, and discards extra rows
127-
if (chunk_number >= _max_buffered_chunks || _raw_chunks.size_of_rows > _max_buffered_rows) {
122+
if (chunk_number >= _buffered_chunks_capacity || _raw_chunks.size_of_rows > _max_buffered_rows) {
128123
return _sort_chunks(state);
129124
}
130125

@@ -228,6 +223,9 @@ size_t ChunksSorterTopn::get_output_rows() const {
228223
}
229224

230225
Status ChunksSorterTopn::_sort_chunks(RuntimeState* state) {
226+
if (_sort_cnt) {
227+
COUNTER_UPDATE(_sort_cnt, 1);
228+
}
231229
// Chunks for this batch.
232230
DataSegments segments;
233231

@@ -569,6 +567,7 @@ Status ChunksSorterTopn::_merge_sort_common(MergedRuns* dst, DataSegments& segme
569567
}
570568

571569
if (_merged_runs.num_chunks() > 1 || _merged_runs.mem_usage() > _max_buffered_bytes) {
570+
_adjust_chunks_capacity(true);
572571
// merge to multi sorted chunks
573572
RETURN_IF_ERROR(merge_sorted_chunks(_sort_desc, _sort_exprs, _merged_runs, std::move(right_unique_chunk),
574573
rows_to_keep, dst));
@@ -583,24 +582,31 @@ Status ChunksSorterTopn::_merge_sort_common(MergedRuns* dst, DataSegments& segme
583582
// prepare right chunk
584583
ChunkPtr right_chunk = std::move(right_unique_chunk);
585584

586-
Permutation merged_perm;
587-
merged_perm.reserve(left_chunk->num_rows() + right_chunk->num_rows());
588-
589-
RETURN_IF_ERROR(merge_sorted_chunks_two_way(_sort_desc, {left_chunk, left_columns},
590-
{right_chunk, right_columns}, &merged_perm));
591-
CHECK_GE(merged_perm.size(), rows_to_keep);
592-
merged_perm.resize(rows_to_keep);
585+
const SortedRun left = {left_chunk, left_columns};
586+
const SortedRun right = {right_chunk, right_columns};
587+
bool intersected = !left.empty() && !right.empty() && !left.intersect(_sort_desc, right);
588+
// adjust chunks capacity
589+
_adjust_chunks_capacity(intersected);
593590

594-
// materialize into the dst runs
595-
std::vector<ChunkPtr> chunks{left_chunk, right_chunk};
596591
ChunkUniquePtr big_chunk;
597592
if (dst->num_chunks() == 0) {
598593
big_chunk = segments[permutation_second[0].chunk_index].chunk->clone_empty(rows_to_keep);
599594
} else {
600595
big_chunk = std::move(dst->front().chunk);
601596
dst->pop_front();
602597
}
598+
599+
Permutation merged_perm;
600+
merged_perm.reserve(left_chunk->num_rows() + right_chunk->num_rows());
601+
RETURN_IF_ERROR(merge_sorted_chunks_two_way(_sort_desc, left, right, &merged_perm));
602+
CHECK_GE(merged_perm.size(), rows_to_keep);
603+
merged_perm.resize(rows_to_keep);
604+
605+
// materialize into the dst runs
606+
std::vector<ChunkPtr> chunks{left_chunk, right_chunk};
607+
603608
materialize_by_permutation(big_chunk.get(), chunks, merged_perm);
609+
604610
RETURN_IF_ERROR(big_chunk->upgrade_if_overflow());
605611
ASSIGN_OR_RETURN(auto run, MergedRun::build(std::move(big_chunk), *_sort_exprs));
606612
dst->push_back(std::move(run));

be/src/exec/chunks_sorter_topn.h

+27-9
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,11 @@ class ChunksSorterTopn : public ChunksSorter {
4848
if (limit <= 65536) {
4949
return 64;
5050
}
51-
return std::max<size_t>(256, limit / 4096);
51+
return 256;
52+
}
53+
54+
static constexpr size_t max_buffered_chunks(size_t rows_to_sort) {
55+
return std::max<size_t>(tunning_buffered_chunks(rows_to_sort), rows_to_sort / 4069);
5256
}
5357

5458
/**
@@ -78,7 +82,7 @@ class ChunksSorterTopn : public ChunksSorter {
7882

7983
size_t get_output_rows() const override;
8084

81-
int64_t mem_usage() const override { return _raw_chunks.mem_usage() + _merged_runs.mem_usage(); }
85+
int64_t mem_usage() const override { return _raw_chunks.mem_usage + _merged_runs.mem_usage(); }
8286

8387
void setup_runtime(RuntimeState* state, RuntimeProfile* profile, MemTracker* parent_mem_tracker) override;
8488

@@ -154,23 +158,20 @@ class ChunksSorterTopn : public ChunksSorter {
154158
struct RawChunks {
155159
std::vector<ChunkPtr> chunks;
156160
size_t size_of_rows = 0;
161+
size_t mem_usage = 0;
157162

158-
int64_t mem_usage() const {
159-
int64_t usage = 0;
160-
for (auto& chunk : chunks) {
161-
usage += chunk->memory_usage();
162-
}
163-
return usage;
164-
}
163+
void update_mem_usage(size_t delta) { mem_usage += delta; }
165164

166165
void clear() {
167166
chunks.clear();
168167
size_of_rows = 0;
168+
mem_usage = 0;
169169
}
170170
};
171171
const size_t _max_buffered_rows;
172172
const size_t _max_buffered_bytes;
173173
const size_t _max_buffered_chunks;
174+
size_t _init_buffered_chunks;
174175
RawChunks _raw_chunks;
175176
bool _init_merged_segment;
176177
MergedRuns _merged_runs;
@@ -179,6 +180,23 @@ class ChunksSorterTopn : public ChunksSorter {
179180
const size_t _offset;
180181
const TTopNType::type _topn_type;
181182

183+
int _highest_nozero_pos(size_t val) {
184+
if (val == 0) {
185+
return 0;
186+
}
187+
return (sizeof(size_t) * 8) - __builtin_clzll(val) - 1;
188+
}
189+
190+
void _adjust_chunks_capacity(bool inc) {
191+
if (inc) {
192+
size_t shift = (_highest_nozero_pos(_max_buffered_chunks) - _highest_nozero_pos(_init_buffered_chunks)) / 4;
193+
shift = std::max<size_t>(shift, 1);
194+
_buffered_chunks_capacity = _buffered_chunks_capacity << shift;
195+
_buffered_chunks_capacity = std::min(_buffered_chunks_capacity, _max_buffered_chunks);
196+
}
197+
}
198+
199+
size_t _buffered_chunks_capacity;
182200
std::vector<JoinRuntimeFilter*> _runtime_filter;
183201

184202
RuntimeProfile::Counter* _sort_filter_rows = nullptr;

be/src/exec/pipeline/sort/partition_sort_sink_operator.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ OperatorPtr PartitionSortSinkOperatorFactory::create(int32_t dop, int32_t driver
121121
runtime_state(), &(_sort_exec_exprs.lhs_ordering_expr_ctxs()), &_is_asc_order, &_is_null_first,
122122
_sort_keys, 0, _limit + _offset);
123123
} else {
124-
size_t max_buffered_chunks = ChunksSorterTopn::tunning_buffered_chunks(_limit);
124+
size_t max_buffered_chunks = ChunksSorterTopn::max_buffered_chunks(_limit);
125125
chunks_sorter = std::make_unique<ChunksSorterTopn>(
126126
runtime_state(), &(_sort_exec_exprs.lhs_ordering_expr_ctxs()), &_is_asc_order, &_is_null_first,
127127
_sort_keys, 0, _limit + _offset, _topn_type, _max_buffered_rows, _max_buffered_bytes,

be/src/exec/topn_node.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ Status TopNNode::_consume_chunks(RuntimeState* state, ExecNode* child) {
225225
_chunks_sorter = std::make_unique<ChunksSorterTopn>(
226226
state, &(_sort_exec_exprs.lhs_ordering_expr_ctxs()), &_is_asc_order, &_is_null_first, _sort_keys,
227227
_offset, _limit, TTopNType::ROW_NUMBER, ChunksSorterTopn::kDefaultMaxBufferRows,
228-
ChunksSorterTopn::kDefaultMaxBufferBytes, ChunksSorterTopn::tunning_buffered_chunks(_limit));
228+
ChunksSorterTopn::kDefaultMaxBufferBytes, ChunksSorterTopn::max_buffered_chunks(_limit));
229229
}
230230

231231
} else {

0 commit comments

Comments
 (0)