Skip to content

Commit c521306

Browse files
committed
limit the state size to avoid excessive memory consumption
Signed-off-by: Murphy <[email protected]>
1 parent 085cc1c commit c521306

File tree

2 files changed

+30
-0
lines changed

2 files changed

+30
-0
lines changed

be/src/util/json_flattener.cpp

+11
Original file line number | Diff line number | Diff line change
@@ -473,7 +473,18 @@ void JsonPathDeriver::_derived(const Column* col, size_t mark_row) {
473473
}
474474
}
475475

476+
// TODO: find the top-k paths incrementally, rather than visiting all nodes first
477+
// recursively visit all JSON items to remember common paths
476478
void JsonPathDeriver::_visit_json_paths(const vpack::Slice& value, JsonFlatPath* root, size_t mark_row) {
479+
const size_t COMMON_PATH_STATE_FACTOR = 10;
480+
size_t state_limit = config::json_flat_column_max > 0 ? COMMON_PATH_STATE_FACTOR * config::json_flat_column_max
481+
: std::numeric_limits<size_t>::max();
482+
483+
// Stop visiting once the number of tracked paths reaches the state limit, to bound memory consumption
484+
if (_derived_maps.size() >= state_limit) {
485+
return;
486+
}
487+
477488
vpack::ObjectIterator it(value, false);
478489

479490
for (; it.valid(); it.next()) {

be/test/util/json_flattener_test.cpp

+19
Original file line number | Diff line number | Diff line change
@@ -261,6 +261,25 @@ TEST_F(JsonFlattenerTest, testDeepJson3) {
261261
EXPECT_EQ("NULL", result[2]->debug_item(1));
262262
}
263263

264+
// Feeds ten identical, very wide JSON objects (keys k1..k2000, where key kN
// maps to the integer N) through test_json and checks that the three requested
// paths are still returned with the expected values.
TEST_F(JsonFlattenerTest, testLargeJson) {
    constexpr int kRowCount = 10;
    constexpr int kKeyCount = 2000;

    // Build one row: {"k1": 1, "k2": 2, ..., "k2000": 2000}.
    std::string wide_object = R"({"k1": 1)";
    for (int key = 2; key <= kKeyCount; ++key) {
        wide_object += fmt::format(", \"k{0}\": {1}", key, key);
    }
    wide_object += "}";

    // All rows have the same content, so replicate the single row.
    std::vector<std::string> json(kRowCount, wide_object);

    std::vector<std::string> paths = {"k1", "k2", "k1000"};
    std::vector<LogicalType> types(paths.size(), TYPE_BIGINT);
    auto result = test_json(json, paths, types, false);

    // One result per requested path; item 0 of each must hold that key's value.
    EXPECT_EQ(3, result.size());
    EXPECT_EQ("1", result[0]->debug_item(0));
    EXPECT_EQ("2", result[1]->debug_item(0));
    EXPECT_EQ("1000", result[2]->debug_item(0));
}
282+
264283
TEST_F(JsonFlattenerTest, testMiddleJson) {
265284
std::vector<std::string> json = {R"( {"k1": {"c1": {"d1": 123 }}, "k2": {"j1": "def", "j2": {"g1": [1,2,3]}}} )",
266285
R"( {"k1": {"c1": {"d1": "abc"}}, "k2": {"j1": "abc", "j2": {"g1": 123}}} )"};

0 commit comments

Comments
 (0)