From 8ed00d8a638c0ed8c569b8f41af2eb84bb1fa6c8 Mon Sep 17 00:00:00 2001
From: Murphy
Date: Fri, 21 Mar 2025 10:01:40 +0800
Subject: [PATCH] limit the state size to avoid excessive memory consumption

Signed-off-by: Murphy
---
 be/src/util/json_flattener.cpp       | 11 +++++++++++
 be/test/util/json_flattener_test.cpp | 19 +++++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/be/src/util/json_flattener.cpp b/be/src/util/json_flattener.cpp
index 4831c21bc1453..05f9d3a951404 100644
--- a/be/src/util/json_flattener.cpp
+++ b/be/src/util/json_flattener.cpp
@@ -473,7 +473,18 @@ void JsonPathDeriver::_derived(const Column* col, size_t mark_row) {
     }
 }
 
+// TODO: incrementally find the top-k, rather than visit all nodes first
+// recursively visit all JSON items to remember common paths
 void JsonPathDeriver::_visit_json_paths(const vpack::Slice& value, JsonFlatPath* root, size_t mark_row) {
+    const size_t COMMON_PATH_STATE_FACTOR = 10;
+    size_t state_limit = config::json_flat_column_max > 0 ? COMMON_PATH_STATE_FACTOR * config::json_flat_column_max
+                                                          : std::numeric_limits<size_t>::max();
+
+    // Optimize memory usage by limiting the number of paths to prevent excessive memory consumption
+    if (_derived_maps.size() >= state_limit) {
+        return;
+    }
+
     vpack::ObjectIterator it(value, false);
 
     for (; it.valid(); it.next()) {
diff --git a/be/test/util/json_flattener_test.cpp b/be/test/util/json_flattener_test.cpp
index 8359a86395883..5c511c51ba495 100644
--- a/be/test/util/json_flattener_test.cpp
+++ b/be/test/util/json_flattener_test.cpp
@@ -261,6 +261,25 @@ TEST_F(JsonFlattenerTest, testDeepJson3) {
     EXPECT_EQ("NULL", result[2]->debug_item(1));
 }
 
+TEST_F(JsonFlattenerTest, testLargeJson) {
+    std::vector<std::string> json;
+    for (int k = 0; k < 10; k++) {
+        json.emplace_back(R"({"k1": 1)");
+        for (int i = 2; i <= 2000; i++) {
+            json[k] += fmt::format(", \"k{0}\": {1}", i, i);
+        }
+        json[k] += "}";
+    }
+
+    std::vector<std::string> paths = {"k1", "k2", "k1000"};
+    std::vector<LogicalType> types = {TYPE_BIGINT, TYPE_BIGINT, TYPE_BIGINT};
+    auto result = test_json(json, paths, types, false);
+    EXPECT_EQ(3, result.size());
+    EXPECT_EQ("1", result[0]->debug_item(0));
+    EXPECT_EQ("2", result[1]->debug_item(0));
+    EXPECT_EQ("1000", result[2]->debug_item(0));
+}
+
 TEST_F(JsonFlattenerTest, testMiddleJson) {
     std::vector<std::string> json = {R"( {"k1": {"c1": {"d1": 123 }}, "k2": {"j1": "def", "j2": {"g1": [1,2,3]}}} )",
                                      R"( {"k1": {"c1": {"d1": "abc"}}, "k2": {"j1": "abc", "j2": {"g1": 123}}} )"};