Skip to content

Commit 024f2ab

Browse files
kevincaimergify[bot]
authored andcommitted
[Enhancement] defensive strategy on on large mem allocation (#57041)
Signed-off-by: Kevin Cai <[email protected]> (cherry picked from commit b9075b7)
1 parent fe8cdbb commit 024f2ab

File tree

6 files changed

+148
-1
lines changed

6 files changed

+148
-1
lines changed

be/src/common/config.h

+4
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,10 @@ CONF_String(mem_limit, "90%");
8383
// Enable the jemalloc tracker, which is responsible for reserving memory
8484
CONF_Bool(enable_jemalloc_memory_tracker, "true");
8585

86+
// Whether abort the process if a large memory allocation is detected which the requested
87+
// size is larger than the available physical memory without wrapping with TRY_CATCH_BAD_ALLOC
88+
CONF_mBool(abort_on_large_memory_allocation, "false");
89+
8690
// The port heartbeat service used.
8791
CONF_Int32(heartbeat_service_port, "9050");
8892
// The count of heart beat service.

be/src/common/daemon.cpp

+7
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
#include "runtime/time_types.h"
5353
#include "runtime/user_function_cache.h"
5454
#include "service/backend_options.h"
55+
#include "service/mem_hook.h"
5556
#include "storage/options.h"
5657
#include "storage/storage_engine.h"
5758
#include "util/cpu_info.h"
@@ -345,6 +346,12 @@ void Daemon::init(bool as_cn, const std::vector<StorePath>& paths) {
345346

346347
init_signals();
347348
init_minidump();
349+
350+
// Don't bother set the limit if the process is running with very limited memory capacity
351+
if (MemInfo::physical_mem() > 1024 * 1024 * 1024) {
352+
// set mem hook to reject the memory allocation if large than available physical memory detected.
353+
set_large_memory_alloc_failure_threshold(MemInfo::physical_mem());
354+
}
348355
}
349356

350357
void Daemon::stop() {

be/src/service/mem_hook.cpp

+35-1
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,12 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15+
#include "mem_hook.h"
16+
1517
#include <iostream>
1618

1719
#include "common/compiler_util.h"
20+
#include "common/config.h"
1821
#include "glog/logging.h"
1922
#include "jemalloc/jemalloc.h"
2023
#include "runtime/current_thread.h"
@@ -92,6 +95,17 @@ std::atomic<int64_t> g_mem_usage(0);
9295
#define IS_BAD_ALLOC_CATCHED() false
9396
#endif
9497

98+
static int64_t g_large_memory_alloc_failure_threshold = 0;
99+
100+
namespace starrocks {
101+
// thread-safety is not a concern here since this is a really rare op
102+
int64_t set_large_memory_alloc_failure_threshold(int64_t val) {
103+
int64_t old_val = g_large_memory_alloc_failure_threshold;
104+
g_large_memory_alloc_failure_threshold = val;
105+
return old_val;
106+
}
107+
} // namespace starrocks
108+
95109
const size_t large_memory_alloc_report_threshold = 1073741824;
96110
inline thread_local bool skip_report = false;
97111
inline void report_large_memory_alloc(size_t size) {
@@ -101,7 +115,8 @@ inline void report_large_memory_alloc(size_t size) {
101115
auto qid = starrocks::CurrentThread::current().query_id();
102116
auto fid = starrocks::CurrentThread::current().fragment_instance_id();
103117
LOG(WARNING) << "large memory alloc, query_id:" << print_id(qid) << " instance: " << print_id(fid)
104-
<< " acquire:" << size << " bytes, stack:\n"
118+
<< " acquire:" << size << " bytes, is_bad_alloc_caught: " << IS_BAD_ALLOC_CATCHED()
119+
<< ", stack:\n"
105120
<< starrocks::get_stack_trace();
106121
} catch (...) {
107122
// do nothing
@@ -111,6 +126,18 @@ inline void report_large_memory_alloc(size_t size) {
111126
}
112127
#define STARROCKS_REPORT_LARGE_MEM_ALLOC(size) report_large_memory_alloc(size)
113128

129+
inline bool block_large_memory_alloc(size_t size) {
130+
if (UNLIKELY(g_large_memory_alloc_failure_threshold > 0 && size > g_large_memory_alloc_failure_threshold)) {
131+
// DON'T try to allocate the memory at all
132+
if (starrocks::config::abort_on_large_memory_allocation) {
133+
std::abort();
134+
} else {
135+
return true;
136+
}
137+
}
138+
return false;
139+
}
140+
114141
DEFINE_SCOPED_FAIL_POINT(mem_alloc_error);
115142

116143
#ifdef FIU_ENABLE
@@ -142,6 +169,10 @@ void* my_malloc(size_t size) __THROW {
142169
}
143170
return ptr;
144171
} else {
172+
if (UNLIKELY(block_large_memory_alloc(size))) {
173+
return nullptr;
174+
}
175+
145176
void* ptr = STARROCKS_MALLOC(size);
146177
// NOTE: do NOT call `tc_malloc_size` here, it may call the new operator, which in turn will
147178
// call the `my_malloc`, and result in a deadloop.
@@ -228,6 +259,9 @@ void* my_calloc(size_t n, size_t size) __THROW {
228259
}
229260
return ptr;
230261
} else {
262+
if (UNLIKELY(block_large_memory_alloc(n * size))) {
263+
return nullptr;
264+
}
231265
void* ptr = STARROCKS_CALLOC(n, size);
232266
int64_t alloc_size = STARROCKS_MALLOC_SIZE(ptr);
233267
MEMORY_CONSUME_SIZE(alloc_size);

be/src/service/mem_hook.h

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
// Copyright 2021-present StarRocks, Inc. All rights reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#pragma once
16+
17+
#include <cstdint>
18+
19+
namespace starrocks {
20+
21+
int64_t set_large_memory_alloc_failure_threshold(int64_t);
22+
23+
} // namespace starrocks

be/test/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,7 @@ set(EXEC_FILES
486486
./gutil/cpu_test.cc
487487
./gutil/sysinfo-test.cc
488488
./service/lake_service_test.cpp
489+
./service/mem_hook_test.cpp
489490
./service/service_be/internal_service_test.cpp
490491
./storage/metadata_util_test.cpp
491492
./storage/delta_writer_test.cpp

be/test/service/mem_hook_test.cpp

+78
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
// Copyright 2021-present StarRocks, Inc. All rights reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
// FIXME: The case doesn't work on ASAN case since the malloc will be hooked
16+
// when doing the ASAN init. Can't make it work so just disable it for now.
17+
#ifndef ADDRESS_SANITIZER
18+
19+
#include "service/mem_hook.h"
20+
21+
#include <gtest/gtest.h>
22+
23+
#include <vector>
24+
25+
namespace starrocks {
26+
27+
static void try_malloc_memory(size_t size) {
28+
std::vector<int8_t> arr;
29+
arr.reserve(size);
30+
EXPECT_EQ(size, arr.capacity());
31+
}
32+
33+
static void try_calloc_memory(size_t n, size_t size) {
34+
void* ptr = calloc(n, size);
35+
if (ptr == nullptr) {
36+
throw std::bad_alloc();
37+
}
38+
free(ptr);
39+
}
40+
41+
TEST(MemhookTest, test_malloc_mem_hook_block_without_try_catch) {
42+
set_large_memory_alloc_failure_threshold(0);
43+
int64_t blocking_size = 1024 * 1024;
44+
45+
set_large_memory_alloc_failure_threshold(blocking_size);
46+
// successful because blocking_size <= g_large_memory_alloc_failure_threshold
47+
EXPECT_NO_THROW(try_malloc_memory(blocking_size));
48+
49+
set_large_memory_alloc_failure_threshold(blocking_size - 1);
50+
// fail with std::bad_alloc for the same size
51+
EXPECT_THROW(try_malloc_memory(blocking_size), std::bad_alloc);
52+
53+
// reset to no limit
54+
set_large_memory_alloc_failure_threshold(0);
55+
}
56+
57+
TEST(MemhookTest, test_malloc_mem_hook_block_with_try_catch) {
58+
// IS_BAD_ALLOC_CATCHED is always `false` when builds with -DBE_TEST.
59+
// There is no easy way to simulate the mem_tracker limit check here.
60+
// Leave the test body empty as intended for now.
61+
}
62+
63+
TEST(MemhookTest, test_calloc_mem_hook_block_without_try_catch) {
64+
int64_t blocking_size = 1024 * 1024;
65+
66+
set_large_memory_alloc_failure_threshold(blocking_size);
67+
// successful because blocking_size <= g_large_memory_alloc_failure_threshold
68+
EXPECT_NO_THROW(try_calloc_memory(blocking_size / sizeof(int), sizeof(int)));
69+
70+
set_large_memory_alloc_failure_threshold(blocking_size - 1);
71+
// fail with std::bad_alloc for the same size
72+
EXPECT_THROW(try_calloc_memory(blocking_size / sizeof(int), sizeof(int)), std::bad_alloc);
73+
74+
// reset to no limit
75+
set_large_memory_alloc_failure_threshold(0);
76+
}
77+
} // namespace starrocks
78+
#endif

0 commit comments

Comments
 (0)