Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
fd5f798
Fix GetLineNumberTable JVMTI memory leak
jbachorik Jan 9, 2026
f008c7d
Document profiler memory requirements and AGCT constraints
jbachorik Jan 9, 2026
7dfb743
Fix GetLineNumberTableLeakTest to detect JVMTI leaks
jbachorik Jan 9, 2026
6281c46
Fix NumberFormatException handling in NativeMemoryTracking
jbachorik Jan 10, 2026
c31e8b5
Update ddprof-test/src/test/java/com/datadoghq/profiler/memleak/GetLi…
jbachorik Jan 10, 2026
693d3bc
Update ddprof-test/src/test/java/com/datadoghq/profiler/memleak/GetLi…
jbachorik Jan 10, 2026
4f2c30d
Update ddprof-test/src/test/java/com/datadoghq/profiler/memleak/GetLi…
jbachorik Jan 10, 2026
8506bac
Update ddprof-test/src/test/java/com/datadoghq/profiler/memleak/GetLi…
jbachorik Jan 10, 2026
ae5cc44
Update ddprof-test/src/test/java/com/datadoghq/profiler/memleak/GetLi…
jbachorik Jan 10, 2026
0824cb4
Update ddprof-test/src/test/java/com/datadoghq/profiler/memleak/GetLi…
jbachorik Jan 12, 2026
46fac98
Fix test comment to reflect actual restart interval (5 vs 200) (#329)
Copilot Jan 12, 2026
6e6f12a
PR cleanup
jbachorik Jan 12, 2026
cd0c279
Log a JVMTI error during attempt to deallocate linenumber table
jbachorik Jan 12, 2026
2ab1d26
Implement age-based method_map cleanup to prevent unbounded growth
jbachorik Jan 12, 2026
5b881a7
Update memory docs with method_map cleanup implementation
jbachorik Jan 12, 2026
ebf7f17
Remove stop/restart test, enable NMT by default
jbachorik Jan 12, 2026
89acf7e
Remove bogus plateau detection logic from test
jbachorik Jan 12, 2026
fc0e2af
Allow class unloading for better memory behavior
jbachorik Jan 12, 2026
6e7729f
Update test javadoc to reflect actual behavior
jbachorik Jan 12, 2026
bee9080
Add comparison test to validate cleanup effectiveness
jbachorik Jan 12, 2026
caf6ed1
Lower comparison test threshold to 10%
jbachorik Jan 12, 2026
5ea19ad
Track JVMTI deallocation and validate cleanup with RSS
jbachorik Jan 13, 2026
f48420f
Reduce test iterations for faster execution
jbachorik Jan 13, 2026
3ce7672
Handle unreliable RSS measurements on Zing JDK
jbachorik Jan 13, 2026
257d982
Use Counter for line number table tracking
jbachorik Jan 13, 2026
032d9a1
Skip tests on assumption failures instead of retrying
jbachorik Jan 13, 2026
05d870c
Remove debug logging for method cleanup setting
jbachorik Jan 13, 2026
33f6c5c
Detect RSS unreliability when NMT and RSS diverge
jbachorik Jan 13, 2026
4bbfcfe
Add diagnostic output explaining RSS unreliability
jbachorik Jan 13, 2026
00da241
Remove memory-based assertions from cleanup test
jbachorik Jan 13, 2026
7ed1e7e
Remove NMT/RSS tracking from cleanup test
jbachorik Jan 13, 2026
d2c7dee
Address Copilot review comments
jbachorik Jan 13, 2026
937745c
Update memory docs and restore build-and-summarize
jbachorik Jan 13, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 0 additions & 22 deletions .claude/settings.local.json

This file was deleted.

42 changes: 31 additions & 11 deletions ddprof-lib/src/main/cpp/arguments.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -319,17 +319,37 @@ Error Arguments::parse(const char *args) {
_lightweight = false;
}
}
CASE("wallsampler")
if (value != NULL) {
switch (value[0]) {
case 'j':
_wallclock_sampler = JVMTI;
break;
case 'a':
default:
_wallclock_sampler = ASGCT;
}
}

CASE("mcleanup")
if (value != NULL) {
switch (value[0]) {
case 'n': // no
case 'f': // false
case '0': // 0
_enable_method_cleanup = false;
break;
case 'y': // yes
case 't': // true
case '1': // 1
default:
_enable_method_cleanup = true;
}
} else {
// No value means enable
_enable_method_cleanup = true;
}

CASE("wallsampler")
if (value != NULL) {
switch (value[0]) {
case 'j':
_wallclock_sampler = JVMTI;
break;
case 'a':
default:
_wallclock_sampler = ASGCT;
}
}

DEFAULT()
if (_unknown_arg == NULL)
Expand Down
4 changes: 3 additions & 1 deletion ddprof-lib/src/main/cpp/arguments.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ class Arguments {
int _jfr_options;
std::vector<std::string> _context_attributes;
bool _lightweight;
bool _enable_method_cleanup;

Arguments(bool persistent = false)
: _buf(NULL),
Expand Down Expand Up @@ -219,7 +220,8 @@ class Arguments {
_jfr_options(0),
_context_attributes({}),
_wallclock_sampler(ASGCT),
_lightweight(false) {}
_lightweight(false),
_enable_method_cleanup(true) {}

~Arguments();

Expand Down
3 changes: 2 additions & 1 deletion ddprof-lib/src/main/cpp/counters.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@
X(SKIPPED_WALLCLOCK_UNWINDS, "skipped_wallclock_unwinds") \
X(UNWINDING_TIME_ASYNC, "unwinding_ticks_async") \
X(UNWINDING_TIME_JVMTI, "unwinding_ticks_jvmti") \
X(CALLTRACE_STORAGE_DROPPED, "calltrace_storage_dropped_traces")
X(CALLTRACE_STORAGE_DROPPED, "calltrace_storage_dropped_traces") \
X(LINE_NUMBER_TABLES, "line_number_tables")
#define X_ENUM(a, b) a,
typedef enum CounterId : int {
DD_COUNTER_TABLE(X_ENUM) DD_NUM_COUNTERS
Expand Down
125 changes: 111 additions & 14 deletions ddprof-lib/src/main/cpp/flightRecorder.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* Copyright The async-profiler authors
* Copyright 2025, Datadog, Inc.
* Copyright 2025, 2026 Datadog, Inc.
* SPDX-License-Identifier: Apache-2.0
*/

Expand Down Expand Up @@ -43,10 +43,26 @@
static const char *const SETTING_RING[] = {NULL, "kernel", "user", "any"};
static const char *const SETTING_CSTACK[] = {NULL, "no", "fp", "dwarf", "lbr"};

static void deallocateLineNumberTable(void *ptr) {}

SharedLineNumberTable::~SharedLineNumberTable() {
VM::jvmti()->Deallocate((unsigned char *)_ptr);
// Always attempt to deallocate if we have a valid pointer
// JVMTI spec requires that memory allocated by GetLineNumberTable
// must be freed with Deallocate
if (_ptr != nullptr) {
jvmtiEnv *jvmti = VM::jvmti();
if (jvmti != nullptr) {
jvmtiError err = jvmti->Deallocate((unsigned char *)_ptr);
// If Deallocate fails, log it for debugging (this could indicate a JVM bug)
// JVMTI_ERROR_ILLEGAL_ARGUMENT means the memory wasn't allocated by JVMTI
// which would be a serious bug in GetLineNumberTable
if (err != JVMTI_ERROR_NONE) {
TEST_LOG("Unexpected error while deallocating linenumber table: %d", err);
}
} else {
TEST_LOG("WARNING: Cannot deallocate line number table - JVMTI is null");
}
// Decrement counter whenever destructor runs (symmetric with increment at creation)
Counters::decrement(LINE_NUMBER_TABLES);
}
}

void Lookup::fillNativeMethodInfo(MethodInfo *mi, const char *name,
Expand Down Expand Up @@ -151,24 +167,44 @@ void Lookup::fillJavaMethodInfo(MethodInfo *mi, jmethodID method,
jvmti->GetMethodName(method, &method_name, &method_sig, NULL) == 0) {

if (first_time) {
jvmti->GetLineNumberTable(method, &line_number_table_size,
jvmtiError line_table_error = jvmti->GetLineNumberTable(method, &line_number_table_size,
&line_number_table);
// Defensive: if GetLineNumberTable failed, clean up any potentially allocated memory
// Some buggy JVMTI implementations might allocate despite returning an error
if (line_table_error != JVMTI_ERROR_NONE) {
if (line_number_table != nullptr) {
// Try to deallocate to prevent leak from buggy JVM
jvmti->Deallocate((unsigned char *)line_number_table);
}
line_number_table = nullptr;
line_number_table_size = 0;
}
}

// Check if the frame is Thread.run or inherits from it
if (strncmp(method_name, "run", 4) == 0 &&
strncmp(method_sig, "()V", 3) == 0) {
jclass Thread_class = jni->FindClass("java/lang/Thread");
jmethodID equals = jni->GetMethodID(jni->FindClass("java/lang/Class"),
"equals", "(Ljava/lang/Object;)Z");
jclass klass = method_class;
do {
entry = jni->CallBooleanMethod(Thread_class, equals, klass);
jniExceptionCheck(jni);
if (entry) {
break;
jclass Class_class = jni->FindClass("java/lang/Class");
if (Thread_class != nullptr && Class_class != nullptr) {
jmethodID equals = jni->GetMethodID(Class_class,
"equals", "(Ljava/lang/Object;)Z");
if (equals != nullptr) {
jclass klass = method_class;
do {
entry = jni->CallBooleanMethod(Thread_class, equals, klass);
if (jniExceptionCheck(jni)) {
entry = false;
break;
}
if (entry) {
break;
}
} while ((klass = jni->GetSuperclass(klass)) != NULL);
}
} while ((klass = jni->GetSuperclass(klass)) != NULL);
}
// Clear any exceptions from the reflection calls above
jniExceptionCheck(jni);
} else if (strncmp(method_name, "main", 5) == 0 &&
strncmp(method_sig, "(Ljava/lang/String;)V", 21)) {
// public static void main(String[] args) - 'public static' translates
Expand Down Expand Up @@ -250,6 +286,8 @@ void Lookup::fillJavaMethodInfo(MethodInfo *mi, jmethodID method,
if (line_number_table != nullptr) {
mi->_line_number_table = std::make_shared<SharedLineNumberTable>(
line_number_table_size, line_number_table);
// Increment counter for tracking live line number tables
Counters::increment(LINE_NUMBER_TABLES);
}

// strings are null or came from JVMTI
Expand Down Expand Up @@ -484,6 +522,12 @@ off_t Recording::finishChunk(bool end_recording) {

void Recording::switchChunk(int fd) {
_chunk_start = finishChunk(fd > -1);

// Cleanup unreferenced methods after finishing the chunk
cleanupUnreferencedMethods();

TEST_LOG("MethodMap: %zu methods after cleanup", _method_map.size());

_start_time = _stop_time;
_start_ticks = _stop_ticks;
_bytes_written = 0;
Expand Down Expand Up @@ -558,6 +602,52 @@ void Recording::cpuMonitorCycle() {
_last_times = times;
}

void Recording::cleanupUnreferencedMethods() {
if (!_args._enable_method_cleanup) {
return; // Feature disabled
}

const int AGE_THRESHOLD = 3; // Remove after 3 consecutive chunks without reference
size_t removed_count = 0;
size_t removed_with_line_tables = 0;
size_t total_before = _method_map.size();

for (MethodMap::iterator it = _method_map.begin(); it != _method_map.end(); ) {
MethodInfo& mi = it->second;

if (!mi._referenced) {
// Method not referenced in this chunk
mi._age++;

if (mi._age >= AGE_THRESHOLD) {
// Method hasn't been used for N chunks, safe to remove
// SharedLineNumberTable will be automatically deallocated via shared_ptr destructor
bool has_line_table = (mi._line_number_table != nullptr && mi._line_number_table->_ptr != nullptr);
if (has_line_table) {
removed_with_line_tables++;
}
it = _method_map.erase(it);
removed_count++;
continue;
}
} else {
// Method was referenced, reset age
mi._age = 0;
}

++it;
}

if (removed_count > 0) {
TEST_LOG("Cleaned up %zu unreferenced methods (age >= %d chunks, %zu with line tables, total: %zu -> %zu)",
removed_count, AGE_THRESHOLD, removed_with_line_tables, total_before, _method_map.size());

// Log current count of live line number tables
long long live_tables = Counters::getCounter(LINE_NUMBER_TABLES);
TEST_LOG("Live line number tables after cleanup: %lld", live_tables);
}
}

void Recording::appendRecording(const char *target_file, size_t size) {
int append_fd = open(target_file, O_WRONLY);
if (append_fd >= 0) {
Expand Down Expand Up @@ -1113,6 +1203,11 @@ void Recording::writeThreads(Buffer *buf) {
}

void Recording::writeStackTraces(Buffer *buf, Lookup *lookup) {
// Reset all referenced flags before processing
for (MethodMap::iterator it = _method_map.begin(); it != _method_map.end(); ++it) {
it->second._referenced = false;
}

// Use safe trace processing with guaranteed lifetime during callback execution
Profiler::instance()->processCallTraces([this, buf, lookup](const std::unordered_set<CallTrace*>& traces) {
buf->putVar64(T_STACK_TRACE);
Expand All @@ -1124,6 +1219,7 @@ void Recording::writeStackTraces(Buffer *buf, Lookup *lookup) {
if (trace->num_frames > 0) {
MethodInfo *mi =
lookup->resolveMethod(trace->frames[trace->num_frames - 1]);
mi->_referenced = true; // Mark method as referenced
if (mi->_type < FRAME_NATIVE) {
buf->put8(mi->_is_entry ? 0 : 1);
} else {
Expand All @@ -1133,6 +1229,7 @@ void Recording::writeStackTraces(Buffer *buf, Lookup *lookup) {
buf->putVar64(trace->num_frames);
for (int i = 0; i < trace->num_frames; i++) {
MethodInfo *mi = lookup->resolveMethod(trace->frames[i]);
mi->_referenced = true; // Mark method as referenced
buf->putVar64(mi->_key);
jint bci = trace->frames[i].bci;
if (mi->_type < FRAME_NATIVE) {
Expand Down
7 changes: 6 additions & 1 deletion ddprof-lib/src/main/cpp/flightRecorder.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,13 @@ class SharedLineNumberTable {
class MethodInfo {
public:
MethodInfo()
: _mark(false), _is_entry(false), _key(0), _class(0),
: _mark(false), _is_entry(false), _referenced(false), _age(0), _key(0), _class(0),
_name(0), _sig(0), _modifiers(0), _line_number_table(nullptr), _type() {}

bool _mark;
bool _is_entry;
bool _referenced; // Tracked during writeStackTraces() for cleanup
int _age; // Consecutive chunks without reference (0 = recently used)
u32 _key;
u32 _class;
u32 _name;
Expand Down Expand Up @@ -258,6 +260,9 @@ class Recording {
float machine_total);

void addThread(int lock_index, int tid);

private:
void cleanupUnreferencedMethods();
};

class Lookup {
Expand Down
11 changes: 11 additions & 0 deletions ddprof-lib/src/main/cpp/vmEntry.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* Copyright The async-profiler authors
* Copyright 2026, Datadog, Inc.
* SPDX-License-Identifier: Apache-2.0
*/

Expand Down Expand Up @@ -514,6 +515,16 @@ void VM::loadMethodIDs(jvmtiEnv *jvmti, JNIEnv *jni, jclass klass) {
}
}

// CRITICAL: GetClassMethods must be called to preallocate jmethodIDs for AsyncGetCallTrace.
// AGCT operates in signal handlers where lock acquisition is forbidden, so jmethodIDs must
// exist before profiling encounters them. Without preallocation, AGCT cannot identify methods
// in stack traces, breaking profiling functionality.
//
// JVM-internal allocation: This triggers JVM to allocate jmethodIDs internally, which persist
// until class unload. High class churn causes significant memory growth, but this is inherent
// to AGCT architecture and necessary for signal-safe profiling.
//
// See: https://mostlynerdless.de/blog/2023/07/17/jmethodids-in-profiling-a-tale-of-nightmares/
jint method_count;
jmethodID *methods;
if (jvmti->GetClassMethods(klass, &method_count, &methods) == 0) {
Expand Down
20 changes: 17 additions & 3 deletions ddprof-test/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def addCommonTestDependencies(Configuration configuration) {
configuration.dependencies.add(project.dependencies.create('org.lz4:lz4-java:1.8.0'))
configuration.dependencies.add(project.dependencies.create('org.xerial.snappy:snappy-java:1.1.10.1'))
configuration.dependencies.add(project.dependencies.create('com.github.luben:zstd-jni:1.5.5-4'))
configuration.dependencies.add(project.dependencies.create('org.ow2.asm:asm:9.6'))
configuration.dependencies.add(project.dependencies.project(path: ":ddprof-test-tracer"))
}

Expand Down Expand Up @@ -277,9 +278,22 @@ tasks.withType(Test).configureEach {
def keepRecordings = project.hasProperty("keepJFRs") || Boolean.parseBoolean(System.getenv("KEEP_JFRS"))
environment("CI", project.hasProperty("CI") || Boolean.parseBoolean(System.getenv("CI")))

jvmArgs "-Dddprof_test.keep_jfrs=${keepRecordings}", '-Djdk.attach.allowAttachSelf', '-Djol.tryWithSudo=true',
"-Dddprof_test.config=${config}", "-Dddprof_test.ci=${project.hasProperty('CI')}", "-Dddprof.disable_unsafe=true", '-XX:ErrorFile=build/hs_err_pid%p.log', '-XX:+ResizeTLAB',
'-Xmx512m', '-XX:OnError=/tmp/do_stuff.sh', "-Djava.library.path=${outputLibDir.absolutePath}"
// Base JVM arguments
def jvmArgsList = [
"-Dddprof_test.keep_jfrs=${keepRecordings}",
'-Djdk.attach.allowAttachSelf',
'-Djol.tryWithSudo=true',
"-Dddprof_test.config=${config}",
"-Dddprof_test.ci=${project.hasProperty('CI')}",
"-Dddprof.disable_unsafe=true",
'-XX:ErrorFile=build/hs_err_pid%p.log',
'-XX:+ResizeTLAB',
'-Xmx512m',
'-XX:OnError=/tmp/do_stuff.sh',
"-Djava.library.path=${outputLibDir.absolutePath}"
]

jvmArgs jvmArgsList

def javaHome = System.getenv("JAVA_TEST_HOME")
if (javaHome == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,11 @@ public Object resolveParameter(ParameterContext parameterContext, ExtensionConte
);
extensions.add(
(TestExecutionExceptionHandler) (extensionContext, throwable) -> {
// Don't retry on assumption failures - they should skip the test
if (throwable instanceof TestAbortedException) {
throw throwable;
}

int attempt = 0;
while (++attempt < retryCount) {
System.out.println("[Retrying] Attempt " + attempt + "/" + retryCount + " due to failure: " + throwable.getMessage());
Expand Down
Loading
Loading