diff --git a/.claude/commands/build-and-summarize b/.claude/commands/build-and-summarize index fa59823fd..ee58b799c 100755 --- a/.claude/commands/build-and-summarize +++ b/.claude/commands/build-and-summarize @@ -72,4 +72,4 @@ echo echo "Delegating to gradle-logs-analyst agent…" # If your CLI supports non-streaming, set it here to avoid verbose output. # Example (uncomment if supported): export CLAUDE_NO_STREAM=1 -claude "Act as the gradle-logs-analyst agent to parse the build log at: $LOG. Generate the required Gradle summary artifacts as specified in the gradle-logs-analyst agent definition." \ No newline at end of file +claude "Act as the gradle-logs-analyst agent to parse the build log at: $LOG. Generate the required Gradle summary artifacts as specified in the gradle-logs-analyst agent definition." diff --git a/.claude/commands/build-and-summarize.md b/.claude/commands/build-and-summarize.md index f05f21157..dc615d992 100644 --- a/.claude/commands/build-and-summarize.md +++ b/.claude/commands/build-and-summarize.md @@ -1,7 +1,11 @@ # build-and-summarize -Runs `./gradlew` with full output captured to a timestamped log, shows minimal live progress (task starts + final build/test summary), then asks the `gradle-logs-analyst` agent to produce structured artifacts from the log. +Execute the bash script `~/.claude/commands/build-and-summarize` with all provided arguments. -## Usage -```bash -./.claude/commands/build-and-summarize [...] \ No newline at end of file +This script will: +- Run `./gradlew` with the specified arguments (defaults to 'build' if none provided) +- Capture full output to a timestamped log in `build/logs/` +- Show minimal live progress in the console +- Delegate to the `gradle-logs-analyst` agent for structured analysis + +Pass through all arguments exactly as provided by the user. diff --git a/README.md b/README.md index 7455d5a3a..e3a074318 100644 --- a/README.md +++ b/README.md @@ -402,6 +402,25 @@ Improved thread-local storage initialization to prevent race conditions: These architectural improvements focus on eliminating race conditions, improving performance in high-throughput scenarios, and providing better debugging capabilities for the native profiling engine. +### Remote Symbolication Support (2025) + +Added support for remote symbolication to enable offloading symbol resolution from the agent to backend services: + +- **Build-ID extraction**: Automatically extracts GNU build-id from ELF binaries on Linux +- **Raw addressing information**: Stores build-id and PC offset instead of resolved symbol names +- **Remote symbolication mode**: Enable with `remotesym=true` profiler argument +- **JFR integration**: Remote frames serialized with build-id and offset for backend resolution +- **Zero encoding overhead**: Uses dedicated frame type (FRAME_NATIVE_REMOTE) for efficient serialization + +**Benefits**: +- Reduces agent overhead by eliminating local symbol resolution +- Enables centralized symbol resolution with better caching +- Supports scenarios where debug symbols are not available locally + +**Key files**: `elfBuildId.h`, `elfBuildId.cpp`, `profiler.cpp`, `flightRecorder.cpp` + +For detailed documentation, see [doc/RemoteSymbolication.md](doc/RemoteSymbolication.md). + ## Contributing 1. Fork the repository 2. Create a feature branch diff --git a/ddprof-lib/src/main/cpp/arguments.cpp b/ddprof-lib/src/main/cpp/arguments.cpp index 6dd01d032..72b8aec22 100644 --- a/ddprof-lib/src/main/cpp/arguments.cpp +++ b/ddprof-lib/src/main/cpp/arguments.cpp @@ -1,5 +1,6 @@ /* * Copyright 2017 Andrei Pangin + * Copyright 2026, Datadog, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -88,7 +89,10 @@ static const Multiplier UNIVERSAL[] = { // samples // generations - track surviving generations // lightweight[=BOOL] - enable lightweight profiling - events without -// stacktraces (default: true) jfr - dump events in Java +// stacktraces (default: true) +// remotesym[=BOOL] - enable remote symbolication for native frames +// (stores build-id and PC offset instead of symbol names) +// jfr - dump events in Java // Flight Recorder format interval=N - sampling interval in ns // (default: 10'000'000, i.e. 10 ms) jstackdepth=N - maximum Java stack // depth (default: 2048) safemode=BITS - disable stack recovery @@ -339,16 +343,35 @@ Error Arguments::parse(const char *args) { _enable_method_cleanup = true; } - CASE("wallsampler") + CASE("remotesym") if (value != NULL) { switch (value[0]) { - case 'j': - _wallclock_sampler = JVMTI; + case 'n': // no + case 'f': // false + case '0': // 0 + _remote_symbolication = false; break; - case 'a': + case 'y': // yes + case 't': // true + case '1': // 1 default: - _wallclock_sampler = ASGCT; + _remote_symbolication = true; } + } else { + // No value means enable + _remote_symbolication = true; + } + + CASE("wallsampler") + if (value != NULL) { + switch (value[0]) { + case 'j': + _wallclock_sampler = JVMTI; + break; + case 'a': + default: + _wallclock_sampler = ASGCT; + } } DEFAULT() diff --git a/ddprof-lib/src/main/cpp/arguments.h b/ddprof-lib/src/main/cpp/arguments.h index babfd4336..3f2542705 100644 --- a/ddprof-lib/src/main/cpp/arguments.h +++ b/ddprof-lib/src/main/cpp/arguments.h @@ -188,6 +188,7 @@ class Arguments { std::vector _context_attributes; bool _lightweight; bool _enable_method_cleanup; + bool _remote_symbolication; // Enable remote symbolication for native frames Arguments(bool persistent = false) : _buf(NULL), @@ -221,7 +222,8 @@ class Arguments { _context_attributes({}), _wallclock_sampler(ASGCT), _lightweight(false), - _enable_method_cleanup(true) {} + _enable_method_cleanup(true), + _remote_symbolication(false) {} ~Arguments(); diff --git a/ddprof-lib/src/main/cpp/codeCache.cpp b/ddprof-lib/src/main/cpp/codeCache.cpp index 1479ceb6f..8a3217a94 100644 --- a/ddprof-lib/src/main/cpp/codeCache.cpp +++ b/ddprof-lib/src/main/cpp/codeCache.cpp @@ -37,6 +37,11 @@ CodeCache::CodeCache(const char *name, short lib_index, _plt_size = 0; _debug_symbols = false; + // Initialize build-id fields + _build_id = nullptr; + _build_id_len = 0; + _load_bias = 0; + memset(_imports, 0, sizeof(_imports)); _imports_patchable = imports_patchable; @@ -54,10 +59,27 @@ CodeCache::CodeCache(const CodeCache &other) { _min_address = other._min_address; _max_address = other._max_address; _text_base = other._text_base; + _image_base = other._image_base; - _imports_patchable = other._imports_patchable; _plt_offset = other._plt_offset; _plt_size = other._plt_size; + _debug_symbols = other._debug_symbols; + + // Copy build-id information + _build_id_len = other._build_id_len; + if (other._build_id != nullptr && other._build_id_len > 0) { + size_t hex_str_len = strlen(other._build_id); + _build_id = static_cast(malloc(hex_str_len + 1)); + if (_build_id != nullptr) { + strcpy(_build_id, other._build_id); + } + } else { + _build_id = nullptr; + } + _load_bias = other._load_bias; + + memset(_imports, 0, sizeof(_imports)); + _imports_patchable = other._imports_patchable; _dwarf_table_length = other._dwarf_table_length; _dwarf_table = new FrameDesc[_dwarf_table_length]; @@ -77,17 +99,34 @@ CodeCache &CodeCache::operator=(const CodeCache &other) { delete _name; delete _dwarf_table; delete _blobs; + free(_build_id); // Free existing build-id _name = NativeFunc::create(other._name, -1); _lib_index = other._lib_index; _min_address = other._min_address; _max_address = other._max_address; _text_base = other._text_base; - - _imports_patchable = other._imports_patchable; + _image_base = other._image_base; _plt_offset = other._plt_offset; _plt_size = other._plt_size; + _debug_symbols = other._debug_symbols; + + // Copy build-id information + _build_id_len = other._build_id_len; + if (other._build_id != nullptr && other._build_id_len > 0) { + size_t hex_str_len = strlen(other._build_id); + _build_id = static_cast(malloc(hex_str_len + 1)); + if (_build_id != nullptr) { + strcpy(_build_id, other._build_id); + } + } else { + _build_id = nullptr; + } + _load_bias = other._load_bias; + + memset(_imports, 0, sizeof(_imports)); + _imports_patchable = other._imports_patchable; _dwarf_table_length = other._dwarf_table_length; _dwarf_table = new FrameDesc[_dwarf_table_length]; @@ -110,6 +149,7 @@ CodeCache::~CodeCache() { NativeFunc::destroy(_name); delete[] _blobs; delete _dwarf_table; + free(_build_id); // Free build-id memory } void CodeCache::expand() { @@ -387,3 +427,24 @@ FrameDesc CodeCache::findFrameDesc(const void *pc) { return FrameDesc::default_frame; } } + +void CodeCache::setBuildId(const char* build_id, size_t build_id_len) { + // Free existing build-id if any + free(_build_id); + _build_id = nullptr; + _build_id_len = 0; + + if (build_id != nullptr && build_id_len > 0) { + // build_id is a hex string, allocate based on actual string length + size_t hex_str_len = strlen(build_id); + _build_id = static_cast(malloc(hex_str_len + 1)); + + if (_build_id != nullptr) { + // Copy the hex string + strcpy(_build_id, build_id); + // Store the original byte length (not hex string length) + _build_id_len = build_id_len; + } + } +} + diff --git a/ddprof-lib/src/main/cpp/codeCache.h b/ddprof-lib/src/main/cpp/codeCache.h index a1c804b82..6ff627d5a 100644 --- a/ddprof-lib/src/main/cpp/codeCache.h +++ b/ddprof-lib/src/main/cpp/codeCache.h @@ -116,6 +116,11 @@ class CodeCache { unsigned int _plt_offset; unsigned int _plt_size; + // Build-ID and load bias for remote symbolication + char *_build_id; // GNU build-id (hex string, null if not available) + size_t _build_id_len; // Build-id length in bytes (raw, not hex string length) + uintptr_t _load_bias; // Load bias (image_base - file_base address) + void **_imports[NUM_IMPORTS][NUM_IMPORT_TYPES]; bool _imports_patchable; bool _debug_symbols; @@ -169,10 +174,30 @@ class CodeCache { void setDebugSymbols(bool debug_symbols) { _debug_symbols = debug_symbols; } + // Build-ID and remote symbolication support + const char* buildId() const { return _build_id; } + size_t buildIdLen() const { return _build_id_len; } + bool hasBuildId() const { return _build_id != nullptr; } + uintptr_t loadBias() const { return _load_bias; } + short libIndex() const { return _lib_index; } + + // Sets the build-id (hex string) and stores the original byte length + // build_id: null-terminated hex string (e.g., "abc123..." for 40-char string) + // build_id_len: original byte length before hex conversion (e.g., 20 bytes) + void setBuildId(const char* build_id, size_t build_id_len); + void setLoadBias(uintptr_t load_bias) { _load_bias = load_bias; } + void add(const void *start, int length, const char *name, bool update_bounds = false); void updateBounds(const void *start, const void *end); void sort(); + + /** + * Mark symbols matching the predicate with the given mark value. + * + * This is called during profiler initialization to mark JVM internal functions + * (MARK_VM_RUNTIME, MARK_INTERPRETER, MARK_COMPILER_ENTRY, MARK_ASYNC_PROFILER). + */ template inline void mark(NamePredicate predicate, char value) { for (int i = 0; i < _count; i++) { @@ -225,7 +250,7 @@ class CodeCacheArray { memset(_libs, 0, MAX_NATIVE_LIBS * sizeof(CodeCache *)); } - CodeCache *operator[](int index) { return _libs[index]; } + CodeCache *operator[](int index) const { return __atomic_load_n(&_libs[index], __ATOMIC_ACQUIRE); } int count() const { return __atomic_load_n(&_count, __ATOMIC_RELAXED); } @@ -247,7 +272,7 @@ class CodeCacheArray { return lib; } - size_t memoryUsage() { + size_t memoryUsage() const { return __atomic_load_n(&_used_memory, __ATOMIC_RELAXED); } }; diff --git a/ddprof-lib/src/main/cpp/counters.h b/ddprof-lib/src/main/cpp/counters.h index af26a88f3..e3d26b1d2 100644 --- a/ddprof-lib/src/main/cpp/counters.h +++ b/ddprof-lib/src/main/cpp/counters.h @@ -64,7 +64,10 @@ X(UNWINDING_TIME_ASYNC, "unwinding_ticks_async") \ X(UNWINDING_TIME_JVMTI, "unwinding_ticks_jvmti") \ X(CALLTRACE_STORAGE_DROPPED, "calltrace_storage_dropped_traces") \ - X(LINE_NUMBER_TABLES, "line_number_tables") + X(LINE_NUMBER_TABLES, "line_number_tables") \ + X(REMOTE_SYMBOLICATION_FRAMES, "remote_symbolication_frames") \ + X(REMOTE_SYMBOLICATION_LIBS_WITH_BUILD_ID, "remote_symbolication_libs_with_build_id") \ + X(REMOTE_SYMBOLICATION_BUILD_ID_CACHE_HITS, "remote_symbolication_build_id_cache_hits") #define X_ENUM(a, b) a, typedef enum CounterId : int { DD_COUNTER_TABLE(X_ENUM) DD_NUM_COUNTERS diff --git a/ddprof-lib/src/main/cpp/flightRecorder.cpp b/ddprof-lib/src/main/cpp/flightRecorder.cpp index 6de788e3f..c4b26a1b5 100644 --- a/ddprof-lib/src/main/cpp/flightRecorder.cpp +++ b/ddprof-lib/src/main/cpp/flightRecorder.cpp @@ -1,10 +1,11 @@ /* * Copyright The async-profiler authors - * Copyright 2025, 2026 Datadog, Inc. + * Copyright 2026, Datadog, Inc. * SPDX-License-Identifier: Apache-2.0 */ #include +#include #include "buffers.h" #include "callTraceHashTable.h" @@ -114,6 +115,25 @@ void Lookup::fillNativeMethodInfo(MethodInfo *mi, const char *name, } } +void Lookup::fillRemoteFrameInfo(MethodInfo *mi, const RemoteFrameInfo *rfi) { + // Store build-id in the class name field + mi->_class = _classes->lookup(rfi->build_id); + + // Store PC offset in hex format in the signature field + char offset_hex[32]; + snprintf(offset_hex, sizeof(offset_hex), "0x%" PRIxPTR, rfi->pc_offset); + mi->_sig = _symbols.lookup(offset_hex); + + // Use same modifiers as regular native frames (0x100 = ACC_NATIVE for consistency) + mi->_modifiers = 0x100; + // Use FRAME_NATIVE_REMOTE type to indicate remote symbolication + mi->_type = FRAME_NATIVE_REMOTE; + mi->_line_number_table = nullptr; + + // Method name indicates need for remote symbolication + mi->_name = _symbols.lookup(""); +} + void Lookup::cutArguments(char *func) { char *p = strrchr(func, ')'); if (p == NULL) @@ -305,7 +325,9 @@ void Lookup::fillJavaMethodInfo(MethodInfo *mi, jmethodID method, } MethodInfo *Lookup::resolveMethod(ASGCT_CallFrame &frame) { + jint bci = frame.bci; jmethodID method = frame.method_id; + MethodInfo *mi = &(*_method_map)[method]; if (!mi->_mark) { @@ -316,12 +338,35 @@ MethodInfo *Lookup::resolveMethod(ASGCT_CallFrame &frame) { } if (method == NULL) { fillNativeMethodInfo(mi, "unknown", NULL); - } else if (frame.bci == BCI_ERROR) { + } else if (bci == BCI_ERROR) { fillNativeMethodInfo(mi, (const char *)method, NULL); - } else if (frame.bci == BCI_NATIVE_FRAME) { + } else if (bci == BCI_NATIVE_FRAME) { const char *name = (const char *)method; fillNativeMethodInfo(mi, name, Profiler::instance()->getLibraryName(name)); + } else if (bci == BCI_NATIVE_FRAME_REMOTE) { + // Unpack remote symbolication data using utility struct + // Layout: pc_offset (44 bits) | mark (3 bits) | lib_index (17 bits) + uintptr_t pc_offset = Profiler::RemoteFramePacker::unpackPcOffset(method); + char mark = Profiler::RemoteFramePacker::unpackMark(method); + uint32_t lib_index = Profiler::RemoteFramePacker::unpackLibIndex(method); + + TEST_LOG("Unpacking remote frame: packed=0x%lx, pc_offset=0x%lx, mark=%d, lib_index=%u", + (uintptr_t)method, pc_offset, (int)mark, lib_index); + + // Lookup library by index to get build_id + // Note: This is called during JFR serialization with lockAll() held (see Profiler::dump), + // so the library array is stable - no concurrent dlopen_hook calls can modify it. + CodeCache* lib = Libraries::instance()->getLibraryByIndex(lib_index); + if (lib != nullptr) { + TEST_LOG("Found library: %s, build_id=%s", lib->name(), lib->buildId()); + // Create temporary RemoteFrameInfo for fillRemoteFrameInfo + RemoteFrameInfo rfi(lib->buildId(), pc_offset, lib_index); + fillRemoteFrameInfo(mi, &rfi); + } else { + TEST_LOG("WARNING: Library lookup failed for index %u", lib_index); + fillNativeMethodInfo(mi, "unknown_library", NULL); + } } else { fillJavaMethodInfo(mi, method, first_time); } @@ -1036,18 +1081,23 @@ void Recording::writeNativeLibraries(Buffer *buf) { if (_recorded_lib_count < 0) return; - Profiler *profiler = Profiler::instance(); - CodeCacheArray &native_libs = profiler->_native_libs; + Libraries *libraries = Libraries::instance(); + const CodeCacheArray &native_libs = libraries->native_libs(); int native_lib_count = native_libs.count(); for (int i = _recorded_lib_count; i < native_lib_count; i++) { + CodeCache* lib = native_libs[i]; + + // Emit jdk.NativeLibrary event with extended fields (buildId and loadBias) flushIfNeeded(buf, RECORDING_BUFFER_LIMIT - MAX_STRING_LENGTH); int start = buf->skip(5); buf->putVar64(T_NATIVE_LIBRARY); buf->putVar64(_start_ticks); - buf->putUtf8(native_libs[i]->name()); - buf->putVar64((uintptr_t)native_libs[i]->minAddress()); - buf->putVar64((uintptr_t)native_libs[i]->maxAddress()); + buf->putUtf8(lib->name()); + buf->putVar64((uintptr_t)lib->minAddress()); + buf->putVar64((uintptr_t)lib->maxAddress()); + buf->putUtf8(lib->hasBuildId() ? lib->buildId() : ""); + buf->putVar64((uintptr_t)lib->loadBias()); buf->putVar32(start, buf->offset() - start); flushIfNeeded(buf); } diff --git a/ddprof-lib/src/main/cpp/flightRecorder.h b/ddprof-lib/src/main/cpp/flightRecorder.h index 4ee2b09ad..ca670b42d 100644 --- a/ddprof-lib/src/main/cpp/flightRecorder.h +++ b/ddprof-lib/src/main/cpp/flightRecorder.h @@ -276,6 +276,7 @@ class Lookup { private: void fillNativeMethodInfo(MethodInfo *mi, const char *name, const char *lib_name); + void fillRemoteFrameInfo(MethodInfo *mi, const RemoteFrameInfo *rfi); void cutArguments(char *func); void fillJavaMethodInfo(MethodInfo *mi, jmethodID method, bool first_time); bool has_prefix(const char *str, const char *prefix) const { diff --git a/ddprof-lib/src/main/cpp/frame.h b/ddprof-lib/src/main/cpp/frame.h index e53212782..46a60288a 100644 --- a/ddprof-lib/src/main/cpp/frame.h +++ b/ddprof-lib/src/main/cpp/frame.h @@ -9,6 +9,7 @@ enum FrameTypeId { FRAME_CPP = 4, FRAME_KERNEL = 5, FRAME_C1_COMPILED = 6, + FRAME_NATIVE_REMOTE = 7, // Native frame with remote symbolication (build-id + pc-offset) }; class FrameType { diff --git a/ddprof-lib/src/main/cpp/jfrMetadata.cpp b/ddprof-lib/src/main/cpp/jfrMetadata.cpp index cf81ae4d4..6991a8a12 100644 --- a/ddprof-lib/src/main/cpp/jfrMetadata.cpp +++ b/ddprof-lib/src/main/cpp/jfrMetadata.cpp @@ -323,7 +323,9 @@ void JfrMetadata::initialize( << field("startTime", T_LONG, "Start Time", F_TIME_TICKS) << field("name", T_STRING, "Name") << field("baseAddress", T_LONG, "Base Address", F_ADDRESS) - << field("topAddress", T_LONG, "Top Address", F_ADDRESS)) + << field("topAddress", T_LONG, "Top Address", F_ADDRESS) + << field("buildId", T_STRING, "GNU Build ID") + << field("loadBias", T_LONG, "Load Bias", F_ADDRESS)) << (type("profiler.Log", T_LOG, "Log Message") << category("Profiler") diff --git a/ddprof-lib/src/main/cpp/libraries.cpp b/ddprof-lib/src/main/cpp/libraries.cpp index a9b62c509..a39965d95 100644 --- a/ddprof-lib/src/main/cpp/libraries.cpp +++ b/ddprof-lib/src/main/cpp/libraries.cpp @@ -1,8 +1,10 @@ #include "codeCache.h" +#include "common.h" #include "libraries.h" #include "libraryPatcher.h" #include "log.h" #include "symbols.h" +#include "symbols_linux_dd.h" #include "vmEntry.h" #include "vmStructs.h" @@ -38,6 +40,9 @@ void Libraries::updateSymbols(bool kernel_symbols) { LibraryPatcher::patch_libraries(); } +// Platform-specific implementation of updateBuildIds() is in libraries_linux.cpp (Linux) +// or stub implementation for other platforms + const void *Libraries::resolveSymbol(const char *name) { char mangled_name[256]; if (strstr(name, "::") != NULL) { diff --git a/ddprof-lib/src/main/cpp/libraries.h b/ddprof-lib/src/main/cpp/libraries.h index 4c4aa132a..55672b02d 100644 --- a/ddprof-lib/src/main/cpp/libraries.h +++ b/ddprof-lib/src/main/cpp/libraries.h @@ -12,6 +12,7 @@ class Libraries { public: Libraries() : _native_libs(), _runtime_stubs("runtime stubs") {} void updateSymbols(bool kernel_symbols); + void updateBuildIds(); // Extract build-ids for all loaded libraries const void *resolveSymbol(const char *name); // In J9 the 'libjvm' functionality is spread across multiple libraries // This function will return the 'libjvm' on non-J9 VMs and the library with the given name on J9 VMs @@ -19,6 +20,15 @@ class Libraries { CodeCache *findLibraryByName(const char *lib_name); CodeCache *findLibraryByAddress(const void *address); + // Get library by index (used for remote symbolication unpacking) + // Note: Parameter is uint32_t to match lib_index packing (17 bits = max 131K libraries) + CodeCache *getLibraryByIndex(uint32_t index) const { + if (index < _native_libs.count()) { + return _native_libs[index]; + } + return nullptr; + } + static Libraries *instance() { static Libraries instance; return &instance; diff --git a/ddprof-lib/src/main/cpp/libraries_linux.cpp b/ddprof-lib/src/main/cpp/libraries_linux.cpp new file mode 100644 index 000000000..d1617126e --- /dev/null +++ b/ddprof-lib/src/main/cpp/libraries_linux.cpp @@ -0,0 +1,87 @@ +/* + * Copyright 2026, Datadog, Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef __linux__ + +#include "common.h" +#include "counters.h" +#include "libraries.h" +#include "symbols_linux_dd.h" + +#include +#include + +// Track which libraries have had build-ID extraction attempted +// Mirrors the _parsed_inodes pattern from symbols_linux.cpp for O(1) skip +static std::unordered_set _build_id_processed; +static std::mutex _build_id_lock; + +void Libraries::updateBuildIds() { + std::lock_guard lock(_build_id_lock); + + int lib_count = _native_libs.count(); + + for (int i = 0; i < lib_count; i++) { + CodeCache* lib = _native_libs.at(i); + if (lib == nullptr) { + continue; + } + + // O(1) check: Skip if already processed + // Mirrors _parsed_inodes pattern from symbols_linux.cpp for optimal performance + if (_build_id_processed.find(lib) != _build_id_processed.end()) { + Counters::increment(REMOTE_SYMBOLICATION_BUILD_ID_CACHE_HITS); + continue; + } + + // Mark as processed immediately to avoid re-processing on errors + _build_id_processed.insert(lib); + + // Skip if already has build-id (e.g., from JFR metadata load) + if (lib->hasBuildId()) { + continue; + } + + const char* lib_name = lib->name(); + if (lib_name == nullptr) { + continue; + } + + // Extract build-id from library file (only happens once per library) + size_t build_id_len; + char* build_id = ddprof::SymbolsLinux::extractBuildId(lib_name, &build_id_len); + + if (build_id != nullptr) { + // Set build-id and calculate load bias + lib->setBuildId(build_id, build_id_len); + + // Calculate load bias: difference between runtime address and file base + // For now, use image_base as the load bias base + if (lib->imageBase() != nullptr) { + lib->setLoadBias((uintptr_t)lib->imageBase()); + } + + free(build_id); // setBuildId makes its own copy + + // Track libraries with build-IDs + Counters::increment(REMOTE_SYMBOLICATION_LIBS_WITH_BUILD_ID); + } else { + TEST_LOG("updateBuildIds: NO build-id found for %s", lib_name); + } + } +} + +#endif // __linux__ diff --git a/ddprof-lib/src/main/cpp/libraries_macos.cpp b/ddprof-lib/src/main/cpp/libraries_macos.cpp new file mode 100644 index 000000000..b1270b2d0 --- /dev/null +++ b/ddprof-lib/src/main/cpp/libraries_macos.cpp @@ -0,0 +1,27 @@ +/* + * Copyright 2026, Datadog, Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __linux__ + +#include "libraries.h" + +// Stub implementation for non-Linux platforms +// Remote symbolication with build-ID extraction is Linux-only +void Libraries::updateBuildIds() { + // No-op on non-Linux platforms +} + +#endif // !__linux__ diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index 5f83a14d2..bed447434 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -233,17 +233,18 @@ const void *Profiler::resolveSymbol(const char *name) { } size_t len = strlen(name); - int native_lib_count = _native_libs.count(); + const CodeCacheArray& native_libs = _libs->native_libs(); + int native_lib_count = native_libs.count(); if (len > 0 && name[len - 1] == '*') { for (int i = 0; i < native_lib_count; i++) { - const void *address = _native_libs[i]->findSymbolByPrefix(name, len - 1); + const void *address = native_libs[i]->findSymbolByPrefix(name, len - 1); if (address != NULL) { return address; } } } else { for (int i = 0; i < native_lib_count; i++) { - const void *address = _native_libs[i]->findSymbol(name); + const void *address = native_libs[i]->findSymbol(name); if (address != NULL) { return address; } @@ -256,8 +257,9 @@ const void *Profiler::resolveSymbol(const char *name) { // For BCI_NATIVE_FRAME, library index is encoded ahead of the symbol name const char *Profiler::getLibraryName(const char *native_symbol) { short lib_index = NativeFunc::libIndex(native_symbol); - if (lib_index >= 0 && lib_index < _native_libs.count()) { - const char *s = _native_libs[lib_index]->name(); + const CodeCacheArray& native_libs = _libs->native_libs(); + if (lib_index >= 0 && lib_index < native_libs.count()) { + const char *s = native_libs[lib_index]->name(); if (s != NULL) { const char *p = strrchr(s, '/'); return p != NULL ? p + 1 : s; @@ -290,7 +292,7 @@ bool Profiler::isAddressInCode(const void *pc, bool include_stubs) { int Profiler::getNativeTrace(void *ucontext, ASGCT_CallFrame *frames, int event_type, int tid, StackContext *java_ctx, - bool *truncated) { + bool *truncated, int lock_index) { if (_cstack == CSTACK_NO || (event_type == BCI_ALLOC || event_type == BCI_ALLOC_OUTSIDE_TLAB) || (event_type != BCI_CPU && event_type != BCI_WALL && @@ -318,31 +320,146 @@ int Profiler::getNativeTrace(void *ucontext, ASGCT_CallFrame *frames, java_ctx, truncated); } - return convertNativeTrace(native_frames, callchain, frames); + return convertNativeTrace(native_frames, callchain, frames, lock_index); } +/** + * Populates an ASGCT_CallFrame with remote symbolication data. + * + * Packs pc_offset, mark, and lib_index into the jmethodID field for deferred + * symbol resolution during JFR serialization. This approach defers expensive + * symbol lookups to post-processing while still capturing marks needed for + * correct stack walk termination. + * + * @param frame The ASGCT_CallFrame to populate + * @param pc The program counter address + * @param lib The CodeCache library containing build-ID information + * @param mark The mark value (0 for regular frames, non-zero for JVM internals) + */ +void Profiler::populateRemoteFrame(ASGCT_CallFrame* frame, uintptr_t pc, CodeCache* lib, char mark) { + uintptr_t pc_offset = pc - (uintptr_t)lib->imageBase(); + uint32_t lib_index = (uint32_t)lib->libIndex(); + + jmethodID packed = RemoteFramePacker::pack(pc_offset, mark, lib_index); + + TEST_LOG("populateRemoteFrame: lib=%s, build_id=%s, pc=0x%lx, pc_offset=0x%lx, mark=%d, lib_index=%u, packed=0x%lx", + lib->name(), lib->buildId(), pc, pc_offset, (int)mark, lib_index, (uintptr_t)packed); + + frame->bci = BCI_NATIVE_FRAME_REMOTE; + frame->method_id = packed; + + // Track remote symbolication usage + Counters::increment(REMOTE_SYMBOLICATION_FRAMES); +} + +/** + * Resolve native frame for upstream StackWalker (called from signal handler). + * Returns resolution decision without writing to ASGCT_CallFrame. + * + * For remote symbolication with build-ID: + * - Does symbol resolution to check marks (O(log n) binarySearch + O(1) mark check) + * - Packs mark + lib_index + pc_offset into method_id field + * - Returns is_marked=true if mark indicates termination (MARK_INTERPRETER, etc.) + * + * For traditional symbolication: + * - Does full symbol resolution via findNativeMethod() + * - Checks marks after symbol resolution (same O(log n) + O(1) cost) + */ +Profiler::NativeFrameResolution Profiler::resolveNativeFrameForWalkVM(uintptr_t pc, int lock_index) { + if (_remote_symbolication) { + CodeCache* lib = _libs->findLibraryByAddress((void*)pc); + if (lib != nullptr && lib->hasBuildId()) { + // Get symbol name and check mark + const char *method_name = nullptr; + lib->binarySearch((void*)pc, &method_name); + char mark = (method_name != nullptr) ? NativeFunc::mark(method_name) : 0; + + if (mark != 0) { + return {nullptr, BCI_NATIVE_FRAME, true}; // Marked - stop processing + } + + // Pack remote symbolication data using utility struct + uintptr_t pc_offset = pc - (uintptr_t)lib->imageBase(); + uint32_t lib_index = (uint32_t)lib->libIndex(); + jmethodID packed = RemoteFramePacker::pack(pc_offset, mark, lib_index); + + TEST_LOG("resolveNativeFrameForWalkVM: lib=%s, build_id=%s, pc=0x%lx, pc_offset=0x%lx, mark=%d, lib_index=%u, packed=0x%lx", + lib->name(), lib->buildId(), pc, pc_offset, (int)mark, lib_index, (uintptr_t)packed); + + return {packed, BCI_NATIVE_FRAME_REMOTE, false}; + } + } + + // Fallback: Traditional symbol resolution + const char *method_name = findNativeMethod((void*)pc); + if (method_name != nullptr && NativeFunc::isMarked(method_name)) { + return {nullptr, BCI_NATIVE_FRAME, true}; + } + + return {(jmethodID)method_name, BCI_NATIVE_FRAME, false}; +} + +/** + * Converts a native callchain to ASGCT_CallFrame array. + * + * For libraries with build-IDs, uses remote symbolication (deferred resolution). + * For libraries without build-IDs, performs traditional symbol resolution. + * + * In both cases, performs symbol resolution via binarySearch() to check for + * marked frames (JVM internals) that should terminate the stack walk. + */ int Profiler::convertNativeTrace(int native_frames, const void **callchain, - ASGCT_CallFrame *frames) { + ASGCT_CallFrame *frames, int lock_index) { int depth = 0; - jmethodID prev_method = NULL; + void* prev_identifier = NULL; // Can be jmethodID or frame pointer for remote for (int i = 0; i < native_frames; i++) { - const char *current_method_name = findNativeMethod(callchain[i]); - if (current_method_name != NULL && - NativeFunc::isMarked(current_method_name)) { - // This is C++ interpreter frame, this and later frames should be reported - // as Java frames returned by AGCT. Terminate the scan here. + uintptr_t pc = (uintptr_t)callchain[i]; + + // Try remote symbolication first if enabled + if (_remote_symbolication) { + CodeCache* lib = _libs->findLibraryByAddress((void*)pc); + if (lib != nullptr && lib->hasBuildId()) { + // Check for marked frames via symbol resolution + // binarySearch() returns symbol name, then we check mark (O(1)) + const char *method_name = nullptr; + lib->binarySearch((void*)pc, &method_name); + char mark = (method_name != nullptr) ? NativeFunc::mark(method_name) : 0; + + if (mark != 0) { + // Terminate scan at marked frame + return depth; + } + + // Populate remote frame inline - no allocation needed! + // Pass the mark we already retrieved to avoid duplicate binarySearch + populateRemoteFrame(&frames[depth], pc, lib, mark); + + // Use frame address as identifier for duplicate detection + void* current_identifier = (void*)&frames[depth]; + if (current_identifier != prev_identifier || _cstack != CSTACK_LBR) { + prev_identifier = current_identifier; + depth++; + } + continue; + } + } + + // Fallback: Traditional symbol resolution + const char *method_name = findNativeMethod((void*)pc); + if (method_name != nullptr && NativeFunc::isMarked(method_name)) { + // Terminate scan at marked frame return depth; } - jmethodID current_method = (jmethodID)current_method_name; - if (current_method == prev_method && _cstack == CSTACK_LBR) { - // Skip duplicates in LBR stack, where branch_stack[N].from == - // branch_stack[N+1].to - prev_method = NULL; - } else { + // Store standard frame + jmethodID current_method = (jmethodID)method_name; + if (current_method == prev_identifier && _cstack == CSTACK_LBR) { + prev_identifier = NULL; + } else if (current_method != NULL) { frames[depth].bci = BCI_NATIVE_FRAME; - frames[depth].method_id = prev_method = current_method; + frames[depth].method_id = current_method; + prev_identifier = current_method; depth++; } } @@ -438,6 +555,7 @@ int Profiler::getJavaTraceAsync(void *ucontext, ASGCT_CallFrame *frames, } JitWriteProtection jit(false); + // AsyncGetCallTrace writes to ASGCT_CallFrame array ASGCT_CallTrace trace = {jni, 0, frames}; VM::_asyncGetCallTrace(&trace, max_depth, ucontext); @@ -700,6 +818,7 @@ void Profiler::recordSample(void *ucontext, u64 counter, int tid, // passage of CPU or wall time, we use the same event definitions but we // record a null stacktrace we can skip the unwind if we've got a // call_trace_id determined to be reusable at a higher level + if (!_omit_stacktraces && call_trace_id == 0) { u64 startTime = TSC::ticks(); ASGCT_CallFrame *frames = _calltrace_buffer[lock_index]->_asgct_frames; @@ -709,14 +828,18 @@ void Profiler::recordSample(void *ucontext, u64 counter, int tid, StackContext java_ctx = {0}; ASGCT_CallFrame *native_stop = frames + num_frames; num_frames += getNativeTrace(ucontext, native_stop, event_type, tid, - &java_ctx, &truncated); + &java_ctx, &truncated, lock_index); if (num_frames < _max_stack_depth) { int max_remaining = _max_stack_depth - num_frames; if (_features.mixed) { - num_frames += ddprof::StackWalker::walkVM(ucontext, frames + num_frames, max_remaining, _features, eventTypeFromBCI(event_type), &truncated); + int vm_start = num_frames; + int vm_frames = ddprof::StackWalker::walkVM(ucontext, frames + vm_start, max_remaining, _features, eventTypeFromBCI(event_type), &truncated, lock_index); + num_frames += vm_frames; } else if (event_type == BCI_CPU || event_type == BCI_WALL) { if (_cstack >= CSTACK_VM) { - num_frames += ddprof::StackWalker::walkVM(ucontext, frames + num_frames, max_remaining, _features, eventTypeFromBCI(event_type), &truncated); + int vm_start = num_frames; + int vm_frames = ddprof::StackWalker::walkVM(ucontext, frames + vm_start, max_remaining, _features, eventTypeFromBCI(event_type), &truncated, lock_index); + num_frames += vm_frames; } else { // Async events AsyncSampleMutex mutex(ProfiledThread::currentSignalSafe()); @@ -794,10 +917,16 @@ void Profiler::recordExternalSample(u64 weight, int tid, int num_frames, CriticalSection cs; atomicIncRelaxed(_total_samples); - u64 call_trace_id = - _call_trace_storage.put(num_frames, frames, truncated, weight); - + // Convert ASGCT_CallFrame to ASGCT_CallFrame + // External samplers (like ObjectSampler) provide standard frames only u32 lock_index = getLockIndex(tid); + ASGCT_CallFrame *extended_frames = _calltrace_buffer[lock_index]->_asgct_frames; + for (int i = 0; i < num_frames; i++) { + extended_frames[i] = frames[i]; + } + + u64 call_trace_id = + _call_trace_storage.put(num_frames, extended_frames, truncated, weight); if (!_locks[lock_index].tryLock() && !_locks[lock_index = (lock_index + 1) % CONCURRENCY_LEVEL].tryLock() && !_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock()) { @@ -853,6 +982,11 @@ void *Profiler::dlopen_hook(const char *filename, int flags) { // Static function of Profiler -> can not use the instance variable _libs // Since Libraries is a singleton, this does not matter Libraries::instance()->updateSymbols(false); + // Extract build-ids for newly loaded libraries if remote symbolication is enabled + Profiler* profiler = instance(); + if (profiler != nullptr && profiler->_remote_symbolication) { + Libraries::instance()->updateBuildIds(); + } } return result; } @@ -1137,6 +1271,7 @@ Error Profiler::start(Arguments &args, bool reset) { // Validate TLS priming for signal-based profiling safety if (ProfiledThread::wasTlsPrimingAttempted() && !ProfiledThread::isTlsPrimingAvailable()) { _omit_stacktraces = args._lightweight; + _remote_symbolication = args._remote_symbolication; _event_mask = ((args._event != NULL && strcmp(args._event, EVENT_NOOP) != 0) ? EM_CPU : 0) | @@ -1158,6 +1293,7 @@ Error Profiler::start(Arguments &args, bool reset) { } } else { _omit_stacktraces = args._lightweight; + _remote_symbolication = args._remote_symbolication; _event_mask = ((args._event != NULL && strcmp(args._event, EVENT_NOOP) != 0) ? EM_CPU : 0) | @@ -1211,6 +1347,9 @@ Error Profiler::start(Arguments &args, bool reset) { } } + // Remote symbolication is now inline in ASGCT_CallFrame + // No separate pool allocation needed! + _features = args._features; if (VM::hotspot_version() < 8) { _features.java_anchor = 0; @@ -1262,9 +1401,14 @@ Error Profiler::start(Arguments &args, bool reset) { // Kernel symbols are useful only for perf_events without --all-user _libs->updateSymbols(_cpu_engine == &perf_events && (args._ring & RING_KERNEL)); + // Extract build-ids for remote symbolication if enabled + if (_remote_symbolication) { + _libs->updateBuildIds(); + } + enableEngines(); - switchLibraryTrap(_cstack == CSTACK_DWARF); + switchLibraryTrap(_cstack == CSTACK_DWARF || _remote_symbolication); JfrMetadata::initialize(args._context_attributes); _num_context_attributes = args._context_attributes.size(); @@ -1334,8 +1478,6 @@ Error Profiler::stop() { return Error("Profiler is not active"); } - LibraryPatcher::unpatch_libraries(); - disableEngines(); if (_event_mask & EM_ALLOC) @@ -1359,9 +1501,14 @@ Error Profiler::stop() { // Acquire all spinlocks to avoid race with remaining signals lockAll(); - _jfr.stop(); + _jfr.stop(); // JFR serialization must complete before unpatching libraries unlockAll(); + // Unpatch libraries AFTER JFR serialization completes + // Remote symbolication RemoteFrameInfo structs contain pointers to build-ID strings + // owned by library metadata, so we must keep library patches active until after serialization + LibraryPatcher::unpatch_libraries(); + _state = IDLE; return Error::OK; } @@ -1431,10 +1578,11 @@ Error Profiler::dump(const char *path, const int length) { updateJavaThreadNames(); updateNativeThreadNames(); - Counters::set(CODECACHE_NATIVE_COUNT, _native_libs.count()); - Counters::set(CODECACHE_NATIVE_SIZE_BYTES, _native_libs.memoryUsage()); + const CodeCacheArray& native_libs = _libs->native_libs(); + Counters::set(CODECACHE_NATIVE_COUNT, native_libs.count()); + Counters::set(CODECACHE_NATIVE_SIZE_BYTES, native_libs.memoryUsage()); Counters::set(CODECACHE_RUNTIME_STUBS_SIZE_BYTES, - _native_libs.memoryUsage()); + native_libs.memoryUsage()); lockAll(); Error err = _jfr.dump(path, length); diff --git a/ddprof-lib/src/main/cpp/profiler.h b/ddprof-lib/src/main/cpp/profiler.h index 141b21539..d98d821c2 100644 --- a/ddprof-lib/src/main/cpp/profiler.h +++ b/ddprof-lib/src/main/cpp/profiler.h @@ -150,11 +150,11 @@ class alignas(alignof(SpinLock)) Profiler { Libraries* _libs; SpinLock _stubs_lock; CodeCache _runtime_stubs; - CodeCacheArray _native_libs; const void *_call_stub_begin; const void *_call_stub_end; u32 _num_context_attributes; bool _omit_stacktraces; + bool _remote_symbolication; // Enable remote symbolication for native frames // dlopen() hook support void **_dlopen_entry; @@ -174,7 +174,7 @@ class alignas(alignof(SpinLock)) Profiler { u32 getLockIndex(int tid); bool isAddressInCode(uintptr_t addr); int getNativeTrace(void *ucontext, ASGCT_CallFrame *frames, int event_type, - int tid, StackContext *java_ctx, bool *truncated); + int tid, StackContext *java_ctx, bool *truncated, int lock_index); int getJavaTraceAsync(void *ucontext, ASGCT_CallFrame *frames, int max_depth, StackContext *java_ctx, bool *truncated); void fillFrameTypes(ASGCT_CallFrame *frames, int num_frames, @@ -204,7 +204,7 @@ class alignas(alignof(SpinLock)) Profiler { _notify_class_unloaded_func(NULL), _thread_filter(), _call_trace_storage(), _jfr(), _start_time(0), _epoch(0), _timer_id(NULL), _max_stack_depth(0), _safe_mode(0), _thread_events_state(JVMTI_DISABLE), - _libs(Libraries::instance()), _stubs_lock(), _runtime_stubs("[stubs]"), _native_libs(), + _libs(Libraries::instance()), _stubs_lock(), _runtime_stubs("[stubs]"), _call_stub_begin(NULL), _call_stub_end(NULL), _dlopen_entry(NULL), _num_context_attributes(0), _class_map(1), _string_label_map(2), _context_value_map(3), _cpu_engine(), _alloc_engine(), _event_mask(0), @@ -279,8 +279,84 @@ class alignas(alignof(SpinLock)) Profiler { Error dump(const char *path, const int length); void logStats(); void switchThreadEvents(jvmtiEventMode mode); + + /** + * Remote symbolication packed data layout (BCI_NATIVE_FRAME_REMOTE): + * + * When remote symbolication is enabled and a library has a build-ID, we defer + * full symbol resolution to post-processing and pack essential data into the + * 64-bit jmethodID field: + * + * Bits 0-43: pc_offset (44 bits, 16 TB range) + * Bits 44-46: mark (3 bits, 0-7 values) + * Bits 47-63: lib_index (17 bits, 128K libraries) + * + * Mark values indicate JVM internal frames that should terminate stack walks: + * 0 = no mark (regular native frame) + * MARK_VM_RUNTIME = 1 + * MARK_INTERPRETER = 2 + * MARK_COMPILER_ENTRY = 3 + * MARK_ASYNC_PROFILER = 4 + * + * During stack walking, we perform symbol resolution (binarySearch) to check + * marks and pack the mark value for later use. The performance is O(log n) for + * binarySearch + O(1) for mark extraction, same as traditional symbolication + * but simpler than maintaining a separate marked ranges index. + */ + struct RemoteFramePacker { + static const int PC_OFFSET_BITS = 44; + static const int MARK_BITS = 3; + static const int LIB_INDEX_BITS = 17; + + static const uintptr_t PC_OFFSET_MASK = (1ULL << PC_OFFSET_BITS) - 1; // 0xFFFFFFFFFFF (44 bits) + static const uintptr_t MARK_MASK = (1ULL << MARK_BITS) - 1; // 0x7 (3 bits) + static const uintptr_t LIB_INDEX_MASK = (1ULL << LIB_INDEX_BITS) - 1; // 0x1FFFF (17 bits) + + /** + * Pack remote symbolication data into a 64-bit jmethodID. + * Layout: pc_offset (44 bits) | mark (3 bits) | lib_index (17 bits) + */ + static inline jmethodID pack(uintptr_t pc_offset, char mark, uint32_t lib_index) { + return (jmethodID)( + (pc_offset & PC_OFFSET_MASK) | // Bits 0-43 + (((uintptr_t)mark & MARK_MASK) << PC_OFFSET_BITS) | // Bits 44-46 + (((uintptr_t)lib_index & LIB_INDEX_MASK) << (PC_OFFSET_BITS + MARK_BITS)) // Bits 47-63 + ); + } + + /** + * Unpack pc_offset from packed data. + */ + static inline uintptr_t unpackPcOffset(jmethodID packed) { + return (uintptr_t)packed & PC_OFFSET_MASK; + } + + /** + * Unpack mark from packed data. + */ + static inline char unpackMark(jmethodID packed) { + return (char)(((uintptr_t)packed >> PC_OFFSET_BITS) & MARK_MASK); + } + + /** + * Unpack lib_index from packed data. + */ + static inline uint32_t unpackLibIndex(jmethodID packed) { + return (uint32_t)(((uintptr_t)packed >> (PC_OFFSET_BITS + MARK_BITS)) & LIB_INDEX_MASK); + } + }; + + // Result of resolving a native frame for symbolication + struct NativeFrameResolution { + jmethodID method_id; // Packed remote frame data (pc_offset|mark|lib_index) or const char* symbol name + int bci; // BCI_NATIVE_FRAME_REMOTE or BCI_NATIVE_FRAME + bool is_marked; // true if this is a marked C++ interpreter frame (stop processing) + }; + + void populateRemoteFrame(ASGCT_CallFrame* frame, uintptr_t pc, CodeCache* lib, char mark); + NativeFrameResolution resolveNativeFrameForWalkVM(uintptr_t pc, int lock_index); int convertNativeTrace(int native_frames, const void **callchain, - ASGCT_CallFrame *frames); + ASGCT_CallFrame *frames, int lock_index); void recordSample(void *ucontext, u64 weight, int tid, jint event_type, u64 call_trace_id, Event *event); u64 recordJVMTISample(u64 weight, int tid, jthread thread, jint event_type, Event *event, bool deferred); diff --git a/ddprof-lib/src/main/cpp/stackWalker_dd.h b/ddprof-lib/src/main/cpp/stackWalker_dd.h index 0d88f87cc..1ba05a64e 100644 --- a/ddprof-lib/src/main/cpp/stackWalker_dd.h +++ b/ddprof-lib/src/main/cpp/stackWalker_dd.h @@ -50,8 +50,9 @@ namespace ddprof { return walked; } - inline static int walkVM(void* ucontext, ASGCT_CallFrame* frames, int max_depth, StackWalkFeatures features, EventType event_type, bool* truncated) { - int walked = ::StackWalker::walkVM(ucontext, frames, max_depth + 1, features, event_type); + inline static int walkVM(void* ucontext, ASGCT_CallFrame* frames, int max_depth, StackWalkFeatures features, EventType event_type, bool* truncated, int lock_index) { + int walked = ::StackWalker::walkVM(ucontext, frames, max_depth + 1, features, event_type, lock_index); + if (walked > max_depth) { *truncated = true; walked = max_depth; @@ -64,8 +65,9 @@ namespace ddprof { return walked; } - inline static int walkVM(void* ucontext, ASGCT_CallFrame* frames, int max_depth, JavaFrameAnchor* anchor, EventType event_type, bool* truncated) { - int walked = ::StackWalker::walkVM(ucontext, frames, max_depth + 1, anchor, event_type); + inline static int walkVM(void* ucontext, ASGCT_CallFrame* frames, int max_depth, JavaFrameAnchor* anchor, EventType event_type, bool* truncated, int lock_index) { + int walked = ::StackWalker::walkVM(ucontext, frames, max_depth + 1, anchor, event_type, lock_index); + if (walked > max_depth) { *truncated = true; walked = max_depth; diff --git a/ddprof-lib/src/main/cpp/symbols_linux_dd.cpp b/ddprof-lib/src/main/cpp/symbols_linux_dd.cpp new file mode 100644 index 000000000..7deb77a80 --- /dev/null +++ b/ddprof-lib/src/main/cpp/symbols_linux_dd.cpp @@ -0,0 +1,205 @@ +/* + * Copyright 2025, Datadog, Inc. + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifdef __linux__ + +#include "symbols_linux_dd.h" +#include +#include +#include +#include +#include +#include +#include +#include + +// GNU build-id extraction implementation +// +// The build-id is a unique identifier embedded in ELF binaries and shared libraries. +// It is stored in a PT_NOTE program header segment as an ELF note with type NT_GNU_BUILD_ID. +// +// References: +// - ELF Specification: https://refspecs.linuxfoundation.org/elf/elf.pdf +// - ELF Note Section: https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/noteobject.html +// - GNU build-id: https://fedoraproject.org/wiki/Releases/FeatureBuildId +// - GNU binutils ld --build-id: https://sourceware.org/binutils/docs/ld/Options.html +// - readelf(1) --notes: https://man7.org/linux/man-pages/man1/readelf.1.html +// +// Build-ID format: +// - Located in PT_NOTE program header segments (p_type == PT_NOTE) +// - Stored as ELF note with: +// - n_namesz = 4 (length of "GNU\0") +// - n_descsz = build-id length (typically 20 bytes for SHA1) +// - n_type = NT_GNU_BUILD_ID (3) +// - name = "GNU\0" +// - desc = build-id bytes +// - All fields are 4-byte aligned as per ELF note format + +// GNU build-id note constants +#define NT_GNU_BUILD_ID 3 +#define GNU_BUILD_ID_NAME "GNU" + +namespace ddprof { + +char* SymbolsLinux::extractBuildId(const char* file_path, size_t* build_id_len) { + if (!file_path || !build_id_len) { + return nullptr; + } + + int fd = open(file_path, O_RDONLY); + if (fd < 0) { + return nullptr; + } + + struct stat st; + if (fstat(fd, &st) < 0) { + close(fd); + return nullptr; + } + + void* elf_base = mmap(nullptr, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + close(fd); + + if (elf_base == MAP_FAILED) { + return nullptr; + } + + char* result = extractBuildIdFromMemory(elf_base, st.st_size, build_id_len); + + munmap(elf_base, st.st_size); + return result; +} + +char* SymbolsLinux::extractBuildIdFromMemory(const void* elf_base, size_t elf_size, size_t* build_id_len) { + if (!elf_base || !build_id_len || elf_size < sizeof(Elf64_Ehdr)) { + return nullptr; + } + + const Elf64_Ehdr* ehdr = static_cast(elf_base); + + // Verify ELF magic + if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0) { + return nullptr; + } + + // Only handle 64-bit ELF for now + if (ehdr->e_ident[EI_CLASS] != ELFCLASS64) { + return nullptr; + } + + // Check if we have program headers + if (ehdr->e_phoff == 0 || ehdr->e_phnum == 0) { + return nullptr; + } + + // Verify program header table is within file bounds + if (ehdr->e_phoff + ehdr->e_phnum * sizeof(Elf64_Phdr) > elf_size) { + return nullptr; + } + + // Verify program header offset is properly aligned + if (ehdr->e_phoff % alignof(Elf64_Phdr) != 0) { + return nullptr; + } + + const char* base = static_cast(elf_base); + const Elf64_Phdr* phdr = reinterpret_cast(base + ehdr->e_phoff); + + // Search for PT_NOTE segments + for (int i = 0; i < ehdr->e_phnum; i++) { + if (phdr[i].p_type == PT_NOTE && phdr[i].p_filesz > 0) { + // Ensure note segment is within file bounds + if (phdr[i].p_offset + phdr[i].p_filesz > elf_size) { + continue; + } + + const void* note_data = base + phdr[i].p_offset; + const uint8_t* build_id_bytes = findBuildIdInNotes(note_data, phdr[i].p_filesz, build_id_len); + + if (build_id_bytes) { + return buildIdToHex(build_id_bytes, *build_id_len); + } + } + } + + return nullptr; +} + +const uint8_t* SymbolsLinux::findBuildIdInNotes(const void* note_data, size_t note_size, size_t* build_id_len) { + const char* data = static_cast(note_data); + size_t offset = 0; + + // Parse ELF note entries within the PT_NOTE segment + // Each note has the structure: + // typedef struct { + // Elf64_Word n_namesz; // Length of name field (including null terminator) + // Elf64_Word n_descsz; // Length of descriptor (build-id bytes) + // Elf64_Word n_type; // Note type (NT_GNU_BUILD_ID == 3) + // // Followed by name (4-byte aligned) + // // Followed by descriptor (4-byte aligned) + // } Elf64_Nhdr; + // Reference: https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/noteobject.html + while (offset < note_size) { + // Ensure there is enough space for the note header itself + if (note_size - offset < sizeof(Elf64_Nhdr)) { + break; + } + + const Elf64_Nhdr* nhdr = reinterpret_cast(data + offset); + + // Calculate aligned sizes (4-byte alignment as per ELF spec) + size_t name_size_aligned = (nhdr->n_namesz + 3) & ~static_cast(3); + size_t desc_size_aligned = (nhdr->n_descsz + 3) & ~static_cast(3); + + // Check bounds using subtraction to avoid overflow + size_t remaining = note_size - offset - sizeof(Elf64_Nhdr); + if (name_size_aligned > remaining) { + break; + } + remaining -= name_size_aligned; + if (desc_size_aligned > remaining) { + break; + } + + // Check if this is a GNU build-id note + if (nhdr->n_type == NT_GNU_BUILD_ID && nhdr->n_namesz > 0 && nhdr->n_descsz > 0) { + const char* name = data + offset + sizeof(Elf64_Nhdr); + + // Verify GNU build-id name (including null terminator) + if (nhdr->n_namesz == 4 && strncmp(name, GNU_BUILD_ID_NAME, 3) == 0 && name[3] == '\0') { + const uint8_t* desc = reinterpret_cast(data + offset + sizeof(Elf64_Nhdr) + name_size_aligned); + *build_id_len = nhdr->n_descsz; + return desc; + } + } + + offset += sizeof(Elf64_Nhdr) + name_size_aligned + desc_size_aligned; + } + + return nullptr; +} + +char* SymbolsLinux::buildIdToHex(const uint8_t* build_id_bytes, size_t byte_len) { + if (!build_id_bytes || byte_len == 0) { + return nullptr; + } + + // Allocate string for hex representation (2 chars per byte + null terminator) + char* hex_str = static_cast(malloc(byte_len * 2 + 1)); + if (!hex_str) { + return nullptr; + } + + for (size_t i = 0; i < byte_len; i++) { + snprintf(hex_str + i * 2, 3, "%02x", build_id_bytes[i]); + } + + hex_str[byte_len * 2] = '\0'; + return hex_str; +} + +} // namespace ddprof + +#endif // __linux__ diff --git a/ddprof-lib/src/main/cpp/symbols_linux_dd.h b/ddprof-lib/src/main/cpp/symbols_linux_dd.h new file mode 100644 index 000000000..0282f1e32 --- /dev/null +++ b/ddprof-lib/src/main/cpp/symbols_linux_dd.h @@ -0,0 +1,68 @@ +/* + * Copyright 2025, Datadog, Inc. + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _SYMBOLS_LINUX_DD_H +#define _SYMBOLS_LINUX_DD_H + +#ifdef __linux__ + +#include +#include + +namespace ddprof { + +/** + * Datadog-specific extensions to Linux symbol handling. + * Provides build-id extraction for remote symbolication support. + */ +class SymbolsLinux { +public: + /** + * Extract GNU build-id from ELF file on disk. + * Build-id is stored in .note.gnu.build-id section and provides + * unique identification for libraries/executables for remote symbolication. + * + * @param file_path Path to ELF file + * @param build_id_len Output parameter for build-id length in bytes + * @return Hex-encoded build-id string (caller must free), or NULL on error + */ + static char* extractBuildId(const char* file_path, size_t* build_id_len); + + /** + * Extract GNU build-id from ELF file already mapped in memory. + * + * @param elf_base Base address of mapped ELF file + * @param elf_size Size of mapped ELF file + * @param build_id_len Output parameter for build-id length in bytes + * @return Hex-encoded build-id string (caller must free), or NULL on error + */ + static char* extractBuildIdFromMemory(const void* elf_base, size_t elf_size, size_t* build_id_len); + +private: + /** + * Convert binary build-id to hex string. + * + * @param build_id_bytes Raw build-id bytes + * @param byte_len Length of raw build-id in bytes + * @return Hex-encoded string (caller must free) + */ + static char* buildIdToHex(const uint8_t* build_id_bytes, size_t byte_len); + + /** + * Parse ELF note section to find GNU build-id. + * + * @param note_data Start of note section data + * @param note_size Size of note section + * @param build_id_len Output parameter for build-id length + * @return Raw build-id bytes, or NULL if not found + */ + static const uint8_t* findBuildIdInNotes(const void* note_data, size_t note_size, size_t* build_id_len); +}; + +} // namespace ddprof + +#endif // __linux__ + +#endif // _SYMBOLS_LINUX_DD_H diff --git a/ddprof-lib/src/main/cpp/vmEntry.h b/ddprof-lib/src/main/cpp/vmEntry.h index 2047193d4..ed8adf2eb 100644 --- a/ddprof-lib/src/main/cpp/vmEntry.h +++ b/ddprof-lib/src/main/cpp/vmEntry.h @@ -32,6 +32,7 @@ enum ASGCT_CallFrameType { BCI_PARK = -16, // class name of the park() blocker BCI_THREAD_ID = -17, // method_id designates a thread BCI_ERROR = -18, // method_id is an error string + BCI_NATIVE_FRAME_REMOTE = -19, // method_id points to RemoteFrameInfo for remote symbolication }; // See hotspot/src/share/vm/prims/forte.cpp @@ -57,6 +58,21 @@ typedef struct { jmethodID method_id; } ASGCT_CallFrame; +/** + * Information for native frames requiring remote symbolication. + * Used when bci == BCI_NATIVE_FRAME_REMOTE. + */ +typedef struct RemoteFrameInfo { + const char* build_id; // GNU build-id for library identification (null-terminated hex string) + uintptr_t pc_offset; // PC offset within the library/module + short lib_index; // Index into CodeCache library table for fast lookup + +#ifdef __cplusplus + // Constructor for C++ convenience + RemoteFrameInfo(const char* bid, uintptr_t offset, short lib_idx) + : build_id(bid), pc_offset(offset), lib_index(lib_idx) {} +#endif +} RemoteFrameInfo; typedef struct { JNIEnv *env; diff --git a/ddprof-lib/src/test/cpp/remoteargs_ut.cpp b/ddprof-lib/src/test/cpp/remoteargs_ut.cpp new file mode 100644 index 000000000..1778fd455 --- /dev/null +++ b/ddprof-lib/src/test/cpp/remoteargs_ut.cpp @@ -0,0 +1,136 @@ +/* + * Copyright 2025, Datadog, Inc. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include "arguments.h" +#include "../../main/cpp/gtest_crash_handler.h" + +static constexpr char REMOTE_ARGS_TEST_NAME[] = "RemoteArgsTest"; + +class RemoteArgsGlobalSetup { +public: + RemoteArgsGlobalSetup() { + installGtestCrashHandler(); + } + ~RemoteArgsGlobalSetup() { + restoreDefaultSignalHandlers(); + } +}; + +static RemoteArgsGlobalSetup global_setup; + +class RemoteArgsTest : public ::testing::Test { +protected: + void SetUp() override { + // Test setup + } + + void TearDown() override { + // Test cleanup + } +}; + +TEST_F(RemoteArgsTest, DefaultRemoteSymbolicationDisabled) { + Arguments args; + + // Remote symbolication should be disabled by default + EXPECT_FALSE(args._remote_symbolication); +} + +TEST_F(RemoteArgsTest, EnableRemoteSymbolication) { + Arguments args; + + // Test enabling remote symbolication + Error error = args.parse("remotesym=true"); + EXPECT_FALSE(error); + EXPECT_TRUE(args._remote_symbolication); +} + +TEST_F(RemoteArgsTest, EnableRemoteSymbolicationShort) { + Arguments args; + + // Test short form + Error error = args.parse("remotesym=y"); + EXPECT_FALSE(error); + EXPECT_TRUE(args._remote_symbolication); +} + +TEST_F(RemoteArgsTest, DisableRemoteSymbolication) { + Arguments args; + + // Test explicitly disabling + Error error = args.parse("remotesym=false"); + EXPECT_FALSE(error); + EXPECT_FALSE(args._remote_symbolication); +} + +TEST_F(RemoteArgsTest, MultipleArgsWithRemoteSymbolication) { + Arguments args; + + // Test with multiple arguments + Error error = args.parse("event=cpu,interval=1000000,remotesym=true"); + EXPECT_FALSE(error); + EXPECT_TRUE(args._remote_symbolication); + EXPECT_STREQ(args._event, "cpu"); + EXPECT_EQ(args._interval, 1000000); +} + +TEST_F(RemoteArgsTest, RemoteSymbolicationWithOtherFlags) { + Arguments args; + + // Test interaction with lightweight flag + Error error = args.parse("lightweight=true,remotesym=true"); + EXPECT_FALSE(error); + EXPECT_TRUE(args._lightweight); + EXPECT_TRUE(args._remote_symbolication); +} + +TEST_F(RemoteArgsTest, RemoteSymbolicationNoValue) { + Arguments args; + + // Test no value (should enable) + Error error = args.parse("remotesym"); + EXPECT_FALSE(error); + EXPECT_TRUE(args._remote_symbolication); +} + +TEST_F(RemoteArgsTest, RemoteSymbolicationNumericValues) { + Arguments args; + + // Test numeric 1 (should enable) + Error error = args.parse("remotesym=1"); + EXPECT_FALSE(error); + EXPECT_TRUE(args._remote_symbolication); + + // Test numeric 0 (should disable) + args = Arguments(); + error = args.parse("remotesym=0"); + EXPECT_FALSE(error); + EXPECT_FALSE(args._remote_symbolication); +} + +TEST_F(RemoteArgsTest, RemoteSymbolicationNoVariant) { + Arguments args; + + // Test "no" (should disable) + Error error = args.parse("remotesym=no"); + EXPECT_FALSE(error); + EXPECT_FALSE(args._remote_symbolication); + + // Test "n" (should disable) + args = Arguments(); + error = args.parse("remotesym=n"); + EXPECT_FALSE(error); + EXPECT_FALSE(args._remote_symbolication); +} + +TEST_F(RemoteArgsTest, RemoteSymbolicationInvalidValue) { + Arguments args; + + // Test invalid value that starts with unrecognized char (should enable due to default) + Error error = args.parse("remotesym=invalid"); + EXPECT_FALSE(error); + EXPECT_TRUE(args._remote_symbolication); +} \ No newline at end of file diff --git a/ddprof-lib/src/test/cpp/remotesymbolication_ut.cpp b/ddprof-lib/src/test/cpp/remotesymbolication_ut.cpp new file mode 100644 index 000000000..5346639de --- /dev/null +++ b/ddprof-lib/src/test/cpp/remotesymbolication_ut.cpp @@ -0,0 +1,120 @@ +/* + * Copyright 2025, Datadog, Inc. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include +#include +#include "symbols_linux_dd.h" +#include "vmEntry.h" +#include "../../main/cpp/gtest_crash_handler.h" + +#ifdef __linux__ + +static constexpr char REMOTE_TEST_NAME[] = "RemoteSymbolicationTest"; + +class RemoteSymbolicationGlobalSetup { +public: + RemoteSymbolicationGlobalSetup() { + installGtestCrashHandler(); + } + ~RemoteSymbolicationGlobalSetup() { + restoreDefaultSignalHandlers(); + } +}; + +static RemoteSymbolicationGlobalSetup global_setup; + +class RemoteSymbolicationTest : public ::testing::Test { +protected: + void SetUp() override { + // Test setup + } + + void TearDown() override { + // Test cleanup + } +}; + +TEST_F(RemoteSymbolicationTest, RemoteFrameInfoConstruction) { + const char* test_build_id = "deadbeefcafebabe"; + uintptr_t test_offset = 0x1234; + short test_lib_index = 5; + + RemoteFrameInfo rfi(test_build_id, test_offset, test_lib_index); + + EXPECT_STREQ(rfi.build_id, test_build_id); + EXPECT_EQ(rfi.pc_offset, test_offset); + EXPECT_EQ(rfi.lib_index, test_lib_index); +} + +TEST_F(RemoteSymbolicationTest, BciFrameTypeConstants) { + // Verify that the new BCI constant is defined + EXPECT_EQ(BCI_NATIVE_FRAME_REMOTE, -19); + + // Verify it doesn't conflict with existing constants + EXPECT_NE(BCI_NATIVE_FRAME_REMOTE, BCI_NATIVE_FRAME); + EXPECT_NE(BCI_NATIVE_FRAME_REMOTE, BCI_ERROR); + EXPECT_NE(BCI_NATIVE_FRAME_REMOTE, BCI_ALLOC); +} + +// Test build-id extraction from a minimal ELF +TEST_F(RemoteSymbolicationTest, BuildIdExtractionBasic) { + // Create a minimal ELF file with a build-id note section + // This test would be more comprehensive with a real ELF file + // For now, just test the function doesn't crash on invalid input + + size_t build_id_len = 0; + char* build_id = ddprof::SymbolsLinux::extractBuildId("/nonexistent", &build_id_len); + + // Should return null for non-existent file + EXPECT_EQ(build_id, nullptr); + EXPECT_EQ(build_id_len, 0); +} + +TEST_F(RemoteSymbolicationTest, BuildIdExtractionInvalidInput) { + size_t build_id_len = 0; + + // Test null inputs + char* build_id1 = ddprof::SymbolsLinux::extractBuildId(nullptr, &build_id_len); + EXPECT_EQ(build_id1, nullptr); + + char* build_id2 = ddprof::SymbolsLinux::extractBuildId("/some/file", nullptr); + EXPECT_EQ(build_id2, nullptr); + + // Test non-ELF file + const char* test_content = "This is not an ELF file"; + char temp_file[] = "/tmp/not_an_elf_XXXXXX"; + + int fd = mkstemp(temp_file); + if (fd >= 0) { + write(fd, test_content, strlen(test_content)); + close(fd); + + char* build_id3 = ddprof::SymbolsLinux::extractBuildId(temp_file, &build_id_len); + EXPECT_EQ(build_id3, nullptr); + + unlink(temp_file); + } +} + +TEST_F(RemoteSymbolicationTest, BuildIdFromMemoryInvalidInput) { + size_t build_id_len = 0; + + // Test null pointer + char* build_id1 = ddprof::SymbolsLinux::extractBuildIdFromMemory(nullptr, 100, &build_id_len); + EXPECT_EQ(build_id1, nullptr); + + // Test invalid size + char dummy_data[10] = {0}; + char* build_id2 = ddprof::SymbolsLinux::extractBuildIdFromMemory(dummy_data, 0, &build_id_len); + EXPECT_EQ(build_id2, nullptr); + + // Test null output parameter + char* build_id3 = ddprof::SymbolsLinux::extractBuildIdFromMemory(dummy_data, 10, nullptr); + EXPECT_EQ(build_id3, nullptr); +} + +#endif // __linux__ \ No newline at end of file diff --git a/ddprof-stresstest/README.md b/ddprof-stresstest/README.md index dbe013d98..f0b62333d 100644 --- a/ddprof-stresstest/README.md +++ b/ddprof-stresstest/README.md @@ -90,7 +90,7 @@ Measure end-to-end profiling engine performance including signal handlers, stack ProfilerThroughputSlotExhaustionBenchmark ``` -**Documentation**: See `doc/CallTraceStorage.md` for detailed CallTraceStorage architecture, benchmark results analysis, and optimization recommendations. +**Documentation**: See `doc/architecture/CallTraceStorage.md` for detailed CallTraceStorage architecture, benchmark results analysis, and optimization recommendations. ### ThreadContext Benchmarks @@ -275,7 +275,7 @@ ddprof-stresstest/ ## Documentation -- **CallTraceStorage Architecture**: `doc/CallTraceStorage.md` - Detailed triple-buffer architecture, benchmark results, and optimization guide +- **CallTraceStorage Architecture**: `doc/architecture/CallTraceStorage.md` - Detailed triple-buffer architecture, benchmark results, and optimization guide - **Main README**: `README.md` (project root) - General project overview - **Build Configuration**: `CLAUDE.md` - Build system and development guidelines diff --git a/ddprof-test/build.gradle b/ddprof-test/build.gradle index 83a3829ea..ff82cd2ab 100644 --- a/ddprof-test/build.gradle +++ b/ddprof-test/build.gradle @@ -49,6 +49,7 @@ tasks.register('buildNativeJniLibrary', Exec) { args "-I${System.getenv('JAVA_HOME')}/include/linux" // Linux-specific includes args "-fPIC" args "-shared" // Build a shared library on Linux + args "-Wl,--build-id" // Embed GNU build-id for remote symbolication testing } args nativeSrcDir.listFiles()*.getAbsolutePath() // Source files args "-o", "${outputLibDir.absolutePath}/${libraryFileName}" // Output file path diff --git a/ddprof-test/src/test/cpp/remotesym.c b/ddprof-test/src/test/cpp/remotesym.c new file mode 100644 index 000000000..7f4c647bf --- /dev/null +++ b/ddprof-test/src/test/cpp/remotesym.c @@ -0,0 +1,62 @@ +/* + * Copyright 2025, Datadog, Inc. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include + +/** + * Recursive CPU-burning function that creates a distinct call stack. + * This ensures native frames from this library appear in profiling samples. + */ +static uint64_t burn_cpu_recursive(uint64_t n, uint64_t depth) { + if (depth == 0) { + // Base case: perform actual computation + uint64_t sum = 0; + for (uint64_t i = 0; i < n; i++) { + sum += i * i; + } + return sum; + } + + // Recursive case: go deeper + return burn_cpu_recursive(n, depth - 1) + depth; +} + +/** + * Entry point for CPU-burning work. + * Called from Java to generate CPU samples with this library on the stack. + */ +JNIEXPORT jlong JNICALL +Java_com_datadoghq_profiler_RemoteSymHelper_burnCpu(JNIEnv *env, jclass clazz, jlong iterations, jint depth) { + return (jlong)burn_cpu_recursive((uint64_t)iterations, (uint32_t)depth); +} + +/** + * Additional function to create more stack depth. + */ +static uint64_t compute_fibonacci(uint32_t n) { + if (n <= 1) return n; + + uint64_t a = 0, b = 1; + for (uint32_t i = 2; i <= n; i++) { + uint64_t temp = a + b; + a = b; + b = temp; + } + return b; +} + +/** + * Another entry point with different stack signature. + */ +JNIEXPORT jlong JNICALL +Java_com_datadoghq_profiler_RemoteSymHelper_computeFibonacci(JNIEnv *env, jclass clazz, jint n) { + // Call multiple times to increase likelihood of sampling + uint64_t result = 0; + for (int i = 0; i < 1000; i++) { + result += compute_fibonacci((uint32_t)n); + } + return (jlong)result; +} diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/RemoteSymHelper.java b/ddprof-test/src/test/java/com/datadoghq/profiler/RemoteSymHelper.java new file mode 100644 index 000000000..af6cf86bd --- /dev/null +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/RemoteSymHelper.java @@ -0,0 +1,35 @@ +/* + * Copyright 2025, Datadog, Inc. + * SPDX-License-Identifier: Apache-2.0 + */ + +package com.datadoghq.profiler; + +/** + * Helper class for remote symbolication testing. + * Provides JNI methods that burn CPU to ensure native frames appear in profiling samples. + * The native library is built with GNU build-id on Linux for remote symbolication testing. + */ +public class RemoteSymHelper { + static { + System.loadLibrary("ddproftest"); + } + + /** + * Burns CPU cycles by performing recursive computation. + * This creates a distinctive call stack that should appear in profiling samples. + * + * @param iterations Number of iterations for computation + * @param depth Recursion depth + * @return Computed result (to prevent optimization) + */ + public static native long burnCpu(long iterations, int depth); + + /** + * Computes Fibonacci numbers repeatedly to burn CPU. + * + * @param n Fibonacci number to compute + * @return Computed result (to prevent optimization) + */ + public static native long computeFibonacci(int n); +} diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/RemoteSymbolicationTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/RemoteSymbolicationTest.java new file mode 100644 index 000000000..eb3e852bd --- /dev/null +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/RemoteSymbolicationTest.java @@ -0,0 +1,215 @@ +/* + * Copyright 2025, Datadog, Inc. + * SPDX-License-Identifier: Apache-2.0 + */ + +package com.datadoghq.profiler.cpu; + +import com.datadoghq.profiler.CStackAwareAbstractProfilerTest; +import com.datadoghq.profiler.Platform; +import com.datadoghq.profiler.RemoteSymHelper; +import com.datadoghq.profiler.junit.CStack; +import com.datadoghq.profiler.junit.RetryTest; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.params.provider.ValueSource; +import org.openjdk.jmc.common.IMCFrame; +import org.openjdk.jmc.common.IMCMethod; +import org.openjdk.jmc.common.IMCStackTrace; +import org.openjdk.jmc.common.IMCType; +import org.openjdk.jmc.common.item.Attribute; +import org.openjdk.jmc.common.item.IAttribute; +import org.openjdk.jmc.common.item.IItem; +import org.openjdk.jmc.common.item.IItemCollection; +import org.openjdk.jmc.common.item.IItemIterable; +import org.openjdk.jmc.common.item.IMemberAccessor; +import org.openjdk.jmc.flightrecorder.jdk.JdkAttributes; + +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; +import static org.openjdk.jmc.common.unit.UnitLookup.PLAIN_TEXT; + +/** + * Integration test for remote symbolication feature. + * + *

Tests that when remotesym=true is enabled: + *

    + *
  • Native frames contain build-id instead of symbol names
  • + *
  • PC offsets are stored instead of symbol addresses
  • + *
  • Build-ids are valid hex strings
  • + *
+ */ +public class RemoteSymbolicationTest extends CStackAwareAbstractProfilerTest { + public RemoteSymbolicationTest(@CStack String cstack) { + super(cstack); + } + + @BeforeEach + public void checkPlatform() { + // Remote symbolication with build-id extraction is Linux-only + Assumptions.assumeTrue(Platform.isLinux(), "Remote symbolication test requires Linux"); + // Zing JVM forces cstack=no which disables native stack walking + Assumptions.assumeFalse(Platform.isZing(), "Remote symbolication test requires native stack walking (incompatible with Zing)"); + // OpenJ9 uses JVMTI-based CPU profiling which only captures Java frames, not native library frames + // This test requires native frames from libddproftest.so to validate remote symbolication + Assumptions.assumeFalse(Platform.isJ9(), "Remote symbolication requires native frames; OpenJ9 JVMTI engine only captures Java frames"); + } + + @RetryTest(10) + @TestTemplate + @ValueSource(strings = {"vm", "vmx", "fp", "dwarf"}) + public void testRemoteSymbolicationEnabled(@CStack String cstack) throws Exception { + try (ProfiledCode profiledCode = new ProfiledCode(profiler)) { + for (int i = 0, id = 1; i < 100; i++, id += 3) { + profiledCode.method1(id); + // Call native functions from our test library to ensure + // native frames with build-id appear in the samples + // Increased iterations to ensure profiler captures these frames + RemoteSymHelper.burnCpu(1000000, 10); + RemoteSymHelper.computeFibonacci(35); + } + stopProfiler(); + + verifyCStackSettings(); + + // First verify that our test library (libddproftest) has a build-id + // We use the extended jdk.NativeLibrary event which now includes buildId and loadBias fields + IItemCollection libraryEvents = verifyEvents("jdk.NativeLibrary"); + String testLibBuildId = null; + boolean foundTestLib = false; + + // Create attributes for the custom fields we added to jdk.NativeLibrary + IAttribute buildIdAttr = Attribute.attr("buildId", "buildId", "GNU Build ID", PLAIN_TEXT); + IAttribute nameAttr = Attribute.attr("name", "name", "Name", PLAIN_TEXT); + + for (IItemIterable libItems : libraryEvents) { + IMemberAccessor buildIdAccessor = buildIdAttr.getAccessor(libItems.getType()); + IMemberAccessor nameAccessor = nameAttr.getAccessor(libItems.getType()); + + for (IItem libItem : libItems) { + String name = nameAccessor.getMember(libItem); + String buildId = buildIdAccessor.getMember(libItem); + + System.out.println("Library: " + name + " -> build-id: " + + (buildId != null && !buildId.isEmpty() ? buildId : "")); + + // Check if this is our test library + if (name != null && name.contains("libddproftest")) { + foundTestLib = true; + testLibBuildId = buildId; + System.out.println("Found test library: " + name + " with build-id: " + buildId); + } + } + } + + // Our test library MUST be present and have a build-id + Assumptions.assumeTrue(foundTestLib, + "Test library libddproftest not found in jdk.NativeLibrary events. " + + "The test needs this library to verify remote symbolication."); + Assumptions.assumeTrue(testLibBuildId != null && !testLibBuildId.isEmpty(), + "Test library libddproftest found but has no build-id. " + + "Cannot test remote symbolication without build-id."); + + IItemCollection events = verifyEvents("datadog.ExecutionSample"); + + boolean foundTestLibFrame = false; + boolean foundTestLibRemoteFrame = false; + int sampleCount = 0; + int printCount = 0; + int testLibFrameCount = 0; + + for (IItemIterable cpuSamples : events) { + IMemberAccessor stackTraceAccessor = + STACK_TRACE.getAccessor(cpuSamples.getType()); + + for (IItem sample : cpuSamples) { + IMCStackTrace stackTrace = stackTraceAccessor.getMember(sample); + if (stackTrace == null) { + continue; + } + + sampleCount++; + + // Iterate through frames to check for test library frames + List frames = stackTrace.getFrames(); + + for (IMCFrame frame : frames) { + IMCMethod method = frame.getMethod(); + if (method == null) { + continue; + } + + // Check for jvmtiError in method name + String methodName = method.getMethodName(); + if (methodName != null && methodName.contains("jvmtiError")) { + fail("Found jvmtiError in frame method name: " + methodName); + } + + // Get class name (contains build-id for remote symbolication frames) + IMCType type = method.getType(); + String className = type != null ? type.getFullName() : null; + + // Check if this is a remote symbolication frame from our test library + // Format in JFR: type.name = build-ID (bare, no suffix), method.name = "" + if (methodName != null && methodName.equals("") && + className != null && className.equals(testLibBuildId)) { + foundTestLibRemoteFrame = true; + testLibFrameCount++; + foundTestLibFrame = true; + + // Print first remote frame for debugging + if (printCount == 0) { + System.out.println("=== First remote symbolication frame ==="); + System.out.println("Class: " + className); + System.out.println("Method: " + methodName); + System.out.println("Signature: " + (method.getFormalDescriptor() != null ? method.getFormalDescriptor() : "null")); + printCount++; + } + } + + // With remote symbolication, we should see method names, not resolved symbols + // Log a warning if we find resolved symbols (indicates remote symbolication didn't work for this frame) + if (methodName != null && (methodName.equals("burn_cpu") || methodName.equals("compute_fibonacci"))) { + System.out.println("WARNING: Found resolved symbol instead of remote frame: " + methodName + " (class: " + className + ")"); + } + + // Also count frames with resolved symbols from libddproftest + // (for fallback case or if library name appears in class name) + if ((methodName != null && (methodName.contains("burn_cpu") || methodName.contains("compute_fibonacci"))) || + (className != null && className.contains("libddproftest"))) { + foundTestLibFrame = true; + // Don't increment testLibFrameCount here to avoid double-counting + } + } + } + } + + System.out.println("Total samples analyzed: " + sampleCount); + System.out.println("Samples with test lib frames: " + testLibFrameCount); + System.out.println("Found test lib frames: " + foundTestLibFrame); + System.out.println("Found test lib remote frames: " + foundTestLibRemoteFrame); + System.out.println("Test library build-id: " + testLibBuildId); + + // We call the test library functions extensively, so we MUST see frames from it + assertTrue(foundTestLibFrame, + "No frames from libddproftest found in any samples. " + + "The test called RemoteSymHelper.burnCpu() and computeFibonacci() extensively. " + + "Analyzed " + sampleCount + " samples."); + + // Those frames MUST be in remote symbolication format (not resolved) + assertTrue(foundTestLibRemoteFrame, + "Found frames from libddproftest but they are not in remote symbolication format. " + + "Expected format: " + testLibBuildId + ".(0x). " + + "Analyzed " + testLibFrameCount + " samples with test lib frames."); + } + } + + @Override + protected String getProfilerCommand() { + return "cpu=10ms,remotesym=true"; + } +} \ No newline at end of file diff --git a/doc/event-type-system.md b/doc/EventTypeSystem.md similarity index 100% rename from doc/event-type-system.md rename to doc/EventTypeSystem.md diff --git a/doc/ModifierAllocation.md b/doc/ModifierAllocation.md new file mode 100644 index 000000000..9511082ae --- /dev/null +++ b/doc/ModifierAllocation.md @@ -0,0 +1,134 @@ +# Frame Type vs Modifier: Design Decision for Remote Symbolication + +## Final Solution: Use Frame Type Instead of Modifier + +After evaluating multiple approaches, **we chose to use a new frame type (`FRAME_NATIVE_REMOTE = 7`) instead of a custom modifier flag**. This eliminates: +- ✅ All varint encoding overhead +- ✅ Any potential conflicts with Java modifiers +- ✅ Ambiguity in semantics + +Remote native frames now use: +- **Modifier**: `0x0100` (ACC_NATIVE, same as regular native frames) +- **Frame Type**: `FRAME_NATIVE_REMOTE` (7) + +## Java Access Modifiers (from JVM Spec) + +The Java Virtual Machine specification defines the following access modifiers for classes, methods, and fields: + +| Modifier | Value | Applies To | Description | +|----------|-------|------------|-------------| +| ACC_PUBLIC | 0x0001 | All | Public access | +| ACC_PRIVATE | 0x0002 | Methods/Fields | Private access | +| ACC_PROTECTED | 0x0004 | Methods/Fields | Protected access | +| ACC_STATIC | 0x0008 | Methods/Fields | Static member | +| ACC_FINAL | 0x0010 | All | Final/non-overridable | +| ACC_SYNCHRONIZED | 0x0020 | Methods | Synchronized method | +| ACC_SUPER | 0x0020 | Classes | Treat superclass invokes specially | +| ACC_BRIDGE | 0x0040 | Methods | Compiler-generated bridge method | +| ACC_VOLATILE | 0x0040 | Fields | Volatile field | +| ACC_VARARGS | 0x0080 | Methods | Variable arity method | +| ACC_TRANSIENT | 0x0080 | Fields | Not serialized | +| ACC_NATIVE | 0x0100 | Methods | Native implementation | +| **ACC_INTERFACE** | **0x0200** | **Classes** | **Interface declaration** | +| ACC_ABSTRACT | 0x0400 | Classes/Methods | Abstract class/method | +| ACC_STRICT | 0x0800 | Methods | Use strict floating-point | +| ACC_SYNTHETIC | 0x1000 | All | Compiler-generated | +| ACC_ANNOTATION | 0x2000 | Classes | Annotation type | +| ACC_ENUM | 0x4000 | Classes/Fields | Enum type/constant | +| ACC_MANDATED | 0x8000 | Parameters | Implicitly declared | + +## Profiler Custom Modifiers + +For the Java profiler's internal use, we define custom modifier flags that don't conflict with Java's standard modifiers: + +| Modifier | Value | Usage | Notes | +|----------|-------|-------|-------| +| ACC_NATIVE | 0x0100 | Native frames | Reuses Java's ACC_NATIVE for consistency | +| ACC_SYNTHETIC | 0x1000 | Compiler-generated | Reuses Java's ACC_SYNTHETIC | +| ACC_BRIDGE | 0x0040 | Bridge methods | Reuses Java's ACC_BRIDGE | +| **ACC_REMOTE_SYMBOLICATION** | **0x10000** | **Remote native frames** | **Custom profiler flag (bit 16, outside Java range)** | + +## Modifier Conflict Analysis + +### Evolution of the Design + +**Version 1 (Initial)**: Used `0x200` +- ❌ **CONFLICT**: Java's `ACC_INTERFACE` (0x0200) +- Issues: Could confuse JFR parsers, clash with standard modifiers + +**Version 2**: Changed to `0x2000` +- ⚠️ **CONFLICT**: Java's `ACC_ANNOTATION` (0x2000) +- While theoretically safe for methods (annotations only apply to classes), still within Java's reserved range + +**Version 3 (Final)**: Changed to `0x10000` (bit 16) +- ✅ **NO CONFLICTS**: Completely outside Java's standard modifier range (0x0001-0x8000) +- ✅ Clean separation from JVM specification +- ✅ Future-proof against new Java modifiers + +### Why 0x10000 is the Correct Choice + +**Java Modifier Range:** +- Java uses bits 0-15 (0x0001 to 0x8000) +- Highest standard modifier: `ACC_MANDATED = 0x8000` (bit 15) + +**Custom Profiler Range:** +- Bits 16-30 available for custom flags (0x10000 to 0x40000000) +- `0x10000` (bit 16) is first bit outside Java range +- Clean power of 2, easy to test and debug + +**Benefits:** +1. **Zero theoretical conflicts** with any Java modifier (current or future) +2. **Clear separation** between JVM standard (bits 0-15) and profiler custom (bits 16+) +3. **32-bit safe**: Well within `jint` range (signed 32-bit) +4. **JFR compatible**: `_modifiers` field supports full 32-bit values +5. **Extensible**: Room for additional custom flags (0x20000, 0x40000, etc.) + +### Varint Encoding Analysis + +JFR uses LEB128 variable-length encoding for modifiers. The encoding size depends on the value: + +| Value Range | Example | Bytes | Notes | +|-------------|---------|-------|-------| +| 0x00-0x7F | 0 | 1 | Most compact | +| 0x80-0x3FFF | 0x0100 (ACC_NATIVE) | 2 | Standard native frames | +| 0x4000-0x1FFFFF | 0x1000 (ACC_SYNTHETIC) | 2 | High standard modifiers | +| 0x10000+ | 0x10000 | 3 | **+1 byte overhead!** | + +**Critical insight**: Using `0x10000` would add **1 extra byte per remote native frame**. Over millions of frames, this becomes significant! + +### Alternative Approaches Rejected + +1. **Use modifier 0x0200 (bit 9)**: + - ❌ Conflicts with ACC_INTERFACE + +2. **Use modifier 0x2000 (bit 13)**: + - ❌ Conflicts with ACC_ANNOTATION (theoretically) + - ⚠️ Would be safe in practice (annotations only for classes) + +3. **Use modifier 0x10000 (bit 16)**: + - ❌ 3-byte varint encoding vs 2-byte for regular frames + - ❌ **+1 byte overhead per frame** = significant space impact + +4. **Use a separate field**: + - ❌ Would require JFR metadata changes + - ❌ Breaks backward compatibility + +5. **Use frame type FRAME_NATIVE_REMOTE (CHOSEN)**: + - ✅ Zero encoding overhead (type already serialized) + - ✅ No modifier conflicts + - ✅ Clear semantics + +## Best Practices + +When adding custom modifiers in the future: + +1. **Check JVM Spec**: Always verify against latest JVM specification +2. **Consider Context**: Modifiers for methods vs classes vs fields +3. **Document Clearly**: Explain why the bit is safe to use +4. **Test Compatibility**: Verify JFR parsers handle custom modifiers correctly + +## References + +- Java Virtual Machine Specification (JVMS §4.1, §4.5, §4.6) +- JFR Format Specification +- Original Implementation: [elfBuildId.cpp, flightRecorder.cpp] \ No newline at end of file diff --git a/doc/profiler-memory-requirements.md b/doc/ProfilerMemoryRequirements.md similarity index 100% rename from doc/profiler-memory-requirements.md rename to doc/ProfilerMemoryRequirements.md diff --git a/doc/RemoteSymbolication.md b/doc/RemoteSymbolication.md new file mode 100644 index 000000000..d9f715363 --- /dev/null +++ b/doc/RemoteSymbolication.md @@ -0,0 +1,318 @@ +# Remote Symbolication Implementation + +This document describes the implementation of build-id and pc/offset storage in native frames for remote symbolication in the Java profiler. + +## Overview + +The enhancement allows the Java profiler to store raw build-id and PC offset information for native frames instead of resolving symbols locally. This enables remote symbolication services to handle symbol resolution, which is especially useful for: + +- Distributed profiling scenarios where symbol files aren't available locally +- Reduced profiler overhead by deferring symbol resolution +- Better support for stripped binaries +- Centralized symbol management + +## Implementation Summary + +### 1. **Build-ID Extraction** (`symbols_linux_dd.h/cpp`) + +- **SymbolsLinux**: Utility class to extract GNU build-id from ELF files +- Supports both file-based and memory-based extraction +- Handles .note.gnu.build-id section parsing +- Returns hex-encoded build-id strings + +### 2. **Enhanced CodeCache** (`codeCache.h/cpp`) + +Added fields to store build-id information: +- `_build_id`: Hex-encoded build-id string +- `_build_id_len`: Raw build-id length in bytes +- `_load_bias`: Load bias for address calculations +- Methods: `hasBuildId()`, `buildId()`, `setBuildId()`, etc. + +### 3. **Packed Remote Frame Data** (`profiler.h`) + +- **RemoteFramePacker**: Utility struct for packing/unpacking remote symbolication data + - Packs into 64-bit jmethodID: `pc_offset (44 bits) | mark (3 bits) | lib_index (17 bits)` + - PC offset: 44 bits = 16 TB address range + - Mark: 3 bits = 0-7 values (JVM internal frame markers) + - Library index: 17 bits = 131K libraries max +- **RemoteFrameInfo**: Structure for JFR serialization (vmEntry.h): + - `build_id`: Library build-id string + - `pc_offset`: PC offset within library + - `lib_index`: Library table index +- **BCI_NATIVE_FRAME_REMOTE**: Frame encoding (-19) indicates packed remote data + +### 4. **Enhanced Frame Collection** (`profiler.cpp`, `stackWalker_dd.h`) + +Modified frame collection to support dual modes: +- **Traditional mode**: Stores resolved symbol names (existing behavior) +- **Remote mode**: Stores RemoteFrameInfo with build-id and offset + +**Key Functions**: +- `populateRemoteFrame()`: Packs pc_offset, mark, and lib_index into jmethodID field +- `resolveNativeFrameForWalkVM()`: Resolves native frames for walkVM/walkVMX modes + - Performs binarySearch() to get symbol name + - Extracts mark via NativeFunc::mark() (O(1)) + - Packs data using RemoteFramePacker::pack() +- `convertNativeTrace()`: Converts raw PCs to frames for walkFP/walkDwarf modes + - Checks marks to terminate at JVM internal frames + - Calls populateRemoteFrame() to pack data + +**Mark Checking**: +- Uses binarySearch() + NativeFunc::mark() approach (O(log n) + O(1)) +- Performance identical to traditional symbolication +- Simpler than maintaining separate marked ranges index +- Mark values packed into jmethodID for later unpacking + +**Stack Walker Integration**: +- **walkFP/walkDwarf**: Return raw PCs → `convertNativeTrace()` → `populateRemoteFrame()` +- **walkVM/walkVMX**: Directly call `resolveNativeFrameForWalkVM(pc, lock_index)` during stack walk (patched via gradle/patching.gradle) + +### 5. **JFR Serialization** (`flightRecorder.cpp/h`) + +- **resolveMethod()**: Unpacks remote frame data during JFR serialization + - Uses RemoteFramePacker::unpackPcOffset/Mark/LibIndex() + - Looks up library by index via Libraries::getLibraryByIndex() + - Creates temporary RemoteFrameInfo with build_id and pc_offset +- **fillRemoteFrameInfo()**: Serializes remote frame data to JFR format + - Stores `.` in class name field (e.g., `deadbeef1234567890abcdef.`) + - Stores PC offset in signature field (e.g., `(0x1234)`) + - Uses modifier flag 0x100 (ACC_NATIVE, same as regular native frames) +- **Thread Safety**: Called during JFR serialization with lockAll() held + - Library array is stable (no concurrent dlopen_hook modifications) + - No additional locking needed + +### 6. **Configuration** (`arguments.h/cpp`) + +- **remotesym[=BOOL]**: New profiler argument +- Default: disabled +- Can be enabled with `remotesym=true` or `remotesym=y` + +### 7. **Libraries Integration** (`libraries.h/cpp`, `libraries_linux.cpp`) + +- **updateBuildIds()**: Extracts build-ids for all loaded libraries + - Called during profiler startup when remote symbolication is enabled + - Uses O(1) cache lookup via `_build_id_processed` set + - Mirrors `_parsed_inodes` pattern from symbols_linux.cpp + - Linux-only implementation using ELF parsing +- **getLibraryByIndex()**: Retrieves CodeCache by library index + - Parameter type: uint32_t (matches 17-bit lib_index packing) + - Returns nullptr if index out of bounds + - Used during JFR serialization to unpack remote frames + +### 8. **Upstream Stack Walker Integration** (`gradle/patching.gradle`) + +Patches async-profiler's `stackWalker.h` and `stackWalker.cpp` to integrate remote symbolication: + +**Header Patches (stackWalker.h)**: +- Adds `lock_index` parameter to all three `walkVM` signatures (private implementation, public with features, public with anchor) +- Enables per-strip RemoteFrameInfo pool access during stack walking + +**Implementation Patches (stackWalker.cpp)**: +- Updates all `walkVM` signatures to accept and propagate `lock_index` +- **Critical patch at line 454**: Replaces `profiler->findNativeMethod(pc)` with `profiler->resolveNativeFrameForWalkVM(pc, lock_index)` +- Adds dynamic BCI selection (BCI_NATIVE_FRAME vs BCI_NATIVE_FRAME_REMOTE) +- Adds `fillFrame()` overload for void* method_id to support both symbol names and RemoteFrameInfo pointers +- Handles marked C++ interpreter frames (terminates scan if detected) + +## Usage + +### Enable Remote Symbolication + +```bash +java -agentpath:/libjavaProfiler.so=start,cpu,remotesym=true,file=profile.jfr MyApp +``` + +### Mixed Configuration + +```bash +java -agentpath:/libjavaProfiler.so=start,event=cpu,interval=1000000,remotesym=true,file=profile.jfr MyApp +``` + +## JFR Output Format + +When remote symbolication is enabled, native frames in the JFR output contain: + +- **Class Name**: Build-ID hex string (e.g., `deadbeef1234567890abcdef`) + - Stored via `_classes->lookup(rfi->build_id)` + - Deduplicated in JFR constant pool +- **Method Name**: `` + - Constant string indicating remote symbolication needed + - Stored via `_symbols.lookup("")` +- **Signature**: PC offset in hex (e.g., `0x1a2b`) + - Formatted with `snprintf(buf, size, "0x%lx", pc_offset)` + - Stored via `_symbols.lookup(offset_hex)` + - Note: No parentheses, just hex value +- **Modifier**: `0x100` (ACC_NATIVE) + - Same as regular native frames for consistency +- **Frame Type**: `FRAME_NATIVE_REMOTE` (7) + - Distinguishes from regular native frames (FRAME_NATIVE = 6) + - Allows parsers to identify frames needing remote symbolication + +## Backward Compatibility + +- **Default behavior**: No changes (remote symbolication disabled) +- **Mixed traces**: Supports both local and remote frames in same trace +- **Fallback**: Gracefully falls back to local symbolication when build-id unavailable + +## Memory Management + +- **Build-IDs**: Stored once per CodeCache, shared across frames + - Hex string allocated with malloc (one-time, ~40 bytes per library) + - Freed in CodeCache destructor + - Total overhead: < 2 KB for typical applications +- **Packed Remote Frames**: No separate allocation needed + - Data packed directly into 64-bit jmethodID field + - Zero additional memory overhead per frame + - Eliminates need for signal-safe pool allocation +- **JFR Serialization**: Temporary RemoteFrameInfo created during unpacking + - Stack-allocated, no heap allocation + - Only exists during JFR serialization with lockAll() held + +## Testing + +### Unit Tests +- **remotesymbolication_ut.cpp**: Tests RemoteFrameInfo structure and build-id extraction +- **remoteargs_ut.cpp**: Tests argument parsing for remote symbolication option + +### Test Coverage +- Build-ID extraction from ELF files +- Frame encoding/decoding +- Argument parsing +- Error handling for invalid inputs + +## Platform Support + +- **Linux**: Full support with ELF build-id extraction +- **macOS/Windows**: Framework in place, needs platform-specific implementation + +## Observability and Metrics + +The following counters track remote symbolication usage (added to `counters.h`): + +- **REMOTE_SYMBOLICATION_FRAMES**: Number of frames using remote symbolication + - Incremented in `populateRemoteFrame()` each time a remote frame is created + - Indicates actual usage of the feature +- **REMOTE_SYMBOLICATION_LIBS_WITH_BUILD_ID**: Libraries with extracted build-IDs + - Incremented in `updateBuildIds()` after successful build-ID extraction + - Shows how many libraries are eligible for remote symbolication +- **REMOTE_SYMBOLICATION_BUILD_ID_CACHE_HITS**: Build-ID cache hit rate + - Incremented when `_build_id_processed` cache prevents redundant extraction + - Demonstrates effectiveness of O(1) caching strategy + +These metrics appear in profiler statistics and can be used to monitor: +- Feature adoption rate (frames with remote symbolication vs total native frames) +- Build-ID coverage (libraries with build-IDs vs total libraries) +- Cache efficiency (cache hits vs total updateBuildIds() calls) + +## Performance Considerations + +### Benefits +- **Identical hot-path performance** to traditional symbolication + - Same O(log n) binarySearch for mark checking + - Zero additional overhead for packed representation +- **Reduced memory footprint**: 8 bytes per frame vs storing symbol strings +- **Faster profiling** with deferred full symbolication to post-processing +- **Eliminated duplicate lookups**: Single binarySearch per frame (was 2x before optimization) + +### Costs +- **One-time build-ID extraction** during startup (~ms per library) + - Cached with O(1) lookup to prevent redundant work + - Only extracted for libraries loaded when profiler starts or via dlopen +- **Library index lookup** during JFR serialization + - O(1) array access with lockAll() held + - No contention (serialization is single-threaded) + +## Future Enhancements + +1. **macOS Support**: Implement Mach-O UUID extraction +2. **Caching**: Cache build-ids across profiler sessions +3. **Compression**: Compress build-ids in JFR output +4. **Validation**: Add runtime validation of build-id consistency +5. **Dynamic Pool Sizing**: Adjust RemoteFrameInfo pool size based on workload +6. **Native Frame Modifier Optimization**: Change native frame modifiers from `0x100` to `0x0` + - Current: All native frames use `0x100` (ACC_NATIVE) = 2-byte varint encoding + - Proposed: Use `0x0` (no modifiers) = 1-byte varint encoding + - Benefit: **Save 1 byte per native frame** across all JFR recordings + - Impact: Significant space savings for native-heavy profiles (C++ applications) + - Note: Would require coordination with JFR parsing tools + +## File Structure + +``` +ddprof-lib/src/main/cpp/ +├── symbols_linux_dd.h # Build-ID extraction interface (Linux-specific) +├── symbols_linux_dd.cpp # Build-ID extraction with bounds/alignment checks +├── vmEntry.h # Enhanced with RemoteFrameInfo and BCI constants +├── codeCache.h # Enhanced with build-id fields (cleaned up operator[]) +├── codeCache.cpp # Build-id storage implementation +├── profiler.h # Added resolveNativeFrame/ForWalkVM, RemoteFrameInfo pool +├── profiler.cpp # Remote symbolication logic and pool allocation +├── stackWalker_dd.h # DataDog wrappers with lock_index parameter +├── flightRecorder.h # Added fillRemoteFrameInfo declaration +├── flightRecorder.cpp # Remote frame JFR serialization +├── arguments.h # Added _remote_symbolication field +├── arguments.cpp # Remote symbolication argument parsing +├── libraries.h # Added updateBuildIds method +└── libraries.cpp # Build-id extraction for loaded libraries + +gradle/ +└── patching.gradle # Upstream stackWalker.h/cpp patches for remote symbolication + +ddprof-lib/src/main/cpp-external/ +├── stackWalker.h # Patched: lock_index parameter added to all walkVM signatures +└── stackWalker.cpp # Patched: resolveNativeFrameForWalkVM integration at line 454 + +ddprof-lib/src/test/cpp/ +├── remotesymbolication_ut.cpp # Unit tests for remote symbolication +└── remoteargs_ut.cpp # Unit tests for argument parsing + +ddprof-test/src/test/java/ +└── RemoteSymbolicationTest.java # Integration tests for all cstack modes +``` + +## Implementation Notes + +### Thread Safety +- **Build-ID extraction**: Protected by `_build_id_lock` mutex in `updateBuildIds()` +- **Build-ID cache**: `_build_id_processed` set provides O(1) duplicate detection +- **JFR serialization**: Called with `lockAll()` held, library array is stable + - No concurrent dlopen_hook modifications possible + - No additional locking needed for `getLibraryByIndex()` + +### Signal Handler Safety +- **Packed representation**: No allocations in signal handlers + - Data packed directly into 64-bit jmethodID field + - Zero memory overhead, eliminates need for signal-safe pools +- **Read-only operations**: binarySearch() and mark checking are signal-safe + - No malloc, no locks (except tryLock which is acceptable) + - Atomic operations where needed + +### Error Handling +- **Graceful fallback**: Falls back to traditional symbolication when: + - Library has no build-ID + - Library index out of bounds during unpacking + - Build-ID extraction fails +- **Defensive programming**: Null checks before dereferencing pointers +- **Logging**: TEST_LOG() for debugging production issues + +### ELF Security +- Bounds checking for program header table (prevents reading beyond mapped region) +- Alignment verification for program header offset (prevents misaligned pointer access) +- Two-stage validation for note sections (header first, then payload) +- ELFCLASS64 verification ensures uniform 64-bit structure sizes + +### Stack Walker Integration +- **walkFP/walkDwarf**: Return raw PCs → `convertNativeTrace()` → `populateRemoteFrame()` +- **walkVM/walkVMX**: Direct call to `resolveNativeFrameForWalkVM()` during stack walk + - No post-processing or reverse PC lookup needed + - Mark checking happens inline during frame resolution + - Terminates stack walk at JVM internal frames (marks != 0) + +### Design Evolution +- **Original approach**: Separate marked ranges index with O(log n) isMarkedAddress() +- **Current approach**: Simplified to binarySearch() + NativeFunc::mark() + - Same O(log n) performance but ~150 lines less code + - Eliminated complexity of maintaining separate index + - Marks packed into jmethodID for JFR serialization + +This implementation provides a solid foundation for remote symbolication while maintaining full backward compatibility and robust error handling. diff --git a/docs/architecture/CallTraceStorage.md b/doc/architecture/CallTraceStorage.md similarity index 100% rename from docs/architecture/CallTraceStorage.md rename to doc/architecture/CallTraceStorage.md diff --git a/docs/architecture/TLSContext.md b/doc/architecture/TLSContext.md similarity index 100% rename from docs/architecture/TLSContext.md rename to doc/architecture/TLSContext.md diff --git a/docs/architecture/TlsPriming.md b/doc/architecture/TlsPriming.md similarity index 100% rename from docs/architecture/TlsPriming.md rename to doc/architecture/TlsPriming.md diff --git a/gradle/patching.gradle b/gradle/patching.gradle index b6aba7e36..76f9126ec 100644 --- a/gradle/patching.gradle +++ b/gradle/patching.gradle @@ -188,7 +188,7 @@ ext.upstreamPatches = [ ] ], - // Stack walker patches for ASan compatibility + // Stack walker patches for ASan compatibility and remote symbolication "stackWalker.cpp": [ validations: [[contains: "StackWalker::"], [contains: "StackWalker::walkVM"]], operations: [ @@ -199,6 +199,86 @@ ext.upstreamPatches = [ find: "(int\\s+StackWalker::walkVM\\s*\\()", replace: "__attribute__((no_sanitize(\"address\"))) \$1", idempotent_check: "__attribute__((no_sanitize(\"address\"))) int StackWalker::walkVM(" + ], + [ + type: "signature_parameter_add", + name: "Add lock_index to public walkVM (features) signature", + description: "Adds lock_index parameter to public walkVM overload with features parameter", + find: "(__attribute__\\(\\(no_sanitize\\(\"address\"\\)\\)\\) int StackWalker::walkVM\\(void\\* ucontext, ASGCT_CallFrame\\* frames, int max_depth,\\s*StackWalkFeatures features, EventType event_type)\\)", + replace: "\$1, int lock_index)", + idempotent_check: "EventType event_type, int lock_index\\) \\{" + ], + [ + type: "expression_replace", + name: "Update first walkVM call in public features overload", + description: "Adds lock_index to walkVM call with callerPC/callerSP/callerFP", + find: "(return walkVM\\(&empty_ucontext, frames, max_depth, features, event_type,\\s*callerPC\\(\\), \\(uintptr_t\\)callerSP\\(\\), \\(uintptr_t\\)callerFP\\(\\))\\);", + replace: "\$1, lock_index);", + idempotent_check: "callerFP\\(\\), lock_index\\);" + ], + [ + type: "expression_replace", + name: "Update second walkVM call in public features overload", + description: "Adds lock_index to walkVM call with frame.pc/sp/fp", + find: "(return walkVM\\(ucontext, frames, max_depth, features, event_type,\\s*\\(const void\\*\\)frame\\.pc\\(\\), frame\\.sp\\(\\), frame\\.fp\\(\\))\\);", + replace: "\$1, lock_index);", + idempotent_check: "frame\\.fp\\(\\), lock_index\\);" + ], + [ + type: "signature_parameter_add", + name: "Add lock_index to public walkVM (anchor) signature", + description: "Adds lock_index parameter to public walkVM overload with anchor parameter", + find: "(__attribute__\\(\\(no_sanitize\\(\"address\"\\)\\)\\) int StackWalker::walkVM\\(void\\* ucontext, ASGCT_CallFrame\\* frames, int max_depth, JavaFrameAnchor\\* anchor, EventType event_type)\\)", + replace: "\$1, int lock_index)", + idempotent_check: "EventType event_type, int lock_index\\) \\{" + ], + [ + type: "expression_replace", + name: "Update walkVM call in public anchor overload", + description: "Adds lock_index to walkVM call from anchor overload", + find: "(return walkVM\\(ucontext, frames, max_depth, no_features, event_type, pc, sp, fp)\\);", + replace: "\$1, lock_index);", + idempotent_check: "fp, lock_index\\);" + ], + [ + type: "signature_parameter_add", + name: "Add lock_index to private walkVM implementation signature", + description: "Adds lock_index parameter to private walkVM implementation for remote symbolication pool management", + find: "(__attribute__\\(\\(no_sanitize\\(\"address\"\\)\\)\\) int StackWalker::walkVM\\(void\\* ucontext, ASGCT_CallFrame\\* frames, int max_depth,\\s*StackWalkFeatures features, EventType event_type,\\s*const void\\* pc, uintptr_t sp, uintptr_t fp)\\)", + replace: "\$1, int lock_index)", + idempotent_check: "uintptr_t fp, int lock_index\\)" + ], + [ + type: "expression_replace", + name: "Add remote symbolication support to walkVM native frame resolution", + description: "Replaces direct symbol resolution with resolveNativeFrameForWalkVM call that checks remote symbolication mode", + find: "const char\\* method_name = profiler->findNativeMethod\\(pc\\);", + replace: "// Check if remote symbolication is enabled\n Profiler::NativeFrameResolution resolution = profiler->resolveNativeFrameForWalkVM((uintptr_t)pc, lock_index);\n if (resolution.is_marked) {\n // This is a marked C++ interpreter frame, terminate scan\n break;\n }\n const char* method_name = (const char*)resolution.method_id;\n int frame_bci = resolution.bci;", + idempotent_check: "resolveNativeFrameForWalkVM" + ], + [ + type: "expression_replace", + name: "Guard NativeFunc::mark check for remote frames", + description: "Prevents calling NativeFunc::mark on packed remote frame data by checking frame_bci first", + find: "if \\(method_name != NULL && \\(mark = NativeFunc::mark\\(method_name\\)\\) != 0\\) \\{", + replace: "if (frame_bci != BCI_NATIVE_FRAME_REMOTE && method_name != NULL && (mark = NativeFunc::mark(method_name)) != 0) {", + idempotent_check: "frame_bci != BCI_NATIVE_FRAME_REMOTE" + ], + [ + type: "expression_replace", + name: "Update fillFrame call to use dynamic BCI", + description: "Updates the fillFrame call to use the dynamic frame_bci value determined by remote symbolication mode", + find: "fillFrame\\(frames\\[depth\\+\\+\\], BCI_NATIVE_FRAME, method_name\\);", + replace: "fillFrame(frames[depth++], frame_bci, (void*)method_name);", + idempotent_check: "frame_bci, \\(void\\*\\)method_name" + ], + [ + type: "method_implementation", + name: "Add fillFrame overload for void* method_id", + description: "Adds fillFrame overload that accepts void* method_id to support both symbol names and RemoteFrameInfo pointers", + find: "(static inline void fillFrame\\(ASGCT_CallFrame& frame, ASGCT_CallFrameType type, const char\\* name\\) \\{[^}]+\\})", + replace: "\$1\n\n// Overload for RemoteFrameInfo* (passed as void* to support both char* and RemoteFrameInfo*)\nstatic inline void fillFrame(ASGCT_CallFrame& frame, int bci, void* method_id_ptr) {\n frame.bci = bci;\n frame.method_id = (jmethodID)method_id_ptr;\n}", + idempotent_check: "void fillFrame\\(ASGCT_CallFrame& frame, int bci, void\\* method_id_ptr\\)" ] ] ], @@ -284,5 +364,36 @@ ext.upstreamPatches = [ idempotent_check: "uintptr_t sender_sp_baseline(" ] ] + ], + + // Stack walker header patches for remote symbolication support + "stackWalker.h": [ + validations: [[contains: "class StackWalker"], [contains: "static int walkVM"]], + operations: [ + [ + type: "signature_parameter_add", + name: "Add lock_index to private walkVM signature", + description: "Adds lock_index parameter to private walkVM implementation for remote symbolication pool management", + find: "(static int walkVM\\(void\\* ucontext, ASGCT_CallFrame\\* frames, int max_depth,\\s*StackWalkFeatures features, EventType event_type,\\s*const void\\* pc, uintptr_t sp, uintptr_t fp)\\);", + replace: "\$1, int lock_index);", + idempotent_check: "int lock_index\\);" + ], + [ + type: "signature_parameter_add", + name: "Add lock_index to public walkVM signature (features overload)", + description: "Adds lock_index parameter to public walkVM features-based overload", + find: "(static int walkVM\\(void\\* ucontext, ASGCT_CallFrame\\* frames, int max_depth, StackWalkFeatures features, EventType event_type)\\);", + replace: "\$1, int lock_index);", + idempotent_check: "EventType event_type, int lock_index\\);" + ], + [ + type: "signature_parameter_add", + name: "Add lock_index to public walkVM signature (anchor overload)", + description: "Adds lock_index parameter to public walkVM anchor-based overload", + find: "(static int walkVM\\(void\\* ucontext, ASGCT_CallFrame\\* frames, int max_depth, JavaFrameAnchor\\* anchor, EventType event_type)\\);", + replace: "\$1, int lock_index);", + idempotent_check: "JavaFrameAnchor\\* anchor, EventType event_type, int lock_index\\);" + ] + ] ] ] \ No newline at end of file