Skip to content

Commit 74c5410

Browse files
committed
Add remote symbolication support with build-id and PC offset
1 parent 8283ea1 commit 74c5410

20 files changed

Lines changed: 1096 additions & 24 deletions

README.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,25 @@ Improved thread-local storage initialization to prevent race conditions:
402402

403403
These architectural improvements focus on eliminating race conditions, improving performance in high-throughput scenarios, and providing better debugging capabilities for the native profiling engine.
404404

405+
### Remote Symbolication Support (2025)
406+
407+
Added support for remote symbolication to enable offloading symbol resolution from the agent to backend services:
408+
409+
- **Build-ID extraction**: Automatically extracts GNU build-id from ELF binaries on Linux
410+
- **Raw addressing information**: Stores build-id and PC offset instead of resolved symbol names
411+
- **Remote symbolication mode**: Enable with the `remotesym=true` profiler argument
412+
- **JFR integration**: Remote frames serialized with build-id and offset for backend resolution
413+
- **Zero encoding overhead**: Uses a dedicated frame type (`FRAME_NATIVE_REMOTE`) for efficient serialization
414+
415+
**Benefits**:
416+
- Reduces agent overhead by eliminating local symbol resolution
417+
- Enables centralized symbol resolution with better caching
418+
- Supports scenarios where debug symbols are not available locally
419+
420+
**Key files**: `elfBuildId.h`, `elfBuildId.cpp`, `profiler.cpp`, `flightRecorder.cpp`
421+
422+
For detailed documentation, see [doc/REMOTE_SYMBOLICATION.md](doc/REMOTE_SYMBOLICATION.md).
423+
405424
## Contributing
406425
1. Fork the repository
407426
2. Create a feature branch

ddprof-lib/src/main/cpp/arguments.cpp

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,10 @@ static const Multiplier UNIVERSAL[] = {
8888
// samples
8989
// generations - track surviving generations
9090
// lightweight[=BOOL] - enable lightweight profiling - events without
91-
// stacktraces (default: true) jfr - dump events in Java
91+
// stacktraces (default: true)
92+
// remotesym[=BOOL] - enable remote symbolication for native frames
93+
// (stores build-id and PC offset instead of symbol names)
94+
// jfr - dump events in Java
9295
// Flight Recorder format interval=N - sampling interval in ns
9396
// (default: 10'000'000, i.e. 10 ms) jstackdepth=N - maximum Java stack
9497
// depth (default: 2048) safemode=BITS - disable stack recovery
@@ -317,17 +320,30 @@ Error Arguments::parse(const char *args) {
317320
_lightweight = false;
318321
}
319322
}
320-
CASE("wallsampler")
321-
if (value != NULL) {
322-
switch (value[0]) {
323-
case 'j':
324-
_wallclock_sampler = JVMTI;
325-
break;
326-
case 'a':
327-
default:
328-
_wallclock_sampler = ASGCT;
329-
}
330-
}
323+
324+
CASE("remotesym")
325+
if (value != NULL) {
326+
switch (value[0]) {
327+
case 'y': // yes
328+
case 't': // true
329+
_remote_symbolication = true;
330+
break;
331+
default:
332+
_remote_symbolication = false;
333+
}
334+
}
335+
336+
CASE("wallsampler")
337+
if (value != NULL) {
338+
switch (value[0]) {
339+
case 'j':
340+
_wallclock_sampler = JVMTI;
341+
break;
342+
case 'a':
343+
default:
344+
_wallclock_sampler = ASGCT;
345+
}
346+
}
331347

332348
DEFAULT()
333349
if (_unknown_arg == NULL)

ddprof-lib/src/main/cpp/arguments.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@ class Arguments {
147147
int _jfr_options;
148148
std::vector<std::string> _context_attributes;
149149
bool _lightweight;
150+
bool _remote_symbolication; // Enable remote symbolication for native frames
150151

151152
Arguments(bool persistent = false)
152153
: _buf(NULL),
@@ -177,7 +178,8 @@ class Arguments {
177178
_jfr_options(0),
178179
_context_attributes({}),
179180
_wallclock_sampler(ASGCT),
180-
_lightweight(false) {}
181+
_lightweight(false),
182+
_remote_symbolication(false) {}
181183

182184
~Arguments();
183185

ddprof-lib/src/main/cpp/codeCache.cpp

Lines changed: 63 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,11 @@ CodeCache::CodeCache(const char *name, short lib_index,
3737
_plt_size = 0;
3838
_debug_symbols = false;
3939

40+
// Initialize build-id fields
41+
_build_id = nullptr;
42+
_build_id_len = 0;
43+
_load_bias = 0;
44+
4045
memset(_imports, 0, sizeof(_imports));
4146
_imports_patchable = imports_patchable;
4247

@@ -54,10 +59,27 @@ CodeCache::CodeCache(const CodeCache &other) {
5459
_min_address = other._min_address;
5560
_max_address = other._max_address;
5661
_text_base = other._text_base;
62+
_image_base = other._image_base;
5763

58-
_imports_patchable = other._imports_patchable;
5964
_plt_offset = other._plt_offset;
6065
_plt_size = other._plt_size;
66+
_debug_symbols = other._debug_symbols;
67+
68+
// Copy build-id information
69+
_build_id_len = other._build_id_len;
70+
if (other._build_id != nullptr && other._build_id_len > 0) {
71+
size_t hex_str_len = strlen(other._build_id);
72+
_build_id = static_cast<char*>(malloc(hex_str_len + 1));
73+
if (_build_id != nullptr) {
74+
strcpy(_build_id, other._build_id);
75+
}
76+
} else {
77+
_build_id = nullptr;
78+
}
79+
_load_bias = other._load_bias;
80+
81+
memset(_imports, 0, sizeof(_imports));
82+
_imports_patchable = other._imports_patchable;
6183

6284
_dwarf_table_length = other._dwarf_table_length;
6385
_dwarf_table = new FrameDesc[_dwarf_table_length];
@@ -77,17 +99,34 @@ CodeCache &CodeCache::operator=(const CodeCache &other) {
7799
delete _name;
78100
delete _dwarf_table;
79101
delete _blobs;
102+
free(_build_id); // Free existing build-id
80103

81104
_name = NativeFunc::create(other._name, -1);
82105
_lib_index = other._lib_index;
83106
_min_address = other._min_address;
84107
_max_address = other._max_address;
85108
_text_base = other._text_base;
86-
87-
_imports_patchable = other._imports_patchable;
109+
_image_base = other._image_base;
88110

89111
_plt_offset = other._plt_offset;
90112
_plt_size = other._plt_size;
113+
_debug_symbols = other._debug_symbols;
114+
115+
// Copy build-id information
116+
_build_id_len = other._build_id_len;
117+
if (other._build_id != nullptr && other._build_id_len > 0) {
118+
size_t hex_str_len = strlen(other._build_id);
119+
_build_id = static_cast<char*>(malloc(hex_str_len + 1));
120+
if (_build_id != nullptr) {
121+
strcpy(_build_id, other._build_id);
122+
}
123+
} else {
124+
_build_id = nullptr;
125+
}
126+
_load_bias = other._load_bias;
127+
128+
memset(_imports, 0, sizeof(_imports));
129+
_imports_patchable = other._imports_patchable;
91130

92131
_dwarf_table_length = other._dwarf_table_length;
93132
_dwarf_table = new FrameDesc[_dwarf_table_length];
@@ -110,6 +149,7 @@ CodeCache::~CodeCache() {
110149
NativeFunc::destroy(_name);
111150
delete[] _blobs;
112151
delete _dwarf_table;
152+
free(_build_id); // Free build-id memory
113153
}
114154

115155
void CodeCache::expand() {
@@ -387,3 +427,23 @@ FrameDesc CodeCache::findFrameDesc(const void *pc) {
387427
return FrameDesc::default_frame;
388428
}
389429
}
430+
431+
// Replaces the stored build-id with a private copy of `build_id`.
//
// build_id:     null-terminated hex string; its length is measured with strlen
// build_id_len: raw build-id length in bytes, stored as-is for buildIdLen()
//
// Any previously stored build-id is released first. When `build_id` is null,
// `build_id_len` is zero, or the allocation fails, the cache is left with no
// build-id (_build_id == nullptr, _build_id_len == 0).
void CodeCache::setBuildId(const char* build_id, size_t build_id_len) {
    // Start from the "no build-id" state so every early exit below is consistent.
    free(_build_id);
    _build_id = nullptr;
    _build_id_len = 0;

    if (build_id == nullptr || build_id_len == 0) {
        return;
    }

    // Size of the hex string including its terminating NUL.
    const size_t bytes_needed = strlen(build_id) + 1;
    char* copy = static_cast<char*>(malloc(bytes_needed));
    if (copy == nullptr) {
        return;
    }

    memcpy(copy, build_id, bytes_needed);
    _build_id = copy;
    // Keep the caller-supplied byte length, not the hex string length.
    _build_id_len = build_id_len;
}

ddprof-lib/src/main/cpp/codeCache.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,11 @@ class CodeCache {
116116
unsigned int _plt_offset;
117117
unsigned int _plt_size;
118118

119+
// Build-ID and load bias for remote symbolication
120+
char *_build_id; // GNU build-id (hex string, null if not available)
121+
size_t _build_id_len; // Build-id length in bytes (raw, not hex string length)
122+
uintptr_t _load_bias; // Load bias (image_base - file_base address)
123+
119124
void **_imports[NUM_IMPORTS][NUM_IMPORT_TYPES];
120125
bool _imports_patchable;
121126
bool _debug_symbols;
@@ -169,6 +174,19 @@ class CodeCache {
169174

170175
void setDebugSymbols(bool debug_symbols) { _debug_symbols = debug_symbols; }
171176

177+
// Build-ID and remote symbolication support
178+
const char* buildId() const { return _build_id; }
179+
size_t buildIdLen() const { return _build_id_len; }
180+
bool hasBuildId() const { return _build_id != nullptr; }
181+
uintptr_t loadBias() const { return _load_bias; }
182+
short libIndex() const { return _lib_index; }
183+
184+
// Sets the build-id (hex string) and stores the original byte length
185+
// build_id: null-terminated hex string (e.g., 40 hex characters for a 20-byte build-id)
186+
// build_id_len: original byte length before hex conversion (e.g., 20 bytes)
187+
void setBuildId(const char* build_id, size_t build_id_len);
188+
void setLoadBias(uintptr_t load_bias) { _load_bias = load_bias; }
189+
172190
void add(const void *start, int length, const char *name,
173191
bool update_bounds = false);
174192
void updateBounds(const void *start, const void *end);
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
/*
2+
* Copyright 2025, Datadog, Inc.
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
#ifdef __linux__
7+
8+
#include "elfBuildId.h"
9+
#include <elf.h>
10+
#include <fcntl.h>
11+
#include <sys/mman.h>
12+
#include <sys/stat.h>
13+
#include <unistd.h>
14+
#include <cstdio>
15+
#include <cstdlib>
16+
#include <cstring>
17+
18+
// GNU build-id note constants
19+
#define NT_GNU_BUILD_ID 3
20+
#define GNU_BUILD_ID_NAME "GNU"
21+
22+
char* ElfBuildIdExtractor::extractBuildId(const char* file_path, size_t* build_id_len) {
23+
if (!file_path || !build_id_len) {
24+
return nullptr;
25+
}
26+
27+
int fd = open(file_path, O_RDONLY);
28+
if (fd < 0) {
29+
return nullptr;
30+
}
31+
32+
struct stat st;
33+
if (fstat(fd, &st) < 0) {
34+
close(fd);
35+
return nullptr;
36+
}
37+
38+
void* elf_base = mmap(nullptr, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
39+
close(fd);
40+
41+
if (elf_base == MAP_FAILED) {
42+
return nullptr;
43+
}
44+
45+
char* result = extractBuildIdFromMemory(elf_base, st.st_size, build_id_len);
46+
47+
munmap(elf_base, st.st_size);
48+
return result;
49+
}
50+
51+
char* ElfBuildIdExtractor::extractBuildIdFromMemory(const void* elf_base, size_t elf_size, size_t* build_id_len) {
52+
if (!elf_base || !build_id_len || elf_size < sizeof(Elf64_Ehdr)) {
53+
return nullptr;
54+
}
55+
56+
const Elf64_Ehdr* ehdr = static_cast<const Elf64_Ehdr*>(elf_base);
57+
58+
// Verify ELF magic
59+
if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0) {
60+
return nullptr;
61+
}
62+
63+
// Only handle 64-bit ELF for now
64+
if (ehdr->e_ident[EI_CLASS] != ELFCLASS64) {
65+
return nullptr;
66+
}
67+
68+
// Check if we have program headers
69+
if (ehdr->e_phoff == 0 || ehdr->e_phnum == 0) {
70+
return nullptr;
71+
}
72+
73+
const char* base = static_cast<const char*>(elf_base);
74+
const Elf64_Phdr* phdr = reinterpret_cast<const Elf64_Phdr*>(base + ehdr->e_phoff);
75+
76+
// Search for PT_NOTE segments
77+
for (int i = 0; i < ehdr->e_phnum; i++) {
78+
if (phdr[i].p_type == PT_NOTE && phdr[i].p_filesz > 0) {
79+
// Ensure note segment is within file bounds
80+
if (phdr[i].p_offset + phdr[i].p_filesz > elf_size) {
81+
continue;
82+
}
83+
84+
const void* note_data = base + phdr[i].p_offset;
85+
const uint8_t* build_id_bytes = findBuildIdInNotes(note_data, phdr[i].p_filesz, build_id_len);
86+
87+
if (build_id_bytes) {
88+
return buildIdToHex(build_id_bytes, *build_id_len);
89+
}
90+
}
91+
}
92+
93+
return nullptr;
94+
}
95+
96+
const uint8_t* ElfBuildIdExtractor::findBuildIdInNotes(const void* note_data, size_t note_size, size_t* build_id_len) {
97+
const char* data = static_cast<const char*>(note_data);
98+
size_t offset = 0;
99+
100+
while (offset + sizeof(Elf64_Nhdr) < note_size) {
101+
const Elf64_Nhdr* nhdr = reinterpret_cast<const Elf64_Nhdr*>(data + offset);
102+
103+
// Calculate aligned sizes
104+
size_t name_size_aligned = (nhdr->n_namesz + 3) & ~3;
105+
size_t desc_size_aligned = (nhdr->n_descsz + 3) & ~3;
106+
107+
// Check bounds
108+
if (offset + sizeof(Elf64_Nhdr) + name_size_aligned + desc_size_aligned > note_size) {
109+
break;
110+
}
111+
112+
// Check if this is a GNU build-id note
113+
if (nhdr->n_type == NT_GNU_BUILD_ID && nhdr->n_namesz > 0 && nhdr->n_descsz > 0) {
114+
const char* name = data + offset + sizeof(Elf64_Nhdr);
115+
116+
// Verify GNU build-id name (including null terminator)
117+
if (nhdr->n_namesz == 4 && strncmp(name, GNU_BUILD_ID_NAME, 3) == 0 && name[3] == '\0') {
118+
const uint8_t* desc = reinterpret_cast<const uint8_t*>(data + offset + sizeof(Elf64_Nhdr) + name_size_aligned);
119+
*build_id_len = nhdr->n_descsz;
120+
return desc;
121+
}
122+
}
123+
124+
offset += sizeof(Elf64_Nhdr) + name_size_aligned + desc_size_aligned;
125+
}
126+
127+
return nullptr;
128+
}
129+
130+
/**
 * Renders raw build-id bytes as a lowercase hexadecimal string.
 *
 * @param build_id_bytes  bytes to encode
 * @param byte_len        number of bytes at `build_id_bytes`
 * @return a malloc-allocated, null-terminated string of 2 * byte_len hex
 *         digits, or nullptr when the input is null/empty or allocation fails
 */
char* ElfBuildIdExtractor::buildIdToHex(const uint8_t* build_id_bytes, size_t byte_len) {
    if (build_id_bytes == nullptr || byte_len == 0) {
        return nullptr;
    }

    // Two output characters per input byte, plus the terminator.
    const size_t hex_len = byte_len * 2;
    char* out = static_cast<char*>(malloc(hex_len + 1));
    if (out == nullptr) {
        return nullptr;
    }

    static const char kHexDigits[] = "0123456789abcdef";

    char* cursor = out;
    const uint8_t* const end = build_id_bytes + byte_len;
    for (const uint8_t* p = build_id_bytes; p != end; ++p) {
        *cursor++ = kHexDigits[*p >> 4];    // high nibble first
        *cursor++ = kHexDigits[*p & 0x0f];  // then low nibble
    }
    *cursor = '\0';

    return out;
}
148+
149+
#endif // __linux__

0 commit comments

Comments
 (0)