From 6a3c7fe38188fee66bf342bfc3c1fc50547b57f4 Mon Sep 17 00:00:00 2001 From: Roman Kennke Date: Mon, 19 Jan 2026 16:46:18 +0100 Subject: [PATCH 1/2] [PROF-13123] Streamline upstream sources --- .github/workflows/ci.yml | 9 - .github/workflows/test_workflow.yml | 36 - .gitignore | 1 - CLAUDE.md | 20 +- README.md | 61 - ddprof-lib/benchmarks/build.gradle | 4 +- ddprof-lib/build.gradle | 241 ---- ddprof-lib/fuzz/build.gradle | 10 - ddprof-lib/gtest/build.gradle | 10 - ddprof-lib/src/main/cpp/arch.h | 211 ++++ ddprof-lib/src/main/cpp/asprof.h | 106 ++ ddprof-lib/src/main/cpp/cpuEngine.h | 52 + ddprof-lib/src/main/cpp/dwarf.h | 183 +++ ddprof-lib/src/main/cpp/incbin.h | 36 + ddprof-lib/src/main/cpp/j9StackTraces.h | 49 + ddprof-lib/src/main/cpp/mutex.cpp | 35 + ddprof-lib/src/main/cpp/mutex.h | 49 + ddprof-lib/src/main/cpp/os.h | 150 +++ ddprof-lib/src/main/cpp/os_linux.cpp | 693 +++++++++++ ddprof-lib/src/main/cpp/os_macos.cpp | 458 +++++++ ddprof-lib/src/main/cpp/stackFrame.h | 92 ++ .../src/main/cpp/stackFrame_aarch64.cpp | 405 +++++++ ddprof-lib/src/main/cpp/stackFrame_arm.cpp | 141 +++ ddprof-lib/src/main/cpp/stackFrame_i386.cpp | 162 +++ .../src/main/cpp/stackFrame_loongarch64.cpp | 116 ++ ddprof-lib/src/main/cpp/stackFrame_ppc64.cpp | 162 +++ .../src/main/cpp/stackFrame_riscv64.cpp | 118 ++ ddprof-lib/src/main/cpp/stackFrame_x64.cpp | 322 +++++ ddprof-lib/src/main/cpp/stackWalker.cpp | 590 +++++++++ ddprof-lib/src/main/cpp/stackWalker.h | 61 + ddprof-lib/src/main/cpp/symbols.h | 50 +- ddprof-lib/src/main/cpp/symbols_linux.cpp | 1066 +++++++++++++++++ ddprof-lib/src/main/cpp/symbols_linux.h | 2 +- ddprof-lib/src/main/cpp/symbols_macos.cpp | 231 ++++ ddprof-lib/src/main/cpp/trap.cpp | 64 + ddprof-lib/src/main/cpp/trap.h | 56 + ddprof-lib/src/main/cpp/tsc.cpp | 54 + ddprof-lib/src/main/cpp/tsc.h | 105 ++ ddprof-lib/src/main/cpp/vmStructs.cpp | 762 ++++++++++++ ddprof-lib/src/main/cpp/vmStructs.h | 705 +++++++++++ ddprof-lib/src/test/fuzz/fuzz_dwarf.cpp | 2 +- ddprof-lib/src/test/make/Makefile | 4 +- doc/event-type-system.md | 2 +- gradle/lock.properties | 5 - gradle/patching.gradle | 288 ----- 45 files changed, 7263 insertions(+), 716 deletions(-) create mode 100644 ddprof-lib/src/main/cpp/arch.h create mode 100644 ddprof-lib/src/main/cpp/asprof.h create mode 100644 ddprof-lib/src/main/cpp/cpuEngine.h create mode 100644 ddprof-lib/src/main/cpp/dwarf.h create mode 100644 ddprof-lib/src/main/cpp/incbin.h create mode 100644 ddprof-lib/src/main/cpp/j9StackTraces.h create mode 100644 ddprof-lib/src/main/cpp/mutex.cpp create mode 100644 ddprof-lib/src/main/cpp/mutex.h create mode 100644 ddprof-lib/src/main/cpp/os.h create mode 100644 ddprof-lib/src/main/cpp/os_linux.cpp create mode 100644 ddprof-lib/src/main/cpp/os_macos.cpp create mode 100644 ddprof-lib/src/main/cpp/stackFrame.h create mode 100644 ddprof-lib/src/main/cpp/stackFrame_aarch64.cpp create mode 100644 ddprof-lib/src/main/cpp/stackFrame_arm.cpp create mode 100644 ddprof-lib/src/main/cpp/stackFrame_i386.cpp create mode 100644 ddprof-lib/src/main/cpp/stackFrame_loongarch64.cpp create mode 100644 ddprof-lib/src/main/cpp/stackFrame_ppc64.cpp create mode 100644 ddprof-lib/src/main/cpp/stackFrame_riscv64.cpp create mode 100644 ddprof-lib/src/main/cpp/stackFrame_x64.cpp create mode 100644 ddprof-lib/src/main/cpp/stackWalker.cpp create mode 100644 ddprof-lib/src/main/cpp/stackWalker.h create mode 100644 ddprof-lib/src/main/cpp/symbols_linux.cpp create mode 100644 ddprof-lib/src/main/cpp/symbols_macos.cpp create mode 100644 
ddprof-lib/src/main/cpp/trap.cpp create mode 100644 ddprof-lib/src/main/cpp/trap.h create mode 100644 ddprof-lib/src/main/cpp/tsc.cpp create mode 100644 ddprof-lib/src/main/cpp/tsc.h create mode 100644 ddprof-lib/src/main/cpp/vmStructs.cpp create mode 100644 ddprof-lib/src/main/cpp/vmStructs.h delete mode 100644 gradle/lock.properties delete mode 100644 gradle/patching.gradle diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5390f3d5..050070a4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -110,15 +110,6 @@ jobs: restore-keys: | gradle-caches-${{ runner.os }}- - - name: Cache async-profiler - uses: actions/cache@v4 - with: - path: ddprof-lib/build/async-profiler - key: async-profiler-${{ runner.os }}-${{ hashFiles('gradle/lock.properties') }} - enableCrossOsArchive: true - restore-keys: | - async-profiler-${{ runner.os }}- - - name: Validate Javadoc run: | # Note: javadoc task depends on copyReleaseLibs which requires building native libraries diff --git a/.github/workflows/test_workflow.yml b/.github/workflows/test_workflow.yml index 62fc7575..3deb10c4 100644 --- a/.github/workflows/test_workflow.yml +++ b/.github/workflows/test_workflow.yml @@ -52,15 +52,6 @@ jobs: key: gradle-caches-${{ runner.os }}-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} restore-keys: | gradle-caches-${{ runner.os }}- - - name: Cache async-profiler - if: steps.set_enabled.outputs.enabled == 'true' - uses: actions/cache@v4 - with: - path: ddprof-lib/build/async-profiler - key: async-profiler-${{ runner.os }}-${{ hashFiles('gradle/lock.properties') }} - enableCrossOsArchive: true - restore-keys: | - async-profiler-${{ runner.os }}- - name: Setup cached JDK id: cache-jdk if: steps.set_enabled.outputs.enabled == 'true' @@ -173,15 +164,6 @@ jobs: key: gradle-caches-${{ runner.os }}-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} restore-keys: | gradle-caches-${{ runner.os }}- - - name: Cache async-profiler - if: steps.set_enabled.outputs.enabled == 'true' - uses: actions/cache@v4 - with: - path: ddprof-lib/build/async-profiler - key: async-profiler-${{ runner.os }}-${{ hashFiles('gradle/lock.properties') }} - enableCrossOsArchive: true - restore-keys: | - async-profiler-${{ runner.os }}- - name: Setup cached JDK id: cache-jdk uses: ./.github/actions/setup_cached_java @@ -311,15 +293,6 @@ jobs: with: version: ${{ matrix.java_version }} arch: 'aarch64' - - name: Cache async-profiler - if: steps.set_enabled.outputs.enabled == 'true' - uses: actions/cache@v4 - with: - path: ddprof-lib/build/async-profiler - key: async-profiler-${{ runner.os }}-${{ hashFiles('gradle/lock.properties') }} - enableCrossOsArchive: true - restore-keys: | - async-profiler-${{ runner.os }}- - name: Setup OS if: steps.set_enabled.outputs.enabled == 'true' run: | @@ -427,15 +400,6 @@ jobs: with: version: ${{ matrix.java_version }} arch: 'aarch64-musl' - - name: Cache async-profiler - if: steps.set_enabled.outputs.enabled == 'true' - uses: actions/cache@v4 - with: - path: ddprof-lib/build/async-profiler - key: async-profiler-${{ runner.os }}-${{ hashFiles('gradle/lock.properties') }} - enableCrossOsArchive: true - restore-keys: | - async-profiler-${{ runner.os }}- - name: Extract Versions uses: ./.github/actions/extract_versions - name: Test diff --git a/.gitignore b/.gitignore index 37256dda..98ebc2de 100644 --- a/.gitignore +++ b/.gitignore @@ -17,7 +17,6 @@ .tmp *.iml /ddprof-stresstest/jmh-result.* -/ddprof-lib/src/main/cpp-external/**/* **/.resources/ diff --git 
a/CLAUDE.md b/CLAUDE.md index e1ec9ca1..c0a9b6dd 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -136,17 +136,9 @@ The project supports multiple build configurations per platform: - **asan**: AddressSanitizer build for memory error detection - **tsan**: ThreadSanitizer build for thread safety validation -### Upstream Integration -The project maintains integration with async-profiler upstream: -- `cloneAsyncProfiler`: Clones DataDog's async-profiler fork -- `copyUpstreamFiles`: Copies selected upstream files to `ddprof-lib/src/main/cpp-external` -- `patchStackFrame`/`patchStackWalker`: Applies necessary patches for ASAN compatibility -- Lock file: `gradle/ap-lock.properties` specifies branch/commit - ### Key Source Locations - Java API: `ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java` - C++ engine: `ddprof-lib/src/main/cpp/` -- Upstream C++ code: `ddprof-lib/src/main/cpp-external/` (generated) - Native libraries: `ddprof-lib/build/lib/main/{config}/{os}/{arch}/` - Test resources: `ddprof-test/src/test/java/` @@ -221,9 +213,7 @@ The profiler uses a sophisticated double-buffered storage system for call traces - **Buffer Management**: Thread-local recording buffers with configurable flush thresholds ### Native Integration Patterns -- **Upstream Sync**: Uses DataDog fork of async-profiler with branch `dd/master` - **Adapter Pattern**: `*_dd.h` files adapt upstream code for Datadog needs -- **External Code**: Upstream files copied to `cpp-external/` with minimal patches - **Signal Handler Safety**: Careful memory management in signal handler contexts ### Multi-Engine Profiling System @@ -256,7 +246,7 @@ The profiler uses a sophisticated double-buffered storage system for call traces ### Code Organization Principles - **Namespace Separation**: Use `ddprof` namespace for adapted upstream classes - **File Naming**: Datadog adaptations use `*_dd` suffix (e.g., `stackWalker_dd.h`) -- **External Dependencies**: Upstream code in `cpp-external/`, local code in `cpp/` +- **External Dependencies**: Local code in `cpp/` ### Performance Constraints - **Algorithmic Complexity**: Use O(N) or better, max 256 elements for linear scans @@ -275,14 +265,6 @@ The profiler uses a sophisticated double-buffered storage system for call traces - **Static Analysis**: `scanBuild` for additional code quality checks - **Test Logging**: Use `TEST_LOG` macro for debug output in tests -### Upstream Integration Workflow -The project maintains a carefully managed relationship with async-profiler upstream: -1. **Lock File**: `gradle/ap-lock.properties` specifies exact upstream commit -2. **Branch Tracking**: `dd/master` branch contains safe upstream changes -3. **File Copying**: `copyUpstreamFiles` task selectively imports upstream code -4. **Minimal Patching**: Only essential patches for ASan compatibility -5. **Cherry-pick Strategy**: Rare cherry-picks only for critical fixes - ## Build System Architecture ### Gradle Multi-project Structure diff --git a/README.md b/README.md index 7455d5a3..2f141132 100644 --- a/README.md +++ b/README.md @@ -37,16 +37,6 @@ cd java-profiler The resulting artifact will be in `ddprof-lib/build/libs/ddprof-.jar` #### Gritty details -To smoothen the absorption of the upstream changes, we are using parts of the upstream codebase in (mostly) vanilla form. 
- -For this, we have several gradle tasks in [ddprof-lib/build.gradle](ddprof-lib/build.gradle): -- `cloneAsyncProfiler` - clones the [DataDog/async-profiler](https://github.com/DataDog/async-profiler) repository into `ddprof-lib/build/async-profiler` using the commit lock specified in [gradle/lock.properties](gradle/lock.properties) - - in that repository, we are maintaining a branch called `dd/master` where we keep the upstream code in sync with the 'safe' changes from the upstream `master` branch - - cherry-picks into that branch should be rare and only done for critical fixes that are needed in the project - - otherwise, we should wait for the next upstream release to avoid conflicts -- `copyUpstreamFiles` - copies the selected upstream source files into the `ddprof-lib/src/main/cpp-external` directory -- `patchUpstreamFiles` - applies unified patches to upstream files for ASan compatibility, memory safety, and API extensions - Since the upstream code might not be 100% compatible with the current version of the project, we need to provide adapters. The adapters are sharing the same file name as the upstream files but are suffixed with `_dd` (e.g. `arch_dd.h`). @@ -55,57 +45,6 @@ conflicts with the upstream code. This allows us to use the upstream code as-is See [ddprof-lib/src/main/cpp/stackWalker_dd.h](ddprof-lib/src/main/cpp/stackWalker_dd.h) for an example of how we adapt the upstream code to fit our needs. -### Unified Patching System - -The project uses a unified configuration-driven patching system to apply modifications to upstream source files: - -- **Configuration File**: All patches are defined in `gradle/patching.gradle` using structured Gradle DSL -- **Direct Source Modification**: Patches are applied directly to upstream source files using regex-based find/replace -- **Idempotent Operations**: Each patch includes checks to prevent double-application -- **Validation System**: Pre-patch validation ensures upstream structure hasn't changed incompatibly -- **Single Unified Task**: One `patchUpstreamFiles` task replaces multiple fragmented patch tasks - -## Patch Configuration Structure - -Patches are defined in `gradle/patching.gradle` with this structure: - -```groovy -ext.upstreamPatches = [ - "filename.cpp": [ - validations: [ - [contains: "expected_function"], - [contains: "expected_class"] - ], - operations: [ - [ - type: "function_attribute", - name: "Add ASan compatibility attribute", - find: "(bool\\s+StackFrame::unwindStub\\s*\\()", - replace: "__attribute__((no_sanitize(\"address\"))) \$1", - idempotent_check: "__attribute__((no_sanitize(\"address\"))) bool StackFrame::unwindStub(" - ] - ] - ] -] -``` - -### Patch Operation Types - -1. **function_attribute**: Add attributes (like `__attribute__`) to function declarations -2. **expression_replace**: Replace unsafe code patterns with safe equivalents -3. **method_declaration**: Add new method declarations to class definitions -4. **method_implementation**: Add complete method implementations to source files - -### Adding New Patches - -1. **Edit Configuration**: Add patch definition to `gradle/patching.gradle` -2. **Add Validations**: Ensure expected code structure exists -3. **Define Operations**: Specify find/replace patterns with appropriate type -4. **Include Idempotency**: Add `idempotent_check` to prevent double-application -5. **Test Thoroughly**: Verify patch works with clean upstream files - -For detailed syntax documentation, see the comprehensive comments in `gradle/patching.gradle`. 
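As an illustration of the adapter pattern described above, here is a minimal hypothetical sketch (the real adapters, such as `stackWalker_dd.h`, are more involved): the adapter includes the vanilla upstream header unchanged and adds Datadog-specific helpers inside the `ddprof` namespace, so nothing can collide with upstream symbols. The helper shown here is invented for illustration and is not part of the actual codebase.

```cpp
// arch_dd.h -- hypothetical adapter sketch, not the actual file contents
#ifndef _ARCH_DD_H
#define _ARCH_DD_H

#include "arch.h"  // vanilla upstream header, used as-is

namespace ddprof {

// Illustrative Datadog-specific helper built on the upstream u64 typedef;
// living in the ddprof namespace keeps it from clashing with upstream code.
static inline u64 atomicMax(volatile u64& var, u64 value) {
    u64 old = __atomic_load_n(&var, __ATOMIC_ACQUIRE);
    while (old < value &&
           !__atomic_compare_exchange_n(&var, &old, value, false,
                                        __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)) {
        // on failure, 'old' is refreshed with the current value of var
    }
    return old;
}

} // namespace ddprof

#endif // _ARCH_DD_H
```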
- ## Claude Code Integration This project includes Claude Code commands for streamlined development workflows when using [Claude Code](https://claude.ai/code): diff --git a/ddprof-lib/benchmarks/build.gradle b/ddprof-lib/benchmarks/build.gradle index 5b57b3bc..a752d1a1 100644 --- a/ddprof-lib/benchmarks/build.gradle +++ b/ddprof-lib/benchmarks/build.gradle @@ -17,10 +17,8 @@ application { // Include the main library headers tasks.withType(CppCompile).configureEach { - dependsOn ':ddprof-lib:patchUpstreamFiles' - + // TODO: Do we need this, or is this included by default? includes file('../src/main/cpp').toString() - includes file('../src/main/cpp-external').toString() } // Add a task to run the benchmark diff --git a/ddprof-lib/build.gradle b/ddprof-lib/build.gradle index 10e3866d..b826a56e 100644 --- a/ddprof-lib/build.gradle +++ b/ddprof-lib/build.gradle @@ -180,14 +180,6 @@ description = "Datadog Java Profiler Library" def component_version = project.hasProperty("ddprof_version") ? project.ddprof_version : project.version -def props = new Properties() -file("${rootDir}/gradle/lock.properties").withInputStream { stream -> - props.load(stream) -} - -def ap_branch_lock = props.getProperty("ap.branch") -def ap_commit_lock = props.getProperty("ap.commit") - // this feels weird but it is the only way invoking `./gradlew :ddprof-lib:*` tasks will work if (rootDir.toString().endsWith("ddprof-lib")) { apply from: rootProject.file('../common.gradle') @@ -289,224 +281,6 @@ tasks.register('copyExternalLibs', Copy) { } } -def cloneAPTask = tasks.register('cloneAsyncProfiler') { - description = 'Clones async-profiler repo if directory is missing or updates it if commit hash differs' - inputs.file("${rootDir}/gradle/lock.properties") - outputs.dir("${projectDir}/build/async-profiler") - outputs.upToDateWhen { - def targetDir = file("${projectDir}/build/async-profiler") - if (!targetDir.exists()) { - return false - } - def currentCommit = "" - try { - new ByteArrayOutputStream().withStream { os -> - exec { - workingDir targetDir.absolutePath - commandLine 'git', 'rev-parse', 'HEAD' - standardOutput = os - } - currentCommit = os.toString().trim() - } - return currentCommit == ap_commit_lock - } catch (Exception e) { - return false - } - } - doLast { - // Fix for CI environments where git detects dubious ownership - exec { - commandLine 'git', 'config', '--global', '--add', 'safe.directory', projectDir.parentFile.absolutePath - ignoreExitValue = true // Don't fail if this command fails - } - - def targetDir = file("${projectDir}/build/async-profiler") - if (!targetDir.exists()) { - println "Cloning missing async-profiler git subdirectory..." - exec { - commandLine 'git', 'clone', '--branch', ap_branch_lock, 'https://github.com/datadog/async-profiler.git', targetDir.absolutePath - } - exec { - workingDir targetDir.absolutePath - commandLine 'git', 'checkout', ap_commit_lock - } - } else { - // Also fix git ownership for existing directory - exec { - workingDir targetDir.absolutePath - commandLine 'git', 'config', '--global', '--add', 'safe.directory', targetDir.absolutePath - ignoreExitValue = true - } - - def currentCommit = "" - new ByteArrayOutputStream().withStream { os -> - exec { - workingDir targetDir.absolutePath - commandLine 'git', 'rev-parse', 'HEAD' - standardOutput = os - } - currentCommit = os.toString().trim() - } - - if (currentCommit != ap_commit_lock) { - println "async-profiler commit hash differs (current: ${currentCommit}, expected: ${ap_commit_lock}), updating..." 
- exec { - workingDir targetDir.absolutePath - commandLine 'rm', '-rf', targetDir.absolutePath - } - exec { - commandLine 'git', 'clone', '--branch', ap_branch_lock, 'https://github.com/datadog/async-profiler.git', targetDir.absolutePath - } - exec { - workingDir targetDir.absolutePath - commandLine 'git', 'checkout', ap_commit_lock - } - } else { - println "async-profiler git subdirectory present with correct commit hash." - } - } - } -} - -def copyUpstreamFiles = tasks.register('copyUpstreamFiles', Copy) { - configure { - dependsOn cloneAPTask - } - onlyIf { - !project.hasProperty("debug-ap") - } - description = 'Copy shared upstream files' - from("${projectDir}/build/async-profiler/src") { - include "arch.h" - include "asprof.h" - include "cpuEngine.h" - include "dwarf.h" - include "incbin.h" - include "j9StackTraces.h" - include "log.h" - include "mutex.h" - include "mutex.cpp" - include "os.h" - include "os_*.cpp" - include "spinLock.h" - include "stackFrame.h" - include "stackWalker.h" - include "stackWalker.cpp" - include "stackFrame*.cpp" - include "symbols.h" - include "symbols_*.cpp" - include "trap.h" - include "trap.cpp" - include "tsc.h" - include "tsc.cpp" - include "vmStructs.h" - include "vmStructs.cpp" - } - into "${projectDir}/src/main/cpp-external" -} - -tasks.named("spotlessMisc") { - configure { - dependsOn patchUpstreamFiles - } -} - -// Load patch configuration from external file -apply from: "${rootDir}/gradle/patching.gradle" - -def patchUpstreamFiles = tasks.register("patchUpstreamFiles") { - description = 'Apply all upstream patches via unified configuration system' - configure { - dependsOn copyUpstreamFiles - } - - inputs.file("${rootDir}/gradle/patching.gradle") - inputs.files(fileTree("${projectDir}/src/main/cpp-external").include("*.cpp", "*.h")) - outputs.files(fileTree("${projectDir}/src/main/cpp-external").include("*.cpp", "*.h")) - - doLast { - try { - // Use configuration from gradle/patching.gradle - def patches = upstreamPatches - - // Apply patches using simplified inline logic - def totalFiles = patches.size() - def totalOperations = 0 - patches.each { fileName, fileConfig -> - totalOperations += fileConfig.operations?.size() ?: 0 - } - - logger.quiet("Unified patching system: processing ${totalFiles} files with ${totalOperations} total operations") - - // Apply patches to all configured files - patches.each { fileName, fileConfig -> - def filePath = "${projectDir}/src/main/cpp-external/${fileName}" - def targetFile = file(filePath) - - if (targetFile.exists()) { - def content = targetFile.getText('UTF-8') - def originalContent = content - def patchCount = 0 - - // Run validations first - fileConfig.validations?.each { validation -> - if (validation.contains && !content.contains(validation.contains)) { - throw new RuntimeException("Validation failed for ${fileName}: required text '${validation.contains}' not found. 
Upstream structure may have changed.") - } - } - - // Apply operations in order - fileConfig.operations?.each { operation -> - // Check if already applied (idempotent check) - if (operation.idempotent_check && content.contains(operation.idempotent_check)) { - logger.quiet("Skipped patch '${operation.name ?: operation.type}' for ${fileName} (already applied)") - return - } - - // Apply regex pattern - def pattern = java.util.regex.Pattern.compile(operation.find) - def matcher = pattern.matcher(content) - - if (matcher.find()) { - def newContent = matcher.replaceAll(operation.replace) - - if (newContent != content) { - content = newContent - patchCount++ - logger.quiet("Applied patch '${operation.name ?: operation.type}' to ${fileName}") - } - } else { - logger.warn("Pattern '${operation.find}' not found in ${fileName} for operation: ${operation.name ?: operation.type}") - } - } - - // Write back if any modifications were made - if (patchCount > 0) { - targetFile.write(content, 'UTF-8') - logger.quiet("Patched ${fileName} with ${patchCount} operations") - } else { - logger.quiet("No patches applied to ${fileName} (all already present)") - } - } else { - logger.warn("Patch target file not found: ${fileName}") - } - } - - logger.quiet("Unified patching completed successfully") - - } catch (Exception e) { - throw new GradleException("Unified patching failed: ${e.message}", e) - } - } -} - - -def initSubrepoTask = tasks.register('initSubrepo') { - configure { - dependsOn patchUpstreamFiles - } -} - tasks.register('assembleAll') {} // use the build config names to create configurations, copy lib and asemble jar tasks @@ -568,10 +342,6 @@ configurations { // added by the cpp-library plugin tasks.whenTaskAdded { task -> if (task instanceof CppCompile) { - configure { - dependsOn patchUpstreamFiles - } - if (!task.name.startsWith('compileLib') && task.name.contains('Release')) { buildConfigurations.each { config -> if (config.os == osIdentifier() && config.arch == archIdentifier()) { @@ -589,7 +359,6 @@ tasks.whenTaskAdded { task -> toolChain = task.toolChain targetPlatform = task.targetPlatform includes task.includes - includes project(':ddprof-lib').file('src/main/cpp-external').toString() includes project(':ddprof-lib').file('src/main/cpp').toString() includes "${javaHome()}/include" includes project(':malloc-shim').file('src/main/public').toString() @@ -673,9 +442,7 @@ tasks.withType(LinkSharedLibrary).configureEach { library { baseName = "javaProfiler" source.from file('src/main/cpp') - source.from file('src/main/cpp-external') privateHeaders.from file('src/main/cpp') - privateHeaders.from file('src/main/cpp-external') // aarch64 support is still incubating // for the time being an aarch64 linux machine will match 'machines.linux.x86_64' @@ -758,10 +525,6 @@ gradle.projectsEvaluated { if (javadocTask != null && copyReleaseLibs != null) { javadocTask.dependsOn copyReleaseLibs } - def initTask = tasks.findByName("initSubrepo") - if (initTask != null) { - compileTask.dependsOn initTask - } } } @@ -858,7 +621,3 @@ tasks.withType(AbstractPublishToMaven).configureEach { mustRunAfter tasks.matching { it instanceof VerificationTask } } } - -clean { - delete "${projectDir}/src/main/cpp-external" -} diff --git a/ddprof-lib/fuzz/build.gradle b/ddprof-lib/fuzz/build.gradle index 27c3607b..ed706cb0 100644 --- a/ddprof-lib/fuzz/build.gradle +++ b/ddprof-lib/fuzz/build.gradle @@ -168,7 +168,6 @@ tasks.whenTaskAdded { task -> toolChain = task.toolChain targetPlatform = task.targetPlatform includes 
task.includes - includes project(':ddprof-lib').file('src/main/cpp-external').toString() includes project(':ddprof-lib').file('src/main/cpp').toString() includes "${javaHome()}/include" includes project(':malloc-shim').file('src/main/public').toString() @@ -182,9 +181,6 @@ tasks.whenTaskAdded { task -> source project(':ddprof-lib').fileTree('src/main/cpp') { include '**/*' } - source project(':ddprof-lib').fileTree('src/main/cpp-external') { - include '**/*' - } // Compile the fuzz target itself source fuzzFile @@ -195,12 +191,6 @@ tasks.whenTaskAdded { task -> if (linkTask != null) { linkTask.dependsOn fuzzCompileTask } - def subrepoInitTask = project(':ddprof-lib').tasks.named("initSubrepo") - if (subrepoInitTask != null) { - fuzzCompileTask.configure { - dependsOn subrepoInitTask - } - } } } } diff --git a/ddprof-lib/gtest/build.gradle b/ddprof-lib/gtest/build.gradle index d474eec0..d7ee2027 100644 --- a/ddprof-lib/gtest/build.gradle +++ b/ddprof-lib/gtest/build.gradle @@ -109,7 +109,6 @@ tasks.whenTaskAdded { task -> toolChain = task.toolChain targetPlatform = task.targetPlatform includes task.includes - includes project(':ddprof-lib').file('src/main/cpp-external').toString() includes project(':ddprof-lib').file('src/main/cpp').toString() includes "${javaHome()}/include" includes project(':malloc-shim').file('src/main/public').toString() @@ -123,9 +122,6 @@ tasks.whenTaskAdded { task -> source project(':ddprof-lib').fileTree('src/main/cpp') { include '**/*' } - source project(':ddprof-lib').fileTree('src/main/cpp-external') { - include '**/*' - } source testFile inputs.files source @@ -135,12 +131,6 @@ tasks.whenTaskAdded { task -> if (linkTask != null) { linkTask.get().dependsOn gtestCompileTask } - def subrepoInitTask = project(':ddprof-lib').tasks.named("initSubrepo") - if (subrepoInitTask != null) { - gtestCompileTask.configure { - dependsOn subrepoInitTask - } - } } } } diff --git a/ddprof-lib/src/main/cpp/arch.h b/ddprof-lib/src/main/cpp/arch.h new file mode 100644 index 00000000..8d7701a6 --- /dev/null +++ b/ddprof-lib/src/main/cpp/arch.h @@ -0,0 +1,211 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _ARCH_H +#define _ARCH_H + + +#ifndef likely +# define likely(x) (__builtin_expect(!!(x), 1)) +#endif + +#ifndef unlikely +# define unlikely(x) (__builtin_expect(!!(x), 0)) +#endif + +#ifdef _LP64 +# define LP64_ONLY(code) code +#else // !_LP64 +# define LP64_ONLY(code) +#endif // _LP64 + + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long long u64; + +static inline u64 atomicInc(volatile u64& var, u64 increment = 1) { + return __sync_fetch_and_add(&var, increment); +} + +static inline int atomicInc(volatile u32& var, int increment = 1) { + return __sync_fetch_and_add(&var, increment); +} + +static inline int atomicInc(volatile int& var, int increment = 1) { + return __sync_fetch_and_add(&var, increment); +} + +static inline u64 loadAcquire(u64& var) { + return __atomic_load_n(&var, __ATOMIC_ACQUIRE); +} + +static inline void storeRelease(u64& var, u64 value) { + return __atomic_store_n(&var, value, __ATOMIC_RELEASE); +} + + +#if defined(__x86_64__) || defined(__i386__) + +typedef unsigned char instruction_t; +const instruction_t BREAKPOINT = 0xcc; +const int BREAKPOINT_OFFSET = 0; + +const int SYSCALL_SIZE = 2; +const int FRAME_PC_SLOT = 1; +const int PROBE_SP_LIMIT = 4; +const int PLT_HEADER_SIZE = 16; +const int PLT_ENTRY_SIZE = 16; +const int PERF_REG_PC = 8; // 
PERF_REG_X86_IP + +#define spinPause() asm volatile("pause") +#define rmb() asm volatile("lfence" : : : "memory") +#define flushCache(addr) asm volatile("mfence; clflush (%0); mfence" : : "r" (addr) : "memory") + +#define callerPC() __builtin_return_address(0) +#define callerFP() __builtin_frame_address(1) +#define callerSP() ((void**)__builtin_frame_address(0) + 2) + +#elif defined(__arm__) || defined(__thumb__) + +typedef unsigned int instruction_t; +const instruction_t BREAKPOINT = 0xe7f001f0; +const instruction_t BREAKPOINT_THUMB = 0xde01de01; +const int BREAKPOINT_OFFSET = 0; + +const int SYSCALL_SIZE = sizeof(instruction_t); +const int FRAME_PC_SLOT = 1; +const int PROBE_SP_LIMIT = 0; +const int PLT_HEADER_SIZE = 20; +const int PLT_ENTRY_SIZE = 12; +const int PERF_REG_PC = 15; // PERF_REG_ARM_PC + +#define spinPause() asm volatile("yield") +#define rmb() asm volatile("dmb ish" : : : "memory") +#define flushCache(addr) __builtin___clear_cache((char*)(addr), (char*)(addr) + sizeof(instruction_t)) + +#define callerPC() __builtin_return_address(0) +#define callerFP() __builtin_frame_address(1) +#define callerSP() __builtin_frame_address(1) + +#elif defined(__aarch64__) + +typedef unsigned int instruction_t; +const instruction_t BREAKPOINT = 0xd4200000; +const int BREAKPOINT_OFFSET = 0; + +const int SYSCALL_SIZE = sizeof(instruction_t); +const int FRAME_PC_SLOT = 1; +const int PROBE_SP_LIMIT = 0; +const int PLT_HEADER_SIZE = 32; +const int PLT_ENTRY_SIZE = 16; +const int PERF_REG_PC = 32; // PERF_REG_ARM64_PC + +#define spinPause() asm volatile("isb") +#define rmb() asm volatile("dmb ish" : : : "memory") +#define flushCache(addr) __builtin___clear_cache((char*)(addr), (char*)(addr) + sizeof(instruction_t)) + +#define callerPC() ({ void* pc; asm volatile("adr %0, ." : "=r"(pc)); pc; }) +#define callerFP() ({ void* fp; asm volatile("mov %0, fp" : "=r"(fp)); fp; }) +#define callerSP() ({ void* sp; asm volatile("mov %0, sp" : "=r"(sp)); sp; }) + +#elif defined(__PPC64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +typedef unsigned int instruction_t; +const instruction_t BREAKPOINT = 0x7fe00008; +// We place the break point in the third instruction slot on PPCLE as the first two are skipped if +// the call comes from within the same compilation unit according to the LE ABI. 
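+// (Background, for clarity: the PPC64 ELFv2 ABI gives each function a global entry point,
+// whose first two instructions materialize the TOC pointer, and a local entry point right
+// after them; calls from within the same compilation unit enter at the local entry point,
+// so a trap placed in the first two slots could be bypassed.)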
+const int BREAKPOINT_OFFSET = 8; + +const int SYSCALL_SIZE = sizeof(instruction_t); +const int FRAME_PC_SLOT = 2; +const int PROBE_SP_LIMIT = 0; +const int PLT_HEADER_SIZE = 24; +const int PLT_ENTRY_SIZE = 24; +const int PERF_REG_PC = 32; // PERF_REG_POWERPC_NIP + +#define spinPause() asm volatile("yield") // does nothing, but using or 1,1,1 would lead to other problems +#define rmb() asm volatile ("sync" : : : "memory") // lwsync would do but better safe than sorry +#define flushCache(addr) __builtin___clear_cache((char*)(addr), (char*)(addr) + sizeof(instruction_t)) + +#define callerPC() __builtin_return_address(0) +#define callerFP() __builtin_frame_address(1) +#define callerSP() __builtin_frame_address(0) + +#elif defined(__riscv) && (__riscv_xlen == 64) + +typedef unsigned int instruction_t; +#if defined(__riscv_compressed) +const instruction_t BREAKPOINT = 0x9002; // EBREAK (compressed form) +#else +const instruction_t BREAKPOINT = 0x00100073; // EBREAK +#endif +const int BREAKPOINT_OFFSET = 0; + +const int SYSCALL_SIZE = sizeof(instruction_t); +const int FRAME_PC_SLOT = 1; // return address is at -1 from FP +const int PROBE_SP_LIMIT = 0; +const int PLT_HEADER_SIZE = 24; // Best guess from examining readelf +const int PLT_ENTRY_SIZE = 24; // ...same... +const int PERF_REG_PC = 0; // PERF_REG_RISCV_PC + +#define spinPause() // No architecture support +#define rmb() asm volatile ("fence" : : : "memory") +#define flushCache(addr) __builtin___clear_cache((char*)(addr), (char*)(addr) + sizeof(instruction_t)) + +#define callerPC() __builtin_return_address(0) +#define callerFP() __builtin_frame_address(1) +#define callerSP() __builtin_frame_address(0) + +#elif defined(__loongarch_lp64) + +typedef unsigned int instruction_t; +const instruction_t BREAKPOINT = 0x002a0005; // EBREAK +const int BREAKPOINT_OFFSET = 0; + +const int SYSCALL_SIZE = sizeof(instruction_t); +const int FRAME_PC_SLOT = 1; +const int PROBE_SP_LIMIT = 0; +const int PLT_HEADER_SIZE = 32; +const int PLT_ENTRY_SIZE = 16; +const int PERF_REG_PC = 0; // PERF_REG_LOONGARCH_PC + +#define spinPause() asm volatile("ibar 0x0") +#define rmb() asm volatile("dbar 0x0" : : : "memory") +#define flushCache(addr) __builtin___clear_cache((char*)(addr), (char*)(addr) + sizeof(instruction_t)) + +#define callerPC() __builtin_return_address(0) +#define callerFP() __builtin_frame_address(1) +#define callerSP() __builtin_frame_address(0) + +#else + +#error "Compiling on unsupported arch" + +#endif + + +// On Apple M1 and later processors, memory is either writable or executable (W^X) +#if defined(__aarch64__) && defined(__APPLE__) +# define WX_MEMORY true +#else +# define WX_MEMORY false +#endif + +// Pointer authentication (PAC) support. +// Only 48-bit virtual addresses are currently supported. +#ifdef __aarch64__ +const unsigned long PAC_MASK = WX_MEMORY ? 
0x7fffffffffffUL : 0xffffffffffffUL; + +static inline const void* stripPointer(const void* p) { + return (const void*) ((unsigned long)p & PAC_MASK); +} +#else +# define stripPointer(p) (p) +#endif + + +#endif // _ARCH_H diff --git a/ddprof-lib/src/main/cpp/asprof.h b/ddprof-lib/src/main/cpp/asprof.h new file mode 100644 index 00000000..3f6cbfdc --- /dev/null +++ b/ddprof-lib/src/main/cpp/asprof.h @@ -0,0 +1,106 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _ASPROF_H +#define _ASPROF_H + +#include <stddef.h> +#include <stdint.h> + +#ifdef __clang__ +# define DLLEXPORT __attribute__((visibility("default"))) +#else +# define DLLEXPORT __attribute__((visibility("default"),externally_visible)) +#endif + +#define WEAK __attribute__((weak)) + +#ifdef __cplusplus +extern "C" { +#endif + + +typedef const char* asprof_error_t; +typedef void (*asprof_writer_t)(const char* buf, size_t size); + +// Should be called once prior to any other API functions +DLLEXPORT void asprof_init(); +typedef void (*asprof_init_t)(); + +// Returns an error message for the given error code or NULL if there is no error +DLLEXPORT const char* asprof_error_str(asprof_error_t err); +typedef const char* (*asprof_error_str_t)(asprof_error_t err); + +// Executes async-profiler command using output_callback as an optional sink +// for the profiler output. Returns an error code or NULL on success. +DLLEXPORT asprof_error_t asprof_execute(const char* command, asprof_writer_t output_callback); +typedef asprof_error_t (*asprof_execute_t)(const char* command, asprof_writer_t output_callback); + +// This API is UNSTABLE and might change or be removed in the next version of async-profiler. +typedef struct { + // A thread-local sample counter, which increments (not necessarily by 1) every time a + // stack profiling sample is taken using a profiling signal. + // + // The counter might be initialized lazily, only starting counting from 0 the first time + // `asprof_get_thread_local_data` is called on a given thread. Further calls to + // `asprof_get_thread_local_data` on a given thread will of course not reset the counter. + volatile uint64_t sample_counter; +} asprof_thread_local_data; + +// This API is UNSTABLE and might change or be removed in the next version of async-profiler. +// +// Gets a pointer to asprof's thread-local data structure, see `asprof_thread_local_data`'s +// documentation for the details of each field. This function might lazily initialize that +// structure. +// +// This function can return NULL either if the profiler is not yet initialized, or in +// case of an allocation failure. +// +// This function is *not* async-signal-safe. However, it is safe to call concurrently +// with async-profiler operations, including initialization. +DLLEXPORT asprof_thread_local_data* asprof_get_thread_local_data(void); +typedef asprof_thread_local_data* (*asprof_get_thread_local_data_t)(void); + + +typedef int asprof_jfr_event_key; + +// This API is UNSTABLE and might change or be removed in the next version of async-profiler. +// +// Returns an asprof_jfr_event_key identifier for a user-defined JFR key. +// That identifier can then be used in `asprof_emit_jfr_event`. +// +// The name is required to be valid (since it's a C string, NUL-free) UTF-8. +// +// Returns -1 on failure.
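+//
+// A hypothetical usage sketch (illustrative only; assumes the profiler library has
+// been loaded dynamically and the symbol is resolved through dlsym):
+//
+//   void* lib = dlopen("libjavaProfiler.so", RTLD_NOW);
+//   asprof_register_jfr_event_t register_event =
+//       (asprof_register_jfr_event_t)dlsym(lib, "asprof_register_jfr_event");
+//   asprof_jfr_event_key key = register_event("datadog.ExampleEvent");
+//   if (key == -1) { /* registration failed */ }
+//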
+DLLEXPORT asprof_jfr_event_key asprof_register_jfr_event(const char* name); +typedef asprof_jfr_event_key (*asprof_register_jfr_event_t)(const char* name); + + +#define ASPROF_MAX_JFR_EVENT_LENGTH 2048 + +// This API is UNSTABLE and might change or be removed in the next version of async-profiler. +// +// Emits a custom, user-defined JFR event. The key should be created via `asprof_register_jfr_event`. +// The data can be arbitrary binary data, with size <= ASPROF_MAX_JFR_EVENT_LENGTH. +// +// User-defined events are included in the JFR under a `profiler.UserEvent` event type. That type will contain +// (at least) the following fields: +// 1. `startTime` [Long] - the emitted event's time in ticks. +// 2. `eventThread` [java.lang.Thread] - the thread that emitted the event. +// 3. `type` [profiler.types.UserEventType] - the event's type, +// where `profiler.types.UserEventType` is an indexed string from the JFR constant pool. +// 4. `data` [String] - the event data. This is the Latin-1 encoded version of the input data. +// The Latin-1 encoding is used as a way to stuff the arbitrary byte input into something +// that JFR supports (JFR technically supports byte arrays, but `jfr print` doesn't). +// +// Returns an error code or NULL on success. +DLLEXPORT asprof_error_t asprof_emit_jfr_event(asprof_jfr_event_key type, const uint8_t* data, size_t len); +typedef asprof_error_t (*asprof_emit_jfr_event_t)(asprof_jfr_event_key type, const uint8_t* data, size_t len); + +#ifdef __cplusplus +} +#endif + +#endif // _ASPROF_H diff --git a/ddprof-lib/src/main/cpp/cpuEngine.h b/ddprof-lib/src/main/cpp/cpuEngine.h new file mode 100644 index 00000000..da8becbd --- /dev/null +++ b/ddprof-lib/src/main/cpp/cpuEngine.h @@ -0,0 +1,52 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _CPUENGINE_H +#define _CPUENGINE_H + +#include <signal.h> +#include "engine.h" + + +// Base class for CPU sampling engines: PerfEvents, CTimer, ITimer +class CpuEngine : public Engine { + protected: + static void** _pthread_entry; + static CpuEngine* _current; + + static long _interval; + static CStack _cstack; + static int _signal; + static bool _count_overrun; + + static void signalHandler(int signo, siginfo_t* siginfo, void* ucontext); + static void signalHandlerJ9(int signo, siginfo_t* siginfo, void* ucontext); + + static bool setupThreadHook(); + + void enableThreadHook(); + void disableThreadHook(); + + bool isResourceLimit(int err); + + int createForAllThreads(); + + virtual int createForThread(int tid) { return -1; } + virtual void destroyForThread(int tid) {} + + public: + const char* title() { + return "CPU profile"; + } + + const char* units() { + return "ns"; + } + + static void onThreadStart(); + static void onThreadEnd(); +}; + +#endif // _CPUENGINE_H diff --git a/ddprof-lib/src/main/cpp/dwarf.h b/ddprof-lib/src/main/cpp/dwarf.h new file mode 100644 index 00000000..6cc3a483 --- /dev/null +++ b/ddprof-lib/src/main/cpp/dwarf.h @@ -0,0 +1,183 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _DWARF_H +#define _DWARF_H + +#include <stddef.h> +#include <string.h> +#include "arch.h" + + +const int DW_REG_PLT = 128; // denotes special rule for PLT entries +const int DW_REG_INVALID = 255; // denotes unsupported configuration + +const int DW_PC_OFFSET = 1; +const int DW_SAME_FP = 0x80000000; +const int DW_LINK_REGISTER = 0x80000000; +const int DW_STACK_SLOT = sizeof(void*); + + +#if defined(__x86_64__) + +#define DWARF_SUPPORTED true + +const int
DW_REG_FP = 6; +const int DW_REG_SP = 7; +const int DW_REG_PC = 16; +const int EMPTY_FRAME_SIZE = DW_STACK_SLOT; +const int LINKED_FRAME_SIZE = 2 * DW_STACK_SLOT; +const int INITIAL_PC_OFFSET = -EMPTY_FRAME_SIZE; + +#elif defined(__i386__) + +#define DWARF_SUPPORTED true + +const int DW_REG_FP = 5; +const int DW_REG_SP = 4; +const int DW_REG_PC = 8; +const int EMPTY_FRAME_SIZE = DW_STACK_SLOT; +const int LINKED_FRAME_SIZE = 2 * DW_STACK_SLOT; +const int INITIAL_PC_OFFSET = -EMPTY_FRAME_SIZE; + +#elif defined(__aarch64__) + +#define DWARF_SUPPORTED true + +const int DW_REG_FP = 29; +const int DW_REG_SP = 31; +const int DW_REG_PC = 30; +const int EMPTY_FRAME_SIZE = 0; +const int LINKED_FRAME_SIZE = 0; +const int INITIAL_PC_OFFSET = DW_LINK_REGISTER; + +#else + +#define DWARF_SUPPORTED false + +const int DW_REG_FP = 0; +const int DW_REG_SP = 1; +const int DW_REG_PC = 2; +const int EMPTY_FRAME_SIZE = 0; +const int LINKED_FRAME_SIZE = 0; +const int INITIAL_PC_OFFSET = DW_LINK_REGISTER; + +#endif + + +struct FrameDesc { + u32 loc; + int cfa; + int fp_off; + int pc_off; + + static FrameDesc empty_frame; + static FrameDesc default_frame; + + static int comparator(const void* p1, const void* p2) { + FrameDesc* fd1 = (FrameDesc*)p1; + FrameDesc* fd2 = (FrameDesc*)p2; + return (int)(fd1->loc - fd2->loc); + } +}; + + +class DwarfParser { + private: + const char* _name; + const char* _image_base; + const char* _ptr; + + int _capacity; + int _count; + FrameDesc* _table; + FrameDesc* _prev; + + u32 _code_align; + int _data_align; + + const char* add(size_t size) { + const char* ptr = _ptr; + _ptr = ptr + size; + return ptr; + } + + u8 get8() { + return *_ptr++; + } + + u16 get16() { + const char* ptr = add(2); + u16 result; + memcpy(&result, ptr, sizeof(u16)); + return result; + } + + u32 get32() { + const char* ptr = add(4); + u32 result; + memcpy(&result, ptr, sizeof(u32)); + return result; + } + + u32 getLeb() { + u32 result = 0; + for (u32 shift = 0; ; shift += 7) { + u8 b = *_ptr++; + result |= (b & 0x7f) << shift; + if ((b & 0x80) == 0) { + return result; + } + } + } + + int getSLeb() { + int result = 0; + for (u32 shift = 0; ; shift += 7) { + u8 b = *_ptr++; + result |= (b & 0x7f) << shift; + if ((b & 0x80) == 0) { + if ((b & 0x40) != 0 && (shift += 7) < 32) { + result |= ~0U << shift; + } + return result; + } + } + } + + void skipLeb() { + while (*_ptr++ & 0x80) {} + } + + const char* getPtr() { + const char* ptr = _ptr; + const char* offset_ptr = add(4); + int offset; + memcpy(&offset, offset_ptr, sizeof(int)); + return ptr + offset; + } + + void parse(const char* eh_frame_hdr); + void parseCie(); + void parseFde(); + void parseInstructions(u32 loc, const char* end); + int parseExpression(); + + void addRecord(u32 loc, u32 cfa_reg, int cfa_off, int fp_off, int pc_off); + FrameDesc* addRecordRaw(u32 loc, int cfa, int fp_off, int pc_off); + + public: + DwarfParser(const char* name, const char* image_base, const char* eh_frame_hdr); + + FrameDesc* table() const { + return _table; + } + + int count() const { + return _count; + } +}; + +#endif // _DWARF_H diff --git a/ddprof-lib/src/main/cpp/incbin.h b/ddprof-lib/src/main/cpp/incbin.h new file mode 100644 index 00000000..afbc7629 --- /dev/null +++ b/ddprof-lib/src/main/cpp/incbin.h @@ -0,0 +1,36 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _INCBIN_H +#define _INCBIN_H + +#ifdef __APPLE__ +# define INCBIN_SECTION ".const_data" +# define INCBIN_SYMBOL "_" +#else +# define INCBIN_SECTION 
".section \".rodata\", \"a\"" +# define INCBIN_SYMBOL +#endif + +#define INCBIN(NAME, FILE) \ + extern "C" const char NAME[];\ + extern "C" const char NAME##_END[];\ + asm(INCBIN_SECTION "\n"\ + ".globl " INCBIN_SYMBOL #NAME "\n"\ + INCBIN_SYMBOL #NAME ":\n"\ + ".incbin \"" FILE "\"\n"\ + ".globl " INCBIN_SYMBOL #NAME "_END\n"\ + INCBIN_SYMBOL #NAME "_END:\n"\ + ".byte 0x00\n"\ + ".previous\n"\ + ); + +#define INCBIN_SIZEOF(NAME) (NAME##_END - NAME) + +#define INCLUDE_HELPER_CLASS(NAME_VAR, DATA_VAR, NAME) \ + static const char* const NAME_VAR = NAME;\ + INCBIN(DATA_VAR, "src/helper/" NAME ".class") + +#endif // _INCBIN_H diff --git a/ddprof-lib/src/main/cpp/j9StackTraces.h b/ddprof-lib/src/main/cpp/j9StackTraces.h new file mode 100644 index 00000000..c0703253 --- /dev/null +++ b/ddprof-lib/src/main/cpp/j9StackTraces.h @@ -0,0 +1,49 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _J9STACKTRACES_H +#define _J9STACKTRACES_H + +#include +#include "arch.h" +#include "arguments.h" + + +const int MAX_J9_NATIVE_FRAMES = 128; + +struct J9StackTraceNotification { + void* env; + u64 counter; + int num_frames; + int reserved; + const void* addr[MAX_J9_NATIVE_FRAMES]; + + size_t size() { + return sizeof(*this) - sizeof(this->addr) + num_frames * sizeof(const void*); + } +}; + + +class J9StackTraces { + private: + static pthread_t _thread; + static int _max_stack_depth; + static int _pipe[2]; + + static void* threadEntry(void* unused) { + timerLoop(); + return NULL; + } + + static void timerLoop(); + + public: + static Error start(Arguments& args); + static void stop(); + + static void checkpoint(u64 counter, J9StackTraceNotification* notif); +}; + +#endif // _J9STACKTRACES_H diff --git a/ddprof-lib/src/main/cpp/mutex.cpp b/ddprof-lib/src/main/cpp/mutex.cpp new file mode 100644 index 00000000..85228dbf --- /dev/null +++ b/ddprof-lib/src/main/cpp/mutex.cpp @@ -0,0 +1,35 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "mutex.h" + + +Mutex::Mutex() { + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); + pthread_mutex_init(&_mutex, &attr); +} + +void Mutex::lock() { + pthread_mutex_lock(&_mutex); +} + +void Mutex::unlock() { + pthread_mutex_unlock(&_mutex); +} + +WaitableMutex::WaitableMutex() : Mutex() { + pthread_cond_init(&_cond, NULL); +} + +bool WaitableMutex::waitUntil(u64 wall_time) { + struct timespec ts = {(time_t)(wall_time / 1000000), (long)(wall_time % 1000000) * 1000}; + return pthread_cond_timedwait(&_cond, &_mutex, &ts) != 0; +} + +void WaitableMutex::notify() { + pthread_cond_signal(&_cond); +} diff --git a/ddprof-lib/src/main/cpp/mutex.h b/ddprof-lib/src/main/cpp/mutex.h new file mode 100644 index 00000000..7d017536 --- /dev/null +++ b/ddprof-lib/src/main/cpp/mutex.h @@ -0,0 +1,49 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _MUTEX_H +#define _MUTEX_H + +#include +#include "arch.h" + + +class Mutex { + protected: + pthread_mutex_t _mutex; + + public: + Mutex(); + + void lock(); + void unlock(); +}; + +class WaitableMutex : public Mutex { + protected: + pthread_cond_t _cond; + + public: + WaitableMutex(); + + bool waitUntil(u64 wall_time); + void notify(); +}; + +class MutexLocker { + private: + Mutex* _mutex; + + public: + MutexLocker(Mutex& mutex) : _mutex(&mutex) { + _mutex->lock(); + } + + ~MutexLocker() { + _mutex->unlock(); + } +}; + +#endif // 
_MUTEX_H diff --git a/ddprof-lib/src/main/cpp/os.h b/ddprof-lib/src/main/cpp/os.h new file mode 100644 index 00000000..0ad8b1e2 --- /dev/null +++ b/ddprof-lib/src/main/cpp/os.h @@ -0,0 +1,150 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _OS_H +#define _OS_H + +#include <signal.h> +#include <stddef.h> +#include <sys/types.h> +#include "arch.h" + + +typedef void (*SigAction)(int, siginfo_t*, void*); +typedef void (*SigHandler)(int); +typedef void (*TimerCallback)(void*); + +// Interrupt threads with this signal. The same signal is used inside JDK to interrupt I/O operations. +const int WAKEUP_SIGNAL = SIGIO; + +enum ThreadState { + THREAD_UNKNOWN, + THREAD_RUNNING, + THREAD_SLEEPING +}; + +struct ProcessInfo { + int pid = 0; + int ppid = 0; + char name[16]; // Process name from /proc/{pid}/stat + char cmdline[2048]; // Command line from /proc/{pid}/cmdline + unsigned int uid = 0; // User ID + unsigned char state = 0; // Process state (R, S, D, Z, T, etc.) + u64 start_time = 0; // Process start time (milliseconds since epoch) + + // CPU & thread stats + float cpu_user = 0; // User CPU time (seconds) + float cpu_system = 0; // System CPU time (seconds) + float cpu_percent = 0; // CPU utilization percentage + int threads = 0; // Number of threads + + // Memory stats (in bytes) + u64 vm_size = 0; // Total virtual memory size + u64 vm_rss = 0; // Resident memory size + u64 rss_anon = 0; // Resident anonymous memory + u64 rss_files = 0; // Resident file mappings + u64 rss_shmem = 0; // Resident shared memory + + // Page fault stats + u64 minor_faults = 0; // Minor page faults (no I/O required) + u64 major_faults = 0; // Major page faults (I/O required) + + // I/O stats + u64 io_read = 0; // KB read from storage + u64 io_write = 0; // KB written to storage +}; + + +class ThreadList { + protected: + u32 _index; + u32 _count; + + ThreadList() : _index(0), _count(0) { + } + + public: + virtual ~ThreadList() {} + + u32 index() const { return _index; } + u32 count() const { return _count; } + + bool hasNext() const { + return _index < _count; + } + + virtual int next() = 0; + virtual void update() = 0; +}; + + +// W^X memory support +class JitWriteProtection { + private: + u64 _prev; + bool _restore; + + public: + JitWriteProtection(bool enable); + ~JitWriteProtection(); +}; + + +class OS { + public: + static const size_t page_size; + static const size_t page_mask; + static const long clock_ticks_per_sec; + + static u64 nanotime(); + static u64 micros(); + static u64 processStartTime(); + static void sleep(u64 nanos); + static void uninterruptibleSleep(u64 nanos, volatile bool* flag); + static u64 overrun(siginfo_t* siginfo); + + static u64 hton64(u64 x); + static u64 ntoh64(u64 x); + + static int getMaxThreadId(); + static int processId(); + static int threadId(); + static const char* schedPolicy(int thread_id); + static bool threadName(int thread_id, char* name_buf, size_t name_len); + static ThreadState threadState(int thread_id); + static u64 threadCpuTime(int thread_id); + static ThreadList* listThreads(); + + static bool isLinux(); + static bool isMusl(); + + static SigAction installSignalHandler(int signo, SigAction action, SigHandler handler = NULL); + static SigAction replaceCrashHandler(SigAction action); + static int getProfilingSignal(int mode); + static bool sendSignalToThread(int thread_id, int signo); + + static void* safeAlloc(size_t size); + static void safeFree(void* addr, size_t size); + + static bool getCpuDescription(char* buf, size_t size); + static int
getCpuCount(); + static u64 getProcessCpuTime(u64* utime, u64* stime); + static u64 getTotalCpuTime(u64* utime, u64* stime); + + static int createMemoryFile(const char* name); + static void copyFile(int src_fd, int dst_fd, off_t offset, size_t size); + static void freePageCache(int fd, off_t start_offset); + static int mprotect(void* addr, size_t size, int prot); + + static bool checkPreloaded(); + + static u64 getSystemBootTime(); + static u64 getRamSize(); + static int getProcessIds(int* pids, int max_pids); + static bool getBasicProcessInfo(int pid, ProcessInfo* info); + static bool getDetailedProcessInfo(ProcessInfo* info); +}; + +#endif // _OS_H diff --git a/ddprof-lib/src/main/cpp/os_linux.cpp b/ddprof-lib/src/main/cpp/os_linux.cpp new file mode 100644 index 00000000..8e2cd7dd --- /dev/null +++ b/ddprof-lib/src/main/cpp/os_linux.cpp @@ -0,0 +1,693 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifdef __linux__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "os.h" + + +#ifdef __LP64__ +# define MMAP_SYSCALL __NR_mmap +#else +# define MMAP_SYSCALL __NR_mmap2 +#endif + +#define COMM_LEN 16 + +class LinuxThreadList : public ThreadList { + private: + DIR* _dir; + int* _thread_array; + u32 _capacity; + + void addThread(int thread_id) { + if (_count >= _capacity) { + _capacity = _count * 2; + _thread_array = (int*)realloc(_thread_array, _capacity * sizeof(int)); + } + _thread_array[_count++] = thread_id; + } + + void fillThreadArray() { + if (_dir != NULL) { + rewinddir(_dir); + struct dirent* entry; + while ((entry = readdir(_dir)) != NULL) { + if (entry->d_name[0] != '.') { + addThread(atoi(entry->d_name)); + } + } + } + } + + public: + LinuxThreadList() : ThreadList() { + _dir = opendir("/proc/self/task"); + _capacity = 128; + _thread_array = (int*)malloc(_capacity * sizeof(int)); + fillThreadArray(); + } + + ~LinuxThreadList() { + free(_thread_array); + if (_dir != NULL) { + closedir(_dir); + } + } + + int next() { + return _thread_array[_index++]; + } + + void update() { + _index = _count = 0; + fillThreadArray(); + } +}; + + +JitWriteProtection::JitWriteProtection(bool enable) { + // Not used on Linux +} + +JitWriteProtection::~JitWriteProtection() { + // Not used on Linux +} + + +static SigAction installed_sigaction[64]; + +const size_t OS::page_size = sysconf(_SC_PAGESIZE); +const size_t OS::page_mask = OS::page_size - 1; +const long OS::clock_ticks_per_sec = sysconf(_SC_CLK_TCK); + + +u64 OS::nanotime() { + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return (u64)ts.tv_sec * 1000000000 + ts.tv_nsec; +} + +u64 OS::micros() { + struct timeval tv; + gettimeofday(&tv, NULL); + return (u64)tv.tv_sec * 1000000 + tv.tv_usec; +} + +u64 OS::processStartTime() { + static u64 start_time = 0; + + if (start_time == 0) { + char buf[64]; + snprintf(buf, sizeof(buf), "/proc/%d", processId()); + + struct stat st; + if (stat(buf, &st) == 0) { + start_time = (u64)st.st_mtim.tv_sec * 1000 + st.st_mtim.tv_nsec / 1000000; + } + } + + return start_time; +} + +void OS::sleep(u64 nanos) { + struct timespec ts = {(time_t)(nanos / 1000000000), (long)(nanos % 1000000000)}; + nanosleep(&ts, NULL); +} + +void OS::uninterruptibleSleep(u64 nanos, volatile bool* flag) { + // Workaround nanosleep bug: https://man7.org/linux/man-pages/man2/nanosleep.2.html#BUGS + u64 deadline = 
OS::nanotime() + nanos; + struct timespec ts = {(time_t)(deadline / 1000000000), (long)(deadline % 1000000000)}; + while (*flag && clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &ts, &ts) == EINTR); +} + +u64 OS::overrun(siginfo_t* siginfo) { + return siginfo->si_overrun; +} + +u64 OS::hton64(u64 x) { + return htonl(1) == 1 ? x : bswap_64(x); +} + +u64 OS::ntoh64(u64 x) { + return ntohl(1) == 1 ? x : bswap_64(x); +} + +int OS::getMaxThreadId() { + char buf[16] = "65536"; + int fd = open("/proc/sys/kernel/pid_max", O_RDONLY); + if (fd != -1) { + ssize_t r = read(fd, buf, sizeof(buf) - 1); + (void) r; + close(fd); + } + return atoi(buf); +} + +int OS::processId() { + static const int self_pid = getpid(); + + return self_pid; +} + +int OS::threadId() { + return syscall(__NR_gettid); +} + +const char* OS::schedPolicy(int thread_id) { + int sched_policy = sched_getscheduler(thread_id); + if (sched_policy >= SCHED_BATCH) { + return sched_policy >= SCHED_IDLE ? "SCHED_IDLE" : "SCHED_BATCH"; + } + return "SCHED_OTHER"; +} + +bool OS::threadName(int thread_id, char* name_buf, size_t name_len) { + char buf[64]; + snprintf(buf, sizeof(buf), "/proc/self/task/%d/comm", thread_id); + int fd = open(buf, O_RDONLY); + if (fd == -1) { + return false; + } + + ssize_t r = read(fd, name_buf, name_len); + close(fd); + + if (r > 0) { + name_buf[r - 1] = 0; + return true; + } + return false; +} + +ThreadState OS::threadState(int thread_id) { + char buf[512]; + snprintf(buf, sizeof(buf), "/proc/self/task/%d/stat", thread_id); + int fd = open(buf, O_RDONLY); + if (fd == -1) { + return THREAD_UNKNOWN; + } + + ThreadState state = THREAD_UNKNOWN; + if (read(fd, buf, sizeof(buf)) > 0) { + char* s = strchr(buf, ')'); + state = s != NULL && (s[2] == 'R' || s[2] == 'D') ? THREAD_RUNNING : THREAD_SLEEPING; + } + + close(fd); + return state; +} + +u64 OS::threadCpuTime(int thread_id) { + clockid_t thread_cpu_clock; + if (thread_id) { + thread_cpu_clock = ((~(unsigned int)(thread_id)) << 3) | 6; // CPUCLOCK_SCHED | CPUCLOCK_PERTHREAD_MASK + } else { + thread_cpu_clock = CLOCK_THREAD_CPUTIME_ID; + } + + struct timespec ts; + if (clock_gettime(thread_cpu_clock, &ts) == 0) { + return (u64)ts.tv_sec * 1000000000 + ts.tv_nsec; + } + return 0; +} + +ThreadList* OS::listThreads() { + return new LinuxThreadList(); +} + +bool OS::isLinux() { + return true; +} + +// _CS_GNU_LIBC_VERSION is not defined on musl +const static bool musl = confstr(_CS_GNU_LIBC_VERSION, NULL, 0) == 0 && errno != 0; + +bool OS::isMusl() { + return musl; +} + +SigAction OS::installSignalHandler(int signo, SigAction action, SigHandler handler) { + struct sigaction sa; + struct sigaction oldsa; + sigemptyset(&sa.sa_mask); + + if (handler != NULL) { + sa.sa_handler = handler; + sa.sa_flags = 0; + } else { + sa.sa_sigaction = action; + sa.sa_flags = SA_SIGINFO | SA_RESTART; + if (signo > 0 && signo < sizeof(installed_sigaction) / sizeof(installed_sigaction[0])) { + installed_sigaction[signo] = action; + } + } + + sigaction(signo, &sa, &oldsa); + return oldsa.sa_sigaction; +} + +static void restoreSignalHandler(int signo, siginfo_t* siginfo, void* ucontext) { + signal(signo, SIG_DFL); +} + +SigAction OS::replaceCrashHandler(SigAction action) { + struct sigaction sa; + sigaction(SIGSEGV, NULL, &sa); + SigAction old_action = sa.sa_handler == SIG_DFL ? 
restoreSignalHandler : sa.sa_sigaction; + sigemptyset(&sa.sa_mask); + sa.sa_sigaction = action; + sa.sa_flags |= SA_SIGINFO | SA_RESTART | SA_NODEFER; + sigaction(SIGSEGV, &sa, NULL); + return old_action; +} + +int OS::getProfilingSignal(int mode) { + static int preferred_signals[2] = {SIGPROF, SIGVTALRM}; + + const u64 allowed_signals = + 1ULL << SIGPROF | 1ULL << SIGVTALRM | 1ULL << SIGSTKFLT | 1ULL << SIGPWR | -(1ULL << SIGRTMIN); + + int& signo = preferred_signals[mode]; + int initial_signo = signo; + int other_signo = preferred_signals[1 - mode]; + + do { + struct sigaction sa; + if ((allowed_signals & (1ULL << signo)) != 0 && signo != other_signo && sigaction(signo, NULL, &sa) == 0) { + if (sa.sa_handler == SIG_DFL || sa.sa_handler == SIG_IGN || sa.sa_sigaction == installed_sigaction[signo]) { + return signo; + } + } + } while ((signo = (signo + 53) & 63) != initial_signo); + + return signo; +} + +bool OS::sendSignalToThread(int thread_id, int signo) { + return syscall(__NR_tgkill, processId(), thread_id, signo) == 0; +} + +void* OS::safeAlloc(size_t size) { + // Naked syscall can be used inside a signal handler. + // Also, we don't want to catch our own calls when profiling mmap. + intptr_t result = syscall(MMAP_SYSCALL, NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (result < 0 && result > -4096) { + return NULL; + } + return (void*)result; +} + +void OS::safeFree(void* addr, size_t size) { + syscall(__NR_munmap, addr, size); +} + +bool OS::getCpuDescription(char* buf, size_t size) { + int fd = open("/proc/cpuinfo", O_RDONLY); + if (fd == -1) { + return false; + } + + ssize_t r = read(fd, buf, size); + close(fd); + if (r <= 0) { + return false; + } + buf[r < size ? r : size - 1] = 0; + + char* c; + do { + c = strchr(buf, '\n'); + } while (c != NULL && *(buf = c + 1) != '\n'); + + *buf = 0; + return true; +} + +int OS::getCpuCount() { + return sysconf(_SC_NPROCESSORS_ONLN); +} + +u64 OS::getProcessCpuTime(u64* utime, u64* stime) { + struct tms buf; + clock_t real = times(&buf); + *utime = buf.tms_utime; + *stime = buf.tms_stime; + return real; +} + +u64 OS::getTotalCpuTime(u64* utime, u64* stime) { + int fd = open("/proc/stat", O_RDONLY); + if (fd == -1) { + return (u64)-1; + } + + u64 real = (u64)-1; + char buf[128] = {0}; + if (read(fd, buf, sizeof(buf)) >= 12) { + u64 user, nice, system, idle; + if (sscanf(buf + 4, "%llu %llu %llu %llu", &user, &nice, &system, &idle) == 4) { + *utime = user + nice; + *stime = system; + real = user + nice + system + idle; + } + } + + close(fd); + return real; +} + +int OS::createMemoryFile(const char* name) { + return syscall(__NR_memfd_create, name, 0); +} + +void OS::copyFile(int src_fd, int dst_fd, off_t offset, size_t size) { + // copy_file_range() is probably better, but not supported on all kernels + while (size > 0) { + ssize_t bytes = sendfile(dst_fd, src_fd, &offset, size); + if (bytes <= 0) { + break; + } + size -= (size_t)bytes; + } +} + +void OS::freePageCache(int fd, off_t start_offset) { + posix_fadvise(fd, start_offset & ~page_mask, 0, POSIX_FADV_DONTNEED); +} + +int OS::mprotect(void* addr, size_t size, int prot) { + return ::mprotect(addr, size, prot); +} + +static int checkPreloadedCallback(dl_phdr_info* info, size_t size, void* data) { + Dl_info* dl_info = (Dl_info*)data; + + Dl_info libprofiler = dl_info[0]; + Dl_info libc = dl_info[1]; + + if ((void*)info->dlpi_addr == libprofiler.dli_fbase) { + // async-profiler found first + return 1; + } else if ((void*)info->dlpi_addr == libc.dli_fbase) { + 
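+ // dl_iterate_phdr() visits objects in load order, so reaching libc before
+ // the profiler means the profiler was not preloaded; any non-zero return
+ // value stops the iteration.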
// libc found first + return -1; + } + + return 0; +} + +// Checks if async-profiler is preloaded through the LD_PRELOAD mechanism. +// This is done by analyzing the order of loaded dynamic libraries. +bool OS::checkPreloaded() { + if (getenv("LD_PRELOAD") == NULL) { + return false; + } + + // Find async-profiler shared object + Dl_info libprofiler; + if (dladdr((const void*)OS::checkPreloaded, &libprofiler) == 0) { + return false; + } + + // Find libc shared object + Dl_info libc; + if (dladdr((const void*)exit, &libc) == 0) { + return false; + } + + Dl_info info[2] = {libprofiler, libc}; + return dl_iterate_phdr(checkPreloadedCallback, (void*)info) == 1; +} + +u64 OS::getRamSize() { + static u64 mem_total = 0; + + if (mem_total == 0) { + FILE* file = fopen("/proc/meminfo", "r"); + if (!file) return 0; + + char line[1024]; + while (fgets(line, sizeof(line), file)) { + if (strncmp(line, "MemTotal:", 9) == 0) { + mem_total = strtoull(line + 9, NULL, 10) * 1024; + break; + } + } + + fclose(file); + } + + return mem_total; +} + +u64 OS::getSystemBootTime() { + static u64 system_boot_time = 0; + + if (system_boot_time == 0) { + FILE* file = fopen("/proc/stat", "r"); + if (!file) return 0; + + char line[1024]; + while (fgets(line, sizeof(line), file)) { + if (strncmp(line, "btime", 5) == 0) { + system_boot_time = strtoull(line + 5, NULL, 10); + break; + } + } + + fclose(file); + } + + return system_boot_time; +} + +int OS::getProcessIds(int* pids, int max_pids) { + int count = 0; + DIR* proc = opendir("/proc"); + if (!proc) return 0; + + for (dirent* de; (de = readdir(proc)) && count < max_pids;) { + int pid = atoi(de->d_name); + if (pid > 0) { + pids[count++] = pid; + } + } + + closedir(proc); + return count; +} + +static bool readProcessCmdline(int pid, ProcessInfo* info) { + char path[64]; + snprintf(path, sizeof(path), "/proc/%d/cmdline", pid); + + int fd = open(path, O_RDONLY); + if (fd == -1) { + return false; + } + + const size_t max_read = sizeof(info->cmdline) - 1; + size_t len = 0; + + ssize_t r; + while (r = read(fd, info->cmdline + len, max_read - len)) { + if (r > 0) { + len += (size_t)r; + if (len == max_read) break; + } else { + if (errno == EINTR) continue; + close(fd); + return false; + } + } + + close(fd); + + // Replace null bytes with spaces (arguments are separated by null bytes) + for (size_t i = 0; i < len; i++) { + if (info->cmdline[i] == '\0') { + info->cmdline[i] = ' '; + } + } + + // Ensure null termination + info->cmdline[len] = '\0'; + + // Remove trailing space if present + while (len > 0 && info->cmdline[len - 1] == ' ') { + info->cmdline[--len] = '\0'; + } + + return true; +} + +static bool readProcessStats(int pid, ProcessInfo* info) { + char path[64]; + snprintf(path, sizeof(path), "/proc/%d/stat", pid); + + int fd = open(path, O_RDONLY); + if (fd == -1) return false; + + char buffer[4096]; + size_t len = 0; + + ssize_t r; + while (r = read(fd, buffer + len, sizeof(buffer) - 1 - len)) { + if (r > 0) { + len += (size_t)r; + if (len == sizeof(buffer) - 1) break; + } else { + if (errno == EINTR) continue; + close(fd); + return false; + } + } + close(fd); + + if (len == 0) return false; + buffer[len] = '\0'; + + int parsed_pid, ppid; + char comm[COMM_LEN] = {0}; + char state; + u64 minflt, majflt, utime, stime; + u64 starttime; + u64 vsize, rss; + int threads; + int parsed = + sscanf(buffer, + "%d " /* 1 pid */ + "(%15[^)]) " /* 2 comm (read until ')') */ + "%c %d " /* 3 state, 4 ppid */ + "%*d %*d %*d %*d %*u " /* 5-9 skip */ + "%llu %*u %llu %*u " /* 10-13 
minflt,-,majflt,- */ + "%llu %llu " /* 14-15 utime, stime */ + "%*d %*d %*d %*d " /* 16-19 skip */ + "%d " /* 20 threads */ + "%*d " /* 21 skip */ + "%llu " /* 22 starttime */ + "%llu " /* 23 vsize */ + "%llu", /* 24 rss */ + &parsed_pid, comm, &state, &ppid, &minflt, &majflt, &utime, &stime, &threads, &starttime, &vsize, &rss); + + if (parsed < 12) return false; + + memcpy(info->name, comm, COMM_LEN); + info->pid = parsed_pid; + info->ppid = ppid; + info->state = (unsigned char)state; + info->minor_faults = minflt; + info->major_faults = majflt; + info->cpu_user = (float)utime / OS::clock_ticks_per_sec; + info->cpu_system = (float)stime / OS::clock_ticks_per_sec; + info->threads = threads; + info->vm_size = vsize; + // (24) rss - convert from number of pages to bytes + info->vm_rss = rss * OS::page_size; + info->start_time = (OS::getSystemBootTime() + starttime / OS::clock_ticks_per_sec) * 1000; + return true; +} + +static bool readProcessStatus(int pid, ProcessInfo* info) { + char path[64]; + snprintf(path, sizeof(path), "/proc/%d/status", pid); + FILE* file = fopen(path, "r"); + if (!file) { + return false; + } + + int read_count = 0; + char line[1024]; + char key[32]; + u64 value; + while (fgets(line, sizeof(line), file) && read_count < 6) { + if (sscanf(line, "%31s %llu", key, &value) != 2) { + continue; + } + + if (strncmp(key, "Uid", 3) == 0) { + read_count++; + info->uid = (unsigned int)value; + } else if (strncmp(key, "RssAnon", 7) == 0) { + read_count++; + info->rss_anon = value * 1024; + } else if (strncmp(key, "RssFile", 7) == 0) { + read_count++; + info->rss_files = value * 1024; + } else if (strncmp(key, "RssShmem", 8) == 0) { + read_count++; + info->rss_shmem = value * 1024; + } else if (strncmp(key, "VmSize", 6) == 0) { + read_count++; + info->vm_size = value * 1024; + } else if (strncmp(key, "VmRSS", 5) == 0) { + read_count++; + info->vm_rss = value * 1024; + } + } + + fclose(file); + return true; +} + +static bool readProcessIO(int pid, ProcessInfo* info) { + char path[64]; + snprintf(path, sizeof(path), "/proc/%d/io", pid); + FILE* file = fopen(path, "r"); + if (!file) return false; + + int read_count = 0; + char line[1024]; + while (fgets(line, sizeof(line), file) && read_count < 2) { + if (strncmp(line, "read_bytes:", 11) == 0) { + u64 read_bytes = strtoull(line + 11, NULL, 10); + info->io_read = read_bytes >> 10; + read_count++; + } else if (strncmp(line, "write_bytes:", 12) == 0) { + u64 write_bytes = strtoull(line + 12, NULL, 10); + info->io_write = write_bytes >> 10; + read_count++; + } + } + + fclose(file); + return true; +} + +bool OS::getBasicProcessInfo(int pid, ProcessInfo* info) { + return readProcessStats(pid, info); +} + +bool OS::getDetailedProcessInfo(ProcessInfo* info) { + readProcessStatus(info->pid, info); + readProcessIO(info->pid, info); + readProcessCmdline(info->pid, info); + return true; +} + +#endif // __linux__ diff --git a/ddprof-lib/src/main/cpp/os_macos.cpp b/ddprof-lib/src/main/cpp/os_macos.cpp new file mode 100644 index 00000000..e9410d71 --- /dev/null +++ b/ddprof-lib/src/main/cpp/os_macos.cpp @@ -0,0 +1,458 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifdef __APPLE__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "os.h" + + +class MacThreadList : public ThreadList { + private: + task_t _task; + thread_array_t _thread_array; + + void deallocate() 
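+ // task_threads() hands the caller one send right per thread plus the array
+ // that holds them, so every port must be released with mach_port_deallocate()
+ // and the array itself unmapped with vm_deallocate().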
{ + if (_thread_array != NULL) { + for (u32 i = 0; i < _count; i++) { + mach_port_deallocate(_task, _thread_array[i]); + } + vm_deallocate(_task, (vm_address_t)_thread_array, _count * sizeof(thread_t)); + _thread_array = NULL; + } + } + + public: + MacThreadList() { + _task = mach_task_self(); + _thread_array = NULL; + task_threads(_task, &_thread_array, &_count); + } + + ~MacThreadList() { + deallocate(); + } + + int next() { + return (int)_thread_array[_index++]; + } + + void update() { + deallocate(); + _index = _count = 0; + task_threads(_task, &_thread_array, &_count); + } +}; + + +JitWriteProtection::JitWriteProtection(bool enable) { +#ifdef __aarch64__ + // Mimic pthread_jit_write_protect_np(), but save the previous state + if (*(volatile char*)0xfffffc10c) { + u64 val = enable ? *(volatile u64*)0xfffffc118 : *(volatile u64*)0xfffffc110; + u64 prev; + asm volatile("mrs %0, s3_6_c15_c1_5" : "=r" (prev) : : ); + if (prev != val) { + _prev = prev; + _restore = true; + asm volatile("msr s3_6_c15_c1_5, %0\n" + "isb" + : "+r" (val) : : "memory"); + return; + } + } + // Already in the required mode, or write protection is not supported + _restore = false; +#endif +} + +JitWriteProtection::~JitWriteProtection() { +#ifdef __aarch64__ + if (_restore) { + u64 prev = _prev; + asm volatile("msr s3_6_c15_c1_5, %0\n" + "isb" + : "+r" (prev) : : "memory"); + } +#endif +} + + +static SigAction installed_sigaction[32]; +static SigAction orig_sigbus_handler; +static SigAction orig_sigsegv_handler; + +const size_t OS::page_size = sysconf(_SC_PAGESIZE); +const size_t OS::page_mask = OS::page_size - 1; +const long OS::clock_ticks_per_sec = sysconf(_SC_CLK_TCK); + +static mach_timebase_info_data_t timebase = {0, 0}; + +u64 OS::nanotime() { + if (timebase.denom == 0) { + mach_timebase_info(&timebase); + } + return (u64)mach_absolute_time() * timebase.numer / timebase.denom; +} + +u64 OS::micros() { + struct timeval tv; + gettimeofday(&tv, NULL); + return (u64)tv.tv_sec * 1000000 + tv.tv_usec; +} + +void OS::sleep(u64 nanos) { + struct timespec ts = {(time_t)(nanos / 1000000000), (long)(nanos % 1000000000)}; + nanosleep(&ts, NULL); +} + +void OS::uninterruptibleSleep(u64 nanos, volatile bool* flag) { + struct timespec ts = {(time_t)(nanos / 1000000000), (long)(nanos % 1000000000)}; + while (*flag && nanosleep(&ts, &ts) < 0 && errno == EINTR); +} + +u64 OS::overrun(siginfo_t* siginfo) { + return 0; +} + +u64 OS::processStartTime() { + static u64 start_time = 0; + + if (start_time == 0) { + struct proc_bsdinfo info; + if (proc_pidinfo(processId(), PROC_PIDTBSDINFO, 0, &info, sizeof(info)) > 0) { + start_time = (u64)info.pbi_start_tvsec * 1000 + info.pbi_start_tvusec / 1000; + } + } + + return start_time; +} + +u64 OS::hton64(u64 x) { + return OSSwapHostToBigInt64(x); +} + +u64 OS::ntoh64(u64 x) { + return OSSwapBigToHostInt64(x); +} + +int OS::getMaxThreadId() { + return 0x7fffffff; +} + +int OS::processId() { + static const int self_pid = getpid(); + + return self_pid; +} + +int OS::threadId() { + // Used to be pthread_mach_thread_np(pthread_self()), + // but pthread_mach_thread_np is not async signal safe + mach_port_t port = mach_thread_self(); + mach_port_deallocate(mach_task_self(), port); + return (int)port; +} + +const char* OS::schedPolicy(int thread_id) { + // Not used on macOS + return "SCHED_OTHER"; +} + +bool OS::threadName(int thread_id, char* name_buf, size_t name_len) { + pthread_t thread = pthread_from_mach_thread_np(thread_id); + return thread && pthread_getname_np(thread, name_buf, name_len) 
== 0 && name_buf[0] != 0; + +ThreadState OS::threadState(int thread_id) { + struct thread_basic_info info; + mach_msg_type_number_t size = sizeof(info); + if (thread_info((thread_act_t)thread_id, THREAD_BASIC_INFO, (thread_info_t)&info, &size) != 0) { + return THREAD_UNKNOWN; + } + return info.run_state == TH_STATE_RUNNING ? THREAD_RUNNING : THREAD_SLEEPING; +} + +u64 OS::threadCpuTime(int thread_id) { + if (thread_id == 0) thread_id = threadId(); + + struct thread_basic_info info; + mach_msg_type_number_t size = sizeof(info); + if (thread_info((thread_act_t)thread_id, THREAD_BASIC_INFO, (thread_info_t)&info, &size) != 0) { + return 0; + } + return u64(info.user_time.seconds + info.system_time.seconds) * 1000000000 + + u64(info.user_time.microseconds + info.system_time.microseconds) * 1000; +} + +ThreadList* OS::listThreads() { + return new MacThreadList(); +} + +bool OS::isLinux() { + return false; +} + +bool OS::isMusl() { + return false; +} + +SigAction OS::installSignalHandler(int signo, SigAction action, SigHandler handler) { + struct sigaction sa; + struct sigaction oldsa; + sigemptyset(&sa.sa_mask); + + if (handler != NULL) { + sa.sa_handler = handler; + sa.sa_flags = 0; + } else { + sa.sa_sigaction = action; + sa.sa_flags = SA_SIGINFO | SA_RESTART; + if (signo > 0 && signo < sizeof(installed_sigaction) / sizeof(installed_sigaction[0])) { + installed_sigaction[signo] = action; + } + } + + sigaction(signo, &sa, &oldsa); + return oldsa.sa_sigaction; +} + +static void restoreSignalHandler(int signo, siginfo_t* siginfo, void* ucontext) { + signal(signo, SIG_DFL); +} + +SigAction OS::replaceCrashHandler(SigAction action) { + // It is not well specified when macOS raises SIGBUS and when SIGSEGV. + // HotSpot handles both similarly, and so do we. + struct sigaction sa; + + sigaction(SIGBUS, NULL, &sa); + orig_sigbus_handler = sa.sa_handler == SIG_DFL ? restoreSignalHandler : sa.sa_sigaction; + sigemptyset(&sa.sa_mask); + sa.sa_sigaction = action; + sa.sa_flags |= SA_SIGINFO | SA_RESTART | SA_NODEFER; + sigaction(SIGBUS, &sa, NULL); + + sigaction(SIGSEGV, NULL, &sa); + orig_sigsegv_handler = sa.sa_handler == SIG_DFL ? restoreSignalHandler : sa.sa_sigaction; + sigemptyset(&sa.sa_mask); + sa.sa_sigaction = action; + sa.sa_flags |= SA_SIGINFO | SA_RESTART | SA_NODEFER; + sigaction(SIGSEGV, &sa, NULL); + + // Return an action that dispatches to one of the original handlers depending on signo, + // so that the caller does not need to deal with multiple handlers. + return [](int signo, siginfo_t* siginfo, void* ucontext) { + (signo == SIGBUS ? 
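+ // A captureless lambda converts to a plain function pointer, which is what
+ // SigAction is; the original handlers are therefore kept in globals rather
+ // than in captures.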
orig_sigbus_handler : orig_sigsegv_handler)(signo, siginfo, ucontext); + }; +} + +int OS::getProfilingSignal(int mode) { + static int preferred_signals[2] = {SIGPROF, SIGVTALRM}; + + const u64 allowed_signals = + 1ULL << SIGPROF | 1ULL << SIGVTALRM | 1ULL << SIGEMT | 1ULL << SIGSYS; + + int& signo = preferred_signals[mode]; + int initial_signo = signo; + int other_signo = preferred_signals[1 - mode]; + + do { + struct sigaction sa; + if ((allowed_signals & (1ULL << signo)) != 0 && signo != other_signo && sigaction(signo, NULL, &sa) == 0) { + if (sa.sa_handler == SIG_DFL || sa.sa_handler == SIG_IGN || sa.sa_sigaction == installed_sigaction[signo]) { + return signo; + } + } + } while ((signo = (signo + 1) & 31) != initial_signo); + + return signo; +} + +bool OS::sendSignalToThread(int thread_id, int signo) { +#ifdef __aarch64__ + register long x0 asm("x0") = thread_id; + register long x1 asm("x1") = signo; + register long x16 asm("x16") = 328; + asm volatile("svc #0x80" + : "+r" (x0) + : "r" (x1), "r" (x16) + : "memory"); + return x0 == 0; +#else + int result; + asm volatile("syscall" + : "=a" (result) + : "a" (0x2000148), "D" (thread_id), "S" (signo) + : "rcx", "r11", "memory"); + return result == 0; +#endif +} + +void* OS::safeAlloc(size_t size) { + // mmap() is not guaranteed to be async-signal-safe, but in practice, it is. + // There is no reasonable alternative anyway. + void* result = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (result == MAP_FAILED) { + return NULL; + } + return result; +} + +void OS::safeFree(void* addr, size_t size) { + munmap(addr, size); +} + +bool OS::getCpuDescription(char* buf, size_t size) { + return sysctlbyname("machdep.cpu.brand_string", buf, &size, NULL, 0) == 0; +} + +int OS::getCpuCount() { + int cpu_count; + size_t size = sizeof(cpu_count); + return sysctlbyname("hw.logicalcpu", &cpu_count, &size, NULL, 0) == 0 ? cpu_count : 1; +} + +u64 OS::getProcessCpuTime(u64* utime, u64* stime) { + struct tms buf; + clock_t real = times(&buf); + *utime = buf.tms_utime; + *stime = buf.tms_stime; + return real; +} + +u64 OS::getTotalCpuTime(u64* utime, u64* stime) { + natural_t cpu_count; + processor_info_array_t cpu_info_array; + mach_msg_type_number_t cpu_info_count; + + host_name_port_t host = mach_host_self(); + kern_return_t ret = host_processor_info(host, PROCESSOR_CPU_LOAD_INFO, &cpu_count, &cpu_info_array, &cpu_info_count); + mach_port_deallocate(mach_task_self(), host); + if (ret != 0) { + return (u64)-1; + } + + processor_cpu_load_info_data_t* cpu_load = (processor_cpu_load_info_data_t*)cpu_info_array; + u64 user = 0; + u64 system = 0; + u64 idle = 0; + for (natural_t i = 0; i < cpu_count; i++) { + user += cpu_load[i].cpu_ticks[CPU_STATE_USER] + cpu_load[i].cpu_ticks[CPU_STATE_NICE]; + system += cpu_load[i].cpu_ticks[CPU_STATE_SYSTEM]; + idle += cpu_load[i].cpu_ticks[CPU_STATE_IDLE]; + } + vm_deallocate(mach_task_self(), (vm_address_t)cpu_info_array, cpu_info_count * sizeof(int)); + + *utime = user; + *stime = system; + return user + system + idle; +} + +int OS::createMemoryFile(const char* name) { + // Not supported on macOS + return -1; +} + +void OS::copyFile(int src_fd, int dst_fd, off_t offset, size_t size) { + char* buf = (char*)mmap(NULL, size + offset, PROT_READ, MAP_PRIVATE, src_fd, 0); + if (buf == MAP_FAILED) { + return; + } + + while (size > 0) { + ssize_t bytes = write(dst_fd, buf + offset, size < 262144 ? 
size : 262144); + if (bytes <= 0) { + break; + } + offset += (size_t)bytes; + size -= (size_t)bytes; + } + + munmap(buf, offset); +} + +void OS::freePageCache(int fd, off_t start_offset) { + // Not supported on macOS +} + +int OS::mprotect(void* addr, size_t size, int prot) { + if (prot & PROT_WRITE) prot |= VM_PROT_COPY; + return vm_protect(mach_task_self(), (vm_address_t)addr, size, 0, prot); +} + +// Checks if async-profiler is preloaded through the DYLD_INSERT_LIBRARIES mechanism. +// This is done by analyzing the order of loaded dynamic libraries. +bool OS::checkPreloaded() { + if (getenv("DYLD_INSERT_LIBRARIES") == NULL) { + return false; + } + + // Find async-profiler shared object + Dl_info libprofiler; + if (dladdr((const void*)OS::checkPreloaded, &libprofiler) == 0) { + return false; + } + + // Find libc shared object + Dl_info libc; + if (dladdr((const void*)exit, &libc) == 0) { + return false; + } + + uint32_t images = _dyld_image_count(); + for (uint32_t i = 0; i < images; i++) { + void* image_base = (void*)_dyld_get_image_header(i); + + if (image_base == libprofiler.dli_fbase) { + // async-profiler found first + return true; + } else if (image_base == libc.dli_fbase) { + // libc found first + return false; + } + } + + return false; +} + +u64 OS::getSystemBootTime() { + return 0; +} + +u64 OS::getRamSize() { + return 0; +} + +int OS::getProcessIds(int* pids, int max_pids) { + return 0; +} + +bool OS::getBasicProcessInfo(int pid, ProcessInfo* info) { + return false; +} + +bool OS::getDetailedProcessInfo(ProcessInfo* info) { + return false; +} + +#endif // __APPLE__ diff --git a/ddprof-lib/src/main/cpp/stackFrame.h b/ddprof-lib/src/main/cpp/stackFrame.h new file mode 100644 index 00000000..a8d5b6fa --- /dev/null +++ b/ddprof-lib/src/main/cpp/stackFrame.h @@ -0,0 +1,92 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _STACKFRAME_H +#define _STACKFRAME_H + +#include +#include +#include +#include "arch.h" + + +class NMethod; + +class StackFrame { + private: + ucontext_t* _ucontext; + + static bool withinCurrentStack(uintptr_t address) { + // Check that the address is not too far from the stack pointer of current context + void* real_sp; + return address - (uintptr_t)&real_sp <= 0xffff; + } + + public: + explicit StackFrame(void* ucontext) { + _ucontext = (ucontext_t*)ucontext; + } + + void restore(uintptr_t saved_pc, uintptr_t saved_sp, uintptr_t saved_fp) { + if (_ucontext != nullptr) { + pc() = saved_pc; + sp() = saved_sp; + fp() = saved_fp; + } + } + + uintptr_t stackAt(int slot) { + return ((uintptr_t*)sp())[slot]; + } + + uintptr_t& pc(); + uintptr_t& sp(); + uintptr_t& fp(); + + uintptr_t& retval(); + uintptr_t link(); + uintptr_t arg0(); + uintptr_t arg1(); + uintptr_t arg2(); + uintptr_t arg3(); + uintptr_t jarg0(); + uintptr_t method(); + uintptr_t senderSP(); + + void ret(); + + bool unwindStub(instruction_t* entry, const char* name) { + return unwindStub(entry, name, pc(), sp(), fp()); + } + + bool unwindCompiled(NMethod* nm) { + return unwindCompiled(nm, pc(), sp(), fp()); + } + + bool unwindStub(instruction_t* entry, const char* name, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp); + bool unwindAtomicStub(const void*& pc); + + // TODO: this function will be removed once `vm` becomes the default stack walking mode + bool unwindCompiled(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp); + + bool unwindPrologue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp); + bool unwindEpilogue(NMethod* nm, 
uintptr_t& pc, uintptr_t& sp, uintptr_t& fp); + + void adjustSP(const void* entry, const void* pc, uintptr_t& sp); + + // SP baseline helpers for compiled frame unwinding + uintptr_t sender_sp_baseline(const NMethod* nm, uintptr_t sp, uintptr_t fp, const void* pc); + const void* read_caller_pc_from_sp(uintptr_t sp_base); + uintptr_t read_saved_fp_from_sp(uintptr_t sp_base); + + bool skipFaultInstruction(); + + bool checkInterruptedSyscall(); + + // Check if PC points to a syscall instruction + static bool isSyscall(instruction_t* pc); +}; + +#endif // _STACKFRAME_H diff --git a/ddprof-lib/src/main/cpp/stackFrame_aarch64.cpp b/ddprof-lib/src/main/cpp/stackFrame_aarch64.cpp new file mode 100644 index 00000000..12d17611 --- /dev/null +++ b/ddprof-lib/src/main/cpp/stackFrame_aarch64.cpp @@ -0,0 +1,405 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifdef __aarch64__ + +#include +#include +#include +#include "stackFrame.h" +#include "safeAccess.h" +#include "vmStructs.h" + + +#ifdef __APPLE__ +# define REG(l, m) _ucontext->uc_mcontext->__ss.__##m +#else +# define REG(l, m) _ucontext->uc_mcontext.l +#endif + + +uintptr_t& StackFrame::pc() { + return (uintptr_t&)REG(pc, pc); +} + +uintptr_t& StackFrame::sp() { + return (uintptr_t&)REG(sp, sp); +} + +uintptr_t& StackFrame::fp() { + return (uintptr_t&)REG(regs[29], fp); +} + +uintptr_t& StackFrame::retval() { + return (uintptr_t&)REG(regs[0], x[0]); +} + +uintptr_t StackFrame::link() { + return (uintptr_t)REG(regs[30], lr); +} + +uintptr_t StackFrame::arg0() { + return (uintptr_t)REG(regs[0], x[0]); +} + +uintptr_t StackFrame::arg1() { + return (uintptr_t)REG(regs[1], x[1]); +} + +uintptr_t StackFrame::arg2() { + return (uintptr_t)REG(regs[2], x[2]); +} + +uintptr_t StackFrame::arg3() { + return (uintptr_t)REG(regs[3], x[3]); +} + +uintptr_t StackFrame::jarg0() { + return arg1(); +} + +uintptr_t StackFrame::method() { + return (uintptr_t)REG(regs[12], x[12]); +} + +uintptr_t StackFrame::senderSP() { + return (uintptr_t)REG(regs[19], x[19]); +} + +void StackFrame::ret() { + pc() = link(); +} + +static inline bool isSTP(instruction_t insn) { + // stp xn, xm, [sp, #-imm]! + // stp dn, dm, [sp, #-imm]! 
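+ // Example: "stp x29, x30, [sp, #-16]!" encodes as 0xa9bf7bfd, and
+ // 0xa9bf7bfd & 0xffe003e0 == 0xa9a003e0: the mask keeps the opcode, the
+ // addressing mode, the sign bit of imm7 (pre-decrement only) and Rn = sp,
+ // while ignoring the register pair and the exact frame size.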
+ return (insn & 0xffe003e0) == 0xa9a003e0 || (insn & 0xffe003e0) == 0x6da003e0; +} + +// Check if this is a well-known leaf stub with a constant size frame +static inline bool isFixedSizeFrame(const char* name) { + // Dispatch by the first character to optimize lookup + switch (name[0]) { + case 'i': + return strncmp(name, "indexof_linear_", 15) == 0; + case 'm': + return strncmp(name, "md5_implCompress", 16) == 0; + case 's': + return strncmp(name, "sha256_implCompress", 19) == 0 + || strncmp(name, "string_indexof_linear_", 22) == 0 + || strncmp(name, "slow_subtype_check", 18) == 0; + default: + return false; + } +} + +// Check if this is a well-known leaf stub that does not change stack pointer +static inline bool isZeroSizeFrame(const char* name) { + // Dispatch by the first character to optimize lookup + switch (name[0]) { + case 'I': + return strcmp(name, "InlineCacheBuffer") == 0; + case 'S': + return strncmp(name, "SafeFetch", 9) == 0; + case 'a': + return strncmp(name, "atomic", 6) == 0; + case 'b': + return strncmp(name, "bigInteger", 10) == 0 + || strcmp(name, "base64_encodeBlock") == 0; + case 'c': + return strncmp(name, "copy_", 5) == 0 + || strncmp(name, "compare_long_string_", 20) == 0; + case 'e': + return strcmp(name, "encodeBlock") == 0; + case 'f': + return strcmp(name, "f2hf") == 0; + case 'g': + return strcmp(name, "ghash_processBlocks") == 0; + case 'h': + return strcmp(name, "hf2f") == 0; + case 'i': + return strncmp(name, "itable", 6) == 0; + case 'l': + return strcmp(name, "large_byte_array_inflate") == 0 + || strncmp(name, "lookup_secondary_supers_", 24) == 0; + case 'm': + return strncmp(name, "md5_implCompress", 16) == 0; + case 's': + return strncmp(name, "sha1_implCompress", 17) == 0 + || strncmp(name, "compare_long_string_same_encoding", 33) == 0 + || strcmp(name, "compare_long_string_LL") == 0 + || strcmp(name, "compare_long_string_UU") == 0; + case 'u': + return strcmp(name, "updateBytesAdler32") == 0; + case 'v': + return strncmp(name, "vtable", 6) == 0; + case 'z': + return strncmp(name, "zero_", 5) == 0; + default: + return false; + } +} + +bool StackFrame::unwindStub(instruction_t* entry, const char* name, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + instruction_t* ip = (instruction_t*)pc; + if (ip == entry || *ip == 0xd65f03c0) { + pc = link(); + return true; + } else if (entry != NULL && entry[0] == 0xa9bf7bfd) { + // The stub begins with + // stp x29, x30, [sp, #-16]! + // mov x29, sp + if (ip == entry + 1) { + sp += 16; + pc = ((uintptr_t*)sp)[-1]; + return true; + } else if (entry[1] == 0x910003fd && withinCurrentStack(fp)) { + sp = fp + 16; + fp = ((uintptr_t*)sp)[-2]; + pc = ((uintptr_t*)sp)[-1]; + return true; + } + } else if (entry != NULL && isSTP(entry[0]) && isFixedSizeFrame(name)) { + // The stub begins with + // stp xn, xm, [sp, #-imm]! 
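+ // imm7 occupies bits 21:15 and is scaled by 8: "<< 10" moves it to the top
+ // bits and the arithmetic ">> 25" sign-extends it. For example,
+ // "stp d8, d9, [sp, #-96]!" carries imm7 = -12, so sp - (-12 * 8) rewinds
+ // the 96-byte frame.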
+ int offset = int(entry[0] << 10) >> 25; + sp = (intptr_t)sp - offset * 8; + pc = link(); + return true; + } else if (isZeroSizeFrame(name)) { + // Should be done after isSTP check, since frame size may vary between JVM versions + pc = link(); + return true; + } else if (strcmp(name, "forward_copy_longs") == 0 + || strcmp(name, "backward_copy_longs") == 0 + // There is a typo in JDK 8 + || strcmp(name, "foward_copy_longs") == 0) { + // These are called from arraycopy stub that maintains the regular frame link + if (&pc == &this->pc() && withinCurrentStack(fp)) { + // Unwind both stub frames for AsyncGetCallTrace + sp = fp + 16; + fp = ((uintptr_t*)sp)[-2]; + pc = ((uintptr_t*)sp)[-1] - sizeof(instruction_t); + } else { + // When cstack=vm, unwind stub frames one by one + pc = link(); + } + return true; + } + return false; +} + +static inline bool isEntryBarrier(instruction_t* ip) { + // ldr w9, [x28, #32] + // cmp x8, x9 + return ip[0] == 0xb9402389 && ip[1] == 0xeb09011f; +} + +bool StackFrame::unwindCompiled(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + instruction_t* ip = (instruction_t*)pc; + instruction_t* entry = (instruction_t*)nm->entry(); + if ((*ip & 0xffe07fff) == 0xa9007bfd) { + // stp x29, x30, [sp, #offset] + // SP has been adjusted, but FP not yet stored in a new frame + unsigned int offset = (*ip >> 12) & 0x1f8; + sp += offset + 16; + pc = link(); + } else if (ip > entry && ip[0] == 0x910003fd && ip[-1] == 0xa9bf7bfd) { + // stp x29, x30, [sp, #-16]! + // mov x29, sp + sp += 16; + pc = ((uintptr_t*)sp)[-1]; + } else if (ip > entry + 3 && !nm->isFrameCompleteAt(ip) && + (isEntryBarrier(ip) || isEntryBarrier(ip + 1))) { + // Frame should be complete at this point + sp += nm->frameSize() * sizeof(void*); + fp = ((uintptr_t*)sp)[-2]; + pc = ((uintptr_t*)sp)[-1]; + } else { + // Just try + pc = link(); + } + return true; +} + +static inline bool isFrameComplete(instruction_t* entry, instruction_t* ip) { + // Frame is fully constructed after sp is decremented by the frame size. + // Check if there is such an instruction anywhere between + // the method entry and the current instruction pointer. + while (--ip >= entry) { + if ((*ip & 0xff8003ff) == 0xd10003ff) { // sub sp, sp, #frame_size + return true; + } + } + return false; +} + +bool StackFrame::unwindPrologue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + // C1/C2 methods: + // {stack_bang} + // sub sp, sp, #0x40 + // stp x29, x30, [sp, #48] + // + // Native wrappers: + // {stack_bang} + // stp x29, x30, [sp, #-16]! + // mov x29, sp + // sub sp, sp, #0x50 + // + instruction_t* ip = (instruction_t*)pc; + instruction_t* entry = (instruction_t*)nm->entry(); + if (ip <= entry) { + pc = link(); + } else if ((*ip & 0xffe07fff) == 0xa9007bfd) { + // stp x29, x30, [sp, #offset] + // SP has been adjusted, but FP not yet stored in a new frame + unsigned int offset = (*ip >> 12) & 0x1f8; + sp += offset + 16; + pc = link(); + } else if (ip[0] == 0x910003fd && ip[-1] == 0xa9bf7bfd) { + // stp x29, x30, [sp, #-16]! 
+ // mov x29, sp + sp += 16; + pc = ((uintptr_t*)sp)[-1]; + } else if (ip <= entry + 16 && isFrameComplete(entry, ip)) { + sp += nm->frameSize() * sizeof(void*); + fp = ((uintptr_t*)sp)[-2]; + pc = ((uintptr_t*)sp)[-1]; + } else { + pc = link(); + } + return true; +} + +static inline bool isPollReturn(instruction_t* ip) { + // JDK 17+ + // add sp, sp, #0x30 + // ldr x8, [x28, #832] + // cmp sp, x8 + // b.hi offset + // ret + // + // JDK 11 + // add sp, sp, #0x30 + // ldr x8, [x28, #264] + // ldr wzr, [x8] + // ret + // + // JDK 8 + // add sp, sp, #0x30 + // adrp x8, polling_page + // ldr wzr, [x8] + // ret + // + if ((ip[0] & 0xffc003ff) == 0xf9400388 && (ip[-1] & 0xff8003ff) == 0x910003ff) { + // ldr x8, preceded by add sp + return true; + } else if ((ip[0] & 0x9f00001f) == 0x90000008 && (ip[-1] & 0xff8003ff) == 0x910003ff) { + // adrp x8, preceded by add sp + return true; + } else if (ip[0] == 0xeb2863ff && ip[2] == 0xd65f03c0) { + // cmp sp, x8, followed by ret + return true; + } else if ((ip[0] & 0xff000010) == 0x54000000 && ip[1] == 0xd65f03c0) { + // b.cond, followed by ret + return true; + } else if (ip[0] == 0xb940011f && ip[1] == 0xd65f03c0) { + // ldr wzr, followed by ret + return true; + } + return false; +} + +bool StackFrame::unwindEpilogue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + // ldp x29, x30, [sp, #32] + // add sp, sp, #0x30 + // {poll_return} + // ret + instruction_t* ip = (instruction_t*)pc; + if (*ip == 0xd65f03c0 || isPollReturn(ip)) { // ret + pc = link(); + return true; + } + return false; +} + +bool StackFrame::unwindAtomicStub(const void*& pc) { + // VM threads may call generated atomic stubs, which are not normally walkable + const void* lr = (const void*)link(); + if (VMStructs::libjvm()->contains(lr)) { + NMethod* nm = CodeHeap::findNMethod(pc); + if (nm != NULL && strncmp(nm->name(), "Stub", 4) == 0) { + pc = lr; + return true; + } + } + return false; +} + +void StackFrame::adjustSP(const void* entry, const void* pc, uintptr_t& sp) { + instruction_t* ip = (instruction_t*)pc; + if (ip > entry && (ip[-1] == 0xa9bf27ff || (ip[-1] == 0xd63f0100 && ip[-2] == 0xa9bf27ff))) { + // When calling a leaf native from Java, JVM puts a dummy frame link onto the stack, + // thus breaking the invariant: sender_sp == current_sp + frame_size. + // Since JDK 21, there are more instructions between `blr` and `add`, + // ignore them now for the sake of simplicity. + // stp xzr, x9, [sp, #-16]! + // blr x8 + // ... + // add sp, sp, #0x10 + sp += 16; + } +} + +bool StackFrame::skipFaultInstruction() { + return false; +} + +bool StackFrame::checkInterruptedSyscall() { +#ifdef __APPLE__ + // We are not interested in syscalls that do not check error code, e.g. 
semaphore_wait_trap + if (*(instruction_t*)pc() == 0xd65f03c0) { + return true; + } + // If carry flag is set, the error code is in low byte of x0 + if (REG(pstate, cpsr) & (1 << 29)) { + return (retval() & 0xff) == EINTR || (retval() & 0xff) == ETIMEDOUT; + } else { + return retval() == (uintptr_t)-EINTR; + } +#else + if (retval() == (uintptr_t)-EINTR) { + // Workaround for JDK-8237858: restart the interrupted poll / epoll_wait manually + uintptr_t nr = (uintptr_t)REG(regs[8], x[8]); + if (nr == SYS_ppoll || (nr == SYS_epoll_pwait && (int)arg3() == -1)) { + // Check against unreadable page for the loop below + const uintptr_t max_distance = 24; + if ((pc() & 0xfff) < max_distance && SafeAccess::load32((int32_t*)(pc() - max_distance)) == 0) { + return true; + } + // Try to restore the original value of x0 saved in another register + for (uintptr_t prev_pc = pc() - 4; pc() - prev_pc <= max_distance; prev_pc -= 4) { + instruction_t insn = *(instruction_t*)prev_pc; + unsigned int reg = (insn >> 16) & 31; + if ((insn & 0xffe0ffff) == 0xaa0003e0 && reg >= 6) { + // mov x0, reg + REG(regs[0], x[0]) = REG(regs[reg], x[reg]); + pc() -= sizeof(instruction_t); + break; + } + } + } + return true; + } + return false; +#endif +} + +bool StackFrame::isSyscall(instruction_t* pc) { + // svc #0 or svc #80 + return (*pc & 0xffffefff) == 0xd4000001; +} + +#endif // __aarch64__ diff --git a/ddprof-lib/src/main/cpp/stackFrame_arm.cpp b/ddprof-lib/src/main/cpp/stackFrame_arm.cpp new file mode 100644 index 00000000..e175c964 --- /dev/null +++ b/ddprof-lib/src/main/cpp/stackFrame_arm.cpp @@ -0,0 +1,141 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#if defined(__arm__) || defined(__thumb__) + +#include +#include +#include "stackFrame.h" +#include "vmStructs.h" + + +uintptr_t& StackFrame::pc() { + return (uintptr_t&)_ucontext->uc_mcontext.arm_pc; +} + +uintptr_t& StackFrame::sp() { + return (uintptr_t&)_ucontext->uc_mcontext.arm_sp; +} + +uintptr_t& StackFrame::fp() { + return (uintptr_t&)_ucontext->uc_mcontext.arm_fp; +} + +uintptr_t& StackFrame::retval() { + return (uintptr_t&)_ucontext->uc_mcontext.arm_r0; +} + +uintptr_t StackFrame::link() { + return (uintptr_t)_ucontext->uc_mcontext.arm_lr; +} + +uintptr_t StackFrame::arg0() { + return (uintptr_t)_ucontext->uc_mcontext.arm_r0; +} + +uintptr_t StackFrame::arg1() { + return (uintptr_t)_ucontext->uc_mcontext.arm_r1; +} + +uintptr_t StackFrame::arg2() { + return (uintptr_t)_ucontext->uc_mcontext.arm_r2; +} + +uintptr_t StackFrame::arg3() { + return (uintptr_t)_ucontext->uc_mcontext.arm_r3; +} + +uintptr_t StackFrame::jarg0() { + // Unimplemented + return 0; +} + +uintptr_t StackFrame::method() { + return (uintptr_t)_ucontext->uc_mcontext.arm_r9; +} + +uintptr_t StackFrame::senderSP() { + return (uintptr_t)_ucontext->uc_mcontext.arm_r4; +} + +void StackFrame::ret() { + pc() = link(); +} + + +bool StackFrame::unwindStub(instruction_t* entry, const char* name, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + instruction_t* ip = (instruction_t*)pc; + if (ip == entry || *ip == 0xe12fff1e + || strncmp(name, "itable", 6) == 0 + || strncmp(name, "vtable", 6) == 0 + || strcmp(name, "InlineCacheBuffer") == 0) + { + pc = link(); + return true; + } + return false; +} + +bool StackFrame::unwindCompiled(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + instruction_t* ip = (instruction_t*)pc; + instruction_t* entry = (instruction_t*)nm->entry(); + if (ip > entry && ip <= entry + 4 && (*ip & 0xffffff00) == 0xe24dd000) 
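+ // 0xe24dd0NN encodes "sub sp, sp, #imm" with an 8-bit immediate, i.e. the
+ // frame allocation itself; since it has not executed yet, the pair pushed
+ // by "push {r11, lr}" is still at the top of the stack.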
{ + // push {r11, lr} + // mov r11, sp (optional) + // -> sub sp, sp, #offs + fp = ((uintptr_t*)sp)[0]; + pc = ((uintptr_t*)sp)[1]; + sp += 8; + return true; + } else if (*ip == 0xe8bd4800) { + // add sp, sp, #offs + // -> pop {r11, lr} + fp = ((uintptr_t*)sp)[0]; + pc = ((uintptr_t*)sp)[1]; + sp += 8; + return true; + } + pc = link(); + return true; +} + +bool StackFrame::unwindPrologue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + instruction_t* ip = (instruction_t*)pc; + instruction_t* entry = (instruction_t*)nm->entry(); + if (ip <= entry) { + pc = link(); + return true; + } + return false; +} + +bool StackFrame::unwindEpilogue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + // Not yet implemented + return false; +} + +bool StackFrame::unwindAtomicStub(const void*& pc) { + // Not needed + return false; +} + +void StackFrame::adjustSP(const void* entry, const void* pc, uintptr_t& sp) { + // Not needed +} + +bool StackFrame::skipFaultInstruction() { + return false; +} + +bool StackFrame::checkInterruptedSyscall() { + return retval() == (uintptr_t)-EINTR; +} + +bool StackFrame::isSyscall(instruction_t* pc) { + // swi #0 + return *pc == 0xef000000; +} + +#endif // defined(__arm__) || defined(__thumb__) diff --git a/ddprof-lib/src/main/cpp/stackFrame_i386.cpp b/ddprof-lib/src/main/cpp/stackFrame_i386.cpp new file mode 100644 index 00000000..a30d16f4 --- /dev/null +++ b/ddprof-lib/src/main/cpp/stackFrame_i386.cpp @@ -0,0 +1,162 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifdef __i386__ + +#include +#include +#include "stackFrame.h" +#include "vmStructs.h" + + +uintptr_t& StackFrame::pc() { + return (uintptr_t&)_ucontext->uc_mcontext.gregs[REG_EIP]; +} + +uintptr_t& StackFrame::sp() { + return (uintptr_t&)_ucontext->uc_mcontext.gregs[REG_ESP]; +} + +uintptr_t& StackFrame::fp() { + return (uintptr_t&)_ucontext->uc_mcontext.gregs[REG_EBP]; +} + +uintptr_t& StackFrame::retval() { + return (uintptr_t&)_ucontext->uc_mcontext.gregs[REG_EAX]; +} + +uintptr_t StackFrame::link() { + // No link register on x86 + return 0; +} + +uintptr_t StackFrame::arg0() { + return stackAt(1); +} + +uintptr_t StackFrame::arg1() { + return stackAt(2); +} + +uintptr_t StackFrame::arg2() { + return stackAt(3); +} + +uintptr_t StackFrame::arg3() { + return stackAt(4); +} + +uintptr_t StackFrame::jarg0() { + // Unimplemented + return 0; +} + +uintptr_t StackFrame::method() { + return _ucontext->uc_mcontext.gregs[REG_ESP]; +} + +uintptr_t StackFrame::senderSP() { + return _ucontext->uc_mcontext.gregs[REG_ESI]; +} + +void StackFrame::ret() { + pc() = stackAt(0); + sp() += 4; +} + + +bool StackFrame::unwindStub(instruction_t* entry, const char* name, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + instruction_t* ip = (instruction_t*)pc; + if (ip == entry || *ip == 0xc3 + || strncmp(name, "itable", 6) == 0 + || strncmp(name, "vtable", 6) == 0 + || strcmp(name, "InlineCacheBuffer") == 0) + { + pc = *(uintptr_t*)sp; + sp += 4; + return true; + } else if (entry != NULL && entry[0] == 0x55 && entry[1] == 0x8b && entry[2] == 0xec) { + // The stub begins with + // push ebp + // mov ebp, esp + if (ip == entry + 1) { + pc = ((uintptr_t*)sp)[1]; + sp += 8; + return true; + } else if (withinCurrentStack(fp)) { + sp = fp + 8; + fp = ((uintptr_t*)sp)[-2]; + pc = ((uintptr_t*)sp)[-1]; + return true; + } + } + return false; +} + +bool StackFrame::unwindCompiled(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + instruction_t* ip = 
(instruction_t*)pc; + instruction_t* entry = (instruction_t*)nm->entry(); + if (ip <= entry + || *ip == 0xc3 // ret + || *ip == 0x55 // push ebp + || ip[-1] == 0x5d) // after pop ebp + { + pc = *(uintptr_t*)sp; + sp += 4; + return true; + } else if (*ip == 0x5d) { + // pop ebp + fp = ((uintptr_t*)sp)[0]; + pc = ((uintptr_t*)sp)[1]; + sp += 8; + return true; + } + return false; +} + +bool StackFrame::unwindPrologue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + instruction_t* ip = (instruction_t*)pc; + instruction_t* entry = (instruction_t*)nm->entry(); + if (ip <= entry || *ip == 0x55) { // push ebp + pc = *(uintptr_t*)sp; + sp += 4; + return true; + } + return false; +} + +bool StackFrame::unwindEpilogue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + instruction_t* ip = (instruction_t*)pc; + if (*ip == 0xc3) { // ret + pc = *(uintptr_t*)sp; + sp += 4; + return true; + } + return false; +} + +bool StackFrame::unwindAtomicStub(const void*& pc) { + // Not needed + return false; +} + +void StackFrame::adjustSP(const void* entry, const void* pc, uintptr_t& sp) { + // Not needed +} + +bool StackFrame::skipFaultInstruction() { + return false; +} + +bool StackFrame::checkInterruptedSyscall() { + return retval() == (uintptr_t)-EINTR; +} + +bool StackFrame::isSyscall(instruction_t* pc) { + // int 0x80 + return pc[0] == 0xcd && pc[1] == 0x80; +} + +#endif // __i386__ diff --git a/ddprof-lib/src/main/cpp/stackFrame_loongarch64.cpp b/ddprof-lib/src/main/cpp/stackFrame_loongarch64.cpp new file mode 100644 index 00000000..99c15ace --- /dev/null +++ b/ddprof-lib/src/main/cpp/stackFrame_loongarch64.cpp @@ -0,0 +1,116 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifdef __loongarch_lp64 + +#include +#include +#include +#include "stackFrame.h" + +#define REG(l) _ucontext->uc_mcontext.__gregs[l] + +uintptr_t& StackFrame::pc() { + return (uintptr_t&)_ucontext->uc_mcontext.__pc; +} + +uintptr_t& StackFrame::sp() { + return (uintptr_t&)REG(LARCH_REG_SP); +} + +uintptr_t& StackFrame::fp() { + return (uintptr_t&)REG(22); +} + +uintptr_t& StackFrame::retval() { + return (uintptr_t&)REG(LARCH_REG_A0); +} + +uintptr_t StackFrame::link() { + return (uintptr_t)REG(LARCH_REG_RA); +} + +uintptr_t StackFrame::arg0() { + return (uintptr_t)REG(LARCH_REG_A0); +} + +uintptr_t StackFrame::arg1() { + return (uintptr_t)REG(LARCH_REG_A0 + 1); +} + +uintptr_t StackFrame::arg2() { + return (uintptr_t)REG(LARCH_REG_A0 + 2); +} + +uintptr_t StackFrame::arg3() { + return (uintptr_t)REG(LARCH_REG_A0 + 3); +} + +uintptr_t StackFrame::jarg0() { + return (uintptr_t)REG(12); +} + +uintptr_t StackFrame::method() { + return (uintptr_t)REG(26); +} + +uintptr_t StackFrame::senderSP() { + return (uintptr_t)REG(27); +} + +void StackFrame::ret() { + pc() = link(); +} + +bool StackFrame::unwindStub(instruction_t* entry, const char* name, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + instruction_t* ip = (instruction_t*)pc; + if (ip == entry + || strncmp(name, "itable", 6) == 0 + || strncmp(name, "vtable", 6) == 0 + || strcmp(name, "InlineCacheBuffer") == 0) + { + pc = link(); + return true; + } + return false; +} + +bool StackFrame::unwindCompiled(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + // Not yet implemented + return false; +} + +bool StackFrame::unwindPrologue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + // Not yet implemented + return false; +} + +bool StackFrame::unwindEpilogue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, 
uintptr_t& fp) { + // Not yet implemented + return false; +} + +bool StackFrame::unwindAtomicStub(const void*& pc) { + // Not needed + return false; +} + +void StackFrame::adjustSP(const void* entry, const void* pc, uintptr_t& sp) { + // Not yet implemented +} + +bool StackFrame::skipFaultInstruction() { + return false; +} + +bool StackFrame::checkInterruptedSyscall() { + return retval() == (uintptr_t)-EINTR; +} + +bool StackFrame::isSyscall(instruction_t* pc) { + return (*pc) == 0x002b0000; +} + +#endif // __loongarch_lp64 diff --git a/ddprof-lib/src/main/cpp/stackFrame_ppc64.cpp b/ddprof-lib/src/main/cpp/stackFrame_ppc64.cpp new file mode 100644 index 00000000..ad20ed5b --- /dev/null +++ b/ddprof-lib/src/main/cpp/stackFrame_ppc64.cpp @@ -0,0 +1,162 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#if defined(__PPC64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +#include +#include +#include "stackFrame.h" + + +uintptr_t& StackFrame::pc() { + return (uintptr_t&)_ucontext->uc_mcontext.regs->nip; +} + +uintptr_t& StackFrame::sp() { + return (uintptr_t&)_ucontext->uc_mcontext.regs->gpr[1]; +} + +uintptr_t& StackFrame::fp() { + return *((uintptr_t*)_ucontext->uc_mcontext.regs->gpr[1]); +} + +uintptr_t& StackFrame::retval() { + return (uintptr_t&)_ucontext->uc_mcontext.regs->gpr[3]; +} + +uintptr_t StackFrame::link() { + return (uintptr_t)_ucontext->uc_mcontext.regs->link; +} + +uintptr_t StackFrame::arg0() { + return (uintptr_t)_ucontext->uc_mcontext.regs->gpr[3]; +} + +uintptr_t StackFrame::arg1() { + return (uintptr_t)_ucontext->uc_mcontext.regs->gpr[4]; +} + +uintptr_t StackFrame::arg2() { + return (uintptr_t)_ucontext->uc_mcontext.regs->gpr[5]; +} + +uintptr_t StackFrame::arg3() { + return (uintptr_t)_ucontext->uc_mcontext.regs->gpr[6]; +} + +uintptr_t StackFrame::jarg0() { + // Unimplemented + return 0; +} + +uintptr_t StackFrame::method() { + // Unimplemented + return 0; +} + +uintptr_t StackFrame::senderSP() { + // Unimplemented + return 0; +} + +void StackFrame::ret() { + pc() = link(); +} + +static inline bool inC1EpilogueCrit(uintptr_t pc) { + if (!(pc & 0xfff)) { + // Make sure we are not at the page boundary, so that reading [pc - 1] is safe + return false; + } + // C1 epilogue and critical section (posX) + // 3821**** add r1,r1,xx + // pos3 xxxxxxxx + // pos2 1000e1eb ld r31,16(r1) + // pos1 a603e87f mtlr r31 + // xxxxxxxx + // 2000804e blr + instruction_t* inst = (instruction_t*)pc; + if (inst[ 1] == 0xebe10010 && inst[2] == 0x7fe803a6 || + inst[ 0] == 0xebe10010 && inst[1] == 0x7fe803a6 || + inst[-1] == 0xebe10010 && inst[0] == 0x7fe803a6) { + return true; + } + + return false; // not in critical section +} + +static inline bool inC2PrologueCrit(uintptr_t pc) { + // C2 prologue and critical section + // f821**** stdu r1, (xx)r1 + // pos1 fa950010 std r20,16(r21) + instruction_t* inst = (instruction_t*)pc; + if (inst[0] == 0xfa950010 && (inst[-1] & 0xffff0000) == 0xf8210000) { + return true; + } + + return false; // not in critical section +} + + +bool StackFrame::unwindStub(instruction_t* entry, const char* name, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + pc = link(); + return true; +} + +bool StackFrame::unwindCompiled(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + // On PPC there is a valid back link to the previous frame at all times. The callee stores + // the return address in the caller's frame before it constructs its own frame. 
After it + has destroyed its frame it restores the link register and returns. A problematic sequence + is the prologue/epilogue of a compiled method before/after frame construction/destruction. + Therefore, popping the frame would not help here, as it is not yet (or no longer) present; + instead, adjusting the pc to the caller's pc does the trick. There are two exceptions to this: + one in the prologue of C2-compiled methods and one in the epilogue of C1-compiled methods. + if (inC1EpilogueCrit(pc)) { + // lr not yet set: use the value stored in the frame + pc = ((uintptr_t*)sp)[2]; + } else if (inC2PrologueCrit(pc)) { + // frame constructed but lr not yet stored in it: just do it here + *(((unsigned long *) _ucontext->uc_mcontext.regs->gpr[21]) + 2) = (unsigned long) _ucontext->uc_mcontext.regs->gpr[20]; + } else { + // most probably the caller's frame is still on top but pc is already in the callee: use the caller's pc + pc = link(); + } + + return true; +} + +bool StackFrame::unwindPrologue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + // Not yet implemented + return false; +} + +bool StackFrame::unwindEpilogue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + // Not yet implemented + return false; +} + +bool StackFrame::unwindAtomicStub(const void*& pc) { + // Not needed + return false; +} + +void StackFrame::adjustSP(const void* entry, const void* pc, uintptr_t& sp) { + // Not needed +} + +bool StackFrame::skipFaultInstruction() { + return false; +} + +bool StackFrame::checkInterruptedSyscall() { + return retval() == (uintptr_t)-EINTR; +} + +bool StackFrame::isSyscall(instruction_t* pc) { + // sc/svc + return (*pc & 0x1f) == 17; +} + +#endif // defined(__PPC64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) diff --git a/ddprof-lib/src/main/cpp/stackFrame_riscv64.cpp b/ddprof-lib/src/main/cpp/stackFrame_riscv64.cpp new file mode 100644 index 00000000..54454177 --- /dev/null +++ b/ddprof-lib/src/main/cpp/stackFrame_riscv64.cpp @@ -0,0 +1,118 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#if defined(__riscv) && (__riscv_xlen == 64) + +#include +#include +#include +#include "stackFrame.h" + +#define REG(l) _ucontext->uc_mcontext.__gregs[l] + +uintptr_t& StackFrame::pc() { + return (uintptr_t&)REG(REG_PC); +} + +uintptr_t& StackFrame::sp() { + return (uintptr_t&)REG(REG_SP); +} + +uintptr_t& StackFrame::fp() { + return (uintptr_t&)REG(REG_S0); +} + +uintptr_t& StackFrame::retval() { + return (uintptr_t&)REG(REG_A0); +} + +uintptr_t StackFrame::link() { + return (uintptr_t)REG(REG_RA); +} + +uintptr_t StackFrame::arg0() { + return (uintptr_t)REG(REG_A0); +} + +uintptr_t StackFrame::arg1() { + return (uintptr_t)REG(REG_A0 + 1); +} + +uintptr_t StackFrame::arg2() { + return (uintptr_t)REG(REG_A0 + 2); +} + +uintptr_t StackFrame::arg3() { + return (uintptr_t)REG(REG_A0 + 3); +} + +uintptr_t StackFrame::jarg0() { + return arg1(); +} + +uintptr_t StackFrame::method() { + return (uintptr_t)REG(31); +} + +uintptr_t StackFrame::senderSP() { + return (uintptr_t)REG(19); +} + +void StackFrame::ret() { + pc() = link(); +} + +bool StackFrame::unwindStub(instruction_t* entry, const char* name, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + instruction_t* ip = (instruction_t*)pc; + if (ip == entry + || strncmp(name, "itable", 6) == 0 + || strncmp(name, "vtable", 6) == 0 + || strcmp(name, "InlineCacheBuffer") == 0) + { + pc = link(); + return true; + } + return false; +} + +bool StackFrame::unwindCompiled(NMethod* nm, 
uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + // Not yet implemented + return false; +} + +bool StackFrame::unwindPrologue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + // Not yet implemented + return false; +} + +bool StackFrame::unwindEpilogue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + // Not yet implemented + return false; +} + +bool StackFrame::unwindAtomicStub(const void*& pc) { + // Not needed + return false; +} + +void StackFrame::adjustSP(const void* entry, const void* pc, uintptr_t& sp) { + // Not yet implemented +} + +bool StackFrame::skipFaultInstruction() { + return false; +} + +bool StackFrame::checkInterruptedSyscall() { + return retval() == (uintptr_t)-EINTR; +} + +bool StackFrame::isSyscall(instruction_t* pc) { + // RISC-V ISA uses ECALL for doing both syscalls and debugger + // calls, so this might technically mismatch. + return (*pc) == 0x00000073; +} + +#endif // riscv diff --git a/ddprof-lib/src/main/cpp/stackFrame_x64.cpp b/ddprof-lib/src/main/cpp/stackFrame_x64.cpp new file mode 100644 index 00000000..7e61a266 --- /dev/null +++ b/ddprof-lib/src/main/cpp/stackFrame_x64.cpp @@ -0,0 +1,322 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifdef __x86_64__ + +#include +#include +#include +#include "stackFrame.h" +#include "vmStructs.h" + + +#ifdef __APPLE__ +# define REG(l, m) _ucontext->uc_mcontext->__ss.__##m +#else +# define REG(l, m) _ucontext->uc_mcontext.gregs[REG_##l] +#endif + + +uintptr_t& StackFrame::pc() { + return (uintptr_t&)REG(RIP, rip); +} + +uintptr_t& StackFrame::sp() { + return (uintptr_t&)REG(RSP, rsp); +} + +uintptr_t& StackFrame::fp() { + return (uintptr_t&)REG(RBP, rbp); +} + +uintptr_t& StackFrame::retval() { + return (uintptr_t&)REG(RAX, rax); +} + +uintptr_t StackFrame::link() { + // No link register on x86 + return 0; +} + +uintptr_t StackFrame::arg0() { + return (uintptr_t)REG(RDI, rdi); +} + +uintptr_t StackFrame::arg1() { + return (uintptr_t)REG(RSI, rsi); +} + +uintptr_t StackFrame::arg2() { + return (uintptr_t)REG(RDX, rdx); +} + +uintptr_t StackFrame::arg3() { + return (uintptr_t)REG(RCX, rcx); +} + +uintptr_t StackFrame::jarg0() { + return arg1(); +} + +uintptr_t StackFrame::method() { + return (uintptr_t)REG(RBX, rbx); +} + +uintptr_t StackFrame::senderSP() { + return (uintptr_t)REG(R13, r13); +} + +void StackFrame::ret() { + pc() = stackAt(0); + sp() += 8; +} + + +__attribute__((no_sanitize("address"))) bool StackFrame::unwindStub(instruction_t* entry, const char* name, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + instruction_t* ip = (instruction_t*)pc; + if (ip == entry || *ip == 0xc3 + || strncmp(name, "itable", 6) == 0 + || strncmp(name, "vtable", 6) == 0 + || strcmp(name, "InlineCacheBuffer") == 0) + { + pc = ((uintptr_t*)sp)[0] - 1; + sp += 8; + return true; + } else if (entry != NULL && ([&] { unsigned int val; memcpy(&val, entry, sizeof(val)); return val; }()) == 0xec8b4855) { + // The stub begins with + // push rbp + // mov rbp, rsp + if (ip == entry + 1) { + pc = ((uintptr_t*)sp)[1] - 1; + sp += 16; + return true; + } else if (withinCurrentStack(fp)) { + sp = fp + 16; + fp = ((uintptr_t*)sp)[-2]; + pc = ((uintptr_t*)sp)[-1] - 1; + return true; + } + } + return false; +} + +bool StackFrame::unwindCompiled(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + instruction_t* ip = (instruction_t*)pc; + instruction_t* entry = (instruction_t*)nm->entry(); + if (ip <= entry + || *ip == 0xc3 // ret + || *ip == 0x55 // push rbp + || ip[-1] 
== 0x5d // after pop rbp + || (ip[0] == 0x41 && ip[1] == 0x85 && ip[2] == 0x02 && ip[3] == 0xc3)) // poll return + { + // Subtract 1 for PC to point to the call instruction, + // otherwise it may be attributed to a wrong bytecode + pc = ((uintptr_t*)sp)[0] - 1; + sp += 8; + return true; + } else if (*ip == 0x5d) { + // pop rbp + fp = ((uintptr_t*)sp)[0]; + pc = ((uintptr_t*)sp)[1] - 1; + sp += 16; + return true; + } else if (ip <= entry + 15 && ((uintptr_t)ip & 0xfff) && ip[-1] == 0x55) { + // push rbp + pc = ((uintptr_t*)sp)[1] - 1; + sp += 16; + return true; + } else if (ip <= entry + 7 && ip[0] == 0x48 && ip[1] == 0x89 && ip[2] == 0x6c && ip[3] == 0x24) { + // mov [rsp + #off], rbp + sp += ip[4] + 16; + pc = ((uintptr_t*)sp)[-1] - 1; + return true; + } else if ((ip[0] == 0x41 && ip[1] == 0x81 && ip[2] == 0x7f && *(u32*)(ip + 4) == 1) || + (ip >= entry + 8 && ip[-8] == 0x41 && ip[-7] == 0x81 && ip[-6] == 0x7f && *(u32*)(ip - 4) == 1)) { + // cmp [r15 + #off], 1 + // nmethod_entry_barrier: frame is fully constructed here + sp += nm->frameSize() * sizeof(void*); + fp = ((uintptr_t*)sp)[-2]; + pc = ((uintptr_t*)sp)[-1]; + return true; + } + return false; +} + +static inline bool isFrameComplete(instruction_t* entry, instruction_t* ip) { + // Frame is fully constructed after rsp is decremented by the frame size. + // Check if there is such an instruction anywhere between + // the method entry and the current instruction pointer. + for (ip -= 4; ip >= entry; ip--) { + if (ip[0] == 0x48 && ip[2] == 0xec && (ip[1] & 0xfd) == 0x81) { // sub rsp, frame_size + return true; + } + } + return false; +} + +bool StackFrame::unwindPrologue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + // 0: mov %eax,-0x14000(%rsp) + // 7: push %rbp + // 8: mov %rsp,%rbp ; for native methods only + // 11: sub $0x50,%rsp + instruction_t* ip = (instruction_t*)pc; + instruction_t* entry = (instruction_t*)nm->entry(); + if (ip <= entry || *ip == 0x55 || nm->frameSize() == 0) { // push rbp + pc = ((uintptr_t*)sp)[0] - 1; + sp += 8; + return true; + } else if (ip <= entry + 15 && ip[-1] == 0x55) { // right after push rbp + pc = ((uintptr_t*)sp)[1] - 1; + sp += 16; + return true; + } else if (ip <= entry + 31 && isFrameComplete(entry, ip)) { + sp += nm->frameSize() * sizeof(void*); + fp = ((uintptr_t*)sp)[-2]; + pc = ((uintptr_t*)sp)[-1]; + return true; + } + return false; +} + +static inline bool isPollReturn(instruction_t* ip) { + // JDK 17+ + // pop %rbp + // cmp 0x348(%r15),%rsp + // ja offset_32 + // ret + if (ip[0] == 0x49 && ip[1] == 0x3b && (ip[2] == 0x67 || ip[2] == 0xa7) && ip[-1] == 0x5d) { + // cmp, preceded by pop rbp + return true; + } else if (ip[0] == 0x0f && ip[1] == 0x87 && ip[6] == 0xc3) { + // ja, followed by ret + return true; + } + + // JDK 11 + // pop %rbp + // mov 0x108(%r15),%r10 + // test %eax,(%r10) + // ret + if (ip[0] == 0x4d && ip[1] == 0x8b && ip[2] == 0x97 && ip[-1] == 0x5d) { + // mov, preceded by pop rbp + return true; + } else if (ip[0] == 0x41 && ip[1] == 0x85 && ip[2] == 0x02 && ip[3] == 0xc3) { + // test, followed by ret + return true; + } + + // JDK 8 + // pop %rbp + // test %eax,offset(%rip) + // ret + if (ip[0] == 0x85 && ip[1] == 0x05 && ip[6] == 0xc3) { + // test, followed by ret + return true; + } + + return false; +} + +bool StackFrame::unwindEpilogue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + // add $0x40,%rsp + // pop %rbp + // {poll_return} + // ret + instruction_t* ip = (instruction_t*)pc; + if (*ip == 0xc3 || isPollReturn(ip)) { // ret + pc = 
((uintptr_t*)sp)[0] - 1; + sp += 8; + return true; + } else if (*ip == 0x5d) { // pop rbp + fp = ((uintptr_t*)sp)[0]; + pc = ((uintptr_t*)sp)[1] - 1; + sp += 16; + return true; + } + return false; +} + +bool StackFrame::unwindAtomicStub(const void*& pc) { + // Not needed + return false; +} + +void StackFrame::adjustSP(const void* entry, const void* pc, uintptr_t& sp) { + // Not needed +} + +// Skip failed MOV instruction by writing 0 to destination register +bool StackFrame::skipFaultInstruction() { + unsigned int insn = *(unsigned int*)pc(); + if ((insn & 0x80fff8) == 0x008b48) { + // mov r64, [r64 + offs] + unsigned int reg = ((insn << 1) & 8) | ((insn >> 19) & 7); + switch (reg) { + case 0x0: REG(RAX, rax) = 0; break; + case 0x1: REG(RCX, rcx) = 0; break; + case 0x2: REG(RDX, rdx) = 0; break; + case 0x3: REG(RBX, rbx) = 0; break; + case 0x4: return false; // Do not modify RSP + case 0x5: REG(RBP, rbp) = 0; break; + case 0x6: REG(RSI, rsi) = 0; break; + case 0x7: REG(RDI, rdi) = 0; break; + case 0x8: REG(R8 , r8 ) = 0; break; + case 0x9: REG(R9 , r9 ) = 0; break; + case 0xa: REG(R10, r10) = 0; break; + case 0xb: REG(R11, r11) = 0; break; + case 0xc: REG(R12, r12) = 0; break; + case 0xd: REG(R13, r13) = 0; break; + case 0xe: REG(R14, r14) = 0; break; + case 0xf: REG(R15, r15) = 0; break; + } + + unsigned int insn_size = 3; + if ((insn & 0x070000) == 0x040000) insn_size++; + if ((insn & 0x400000) == 0x400000) insn_size++; + pc() += insn_size; + return true; + } + return false; +} + +__attribute__((no_sanitize("address"))) bool StackFrame::checkInterruptedSyscall() { +#ifdef __APPLE__ + // We are not interested in syscalls that do not check error code, e.g. semaphore_wait_trap + if (*(instruction_t*)pc() == 0xc3) { + return true; + } + // If CF is set, the error code is in low byte of eax, + // some other syscalls (ulock_wait) do not set CF when interrupted + if (REG(EFL, rflags) & 1) { + return (retval() & 0xff) == EINTR || (retval() & 0xff) == ETIMEDOUT; + } else { + return retval() == (uintptr_t)-EINTR; + } +#else + if (retval() == (uintptr_t)-EINTR) { + // Workaround for JDK-8237858: restart the interrupted poll() manually. 
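+        // Rewinding pc by 7 bytes re-executes both the 5-byte 'mov eax, imm32'
+        // and the 2-byte 'syscall' instruction once the signal handler returns.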
+ // Check if the previous instruction is mov eax, SYS_poll with infinite timeout or + // mov eax, SYS_ppoll with any timeout (ppoll adjusts timeout automatically) + uintptr_t pc = this->pc(); + if ((pc & 0xfff) >= 7 && *(instruction_t*)(pc - 7) == 0xb8) { + int nr = ([&] { int val; memcpy(&val, (const void*)(pc - 6), sizeof(val)); return val; }()); + if (nr == SYS_ppoll + || (nr == SYS_poll && (int)REG(RDX, rdx) == -1) + || (nr == SYS_epoll_wait && (int)REG(R10, r10) == -1) + || (nr == SYS_epoll_pwait && (int)REG(R10, r10) == -1)) { + this->pc() = pc - 7; + } + } + return true; + } + return false; +#endif +} + +bool StackFrame::isSyscall(instruction_t* pc) { + return pc[0] == 0x0f && pc[1] == 0x05; +} + +#endif // __x86_64__ diff --git a/ddprof-lib/src/main/cpp/stackWalker.cpp b/ddprof-lib/src/main/cpp/stackWalker.cpp new file mode 100644 index 00000000..0f1f038c --- /dev/null +++ b/ddprof-lib/src/main/cpp/stackWalker.cpp @@ -0,0 +1,590 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include "stackWalker.h" +#include "dwarf.h" +#include "profiler.h" +#include "safeAccess.h" +#include "stackFrame.h" +#include "symbols.h" +#include "vmStructs.h" + + +const uintptr_t SAME_STACK_DISTANCE = 8192; +const uintptr_t MAX_WALK_SIZE = 0x100000; +const intptr_t MAX_INTERPRETER_FRAME_SIZE = 0x1000; + +static ucontext_t empty_ucontext{}; + +// Use validation helpers from header (shared with tests) +using StackWalkValidation::inDeadZone; +using StackWalkValidation::aligned; +using StackWalkValidation::MAX_FRAME_SIZE; + +static inline bool sameStack(void* hi, void* lo) { + return (uintptr_t)hi - (uintptr_t)lo < SAME_STACK_DISTANCE; +} + +// AArch64: on Linux, frame link is stored at the top of the frame, +// while on macOS, frame link is at the bottom. 
+static inline uintptr_t defaultSenderSP(uintptr_t sp, uintptr_t fp) { +#ifdef __APPLE__ + return sp + 2 * sizeof(void*); +#else + return fp; +#endif +} + +static inline void fillFrame(ASGCT_CallFrame& frame, ASGCT_CallFrameType type, const char* name) { + frame.bci = type; + frame.method_id = (jmethodID)name; +} + +static inline void fillFrame(ASGCT_CallFrame& frame, ASGCT_CallFrameType type, u32 class_id) { + frame.bci = type; + frame.method_id = (jmethodID)(uintptr_t)class_id; +} + +static inline void fillFrame(ASGCT_CallFrame& frame, FrameTypeId type, int bci, jmethodID method) { + frame.bci = FrameType::encode(type, bci); + frame.method_id = method; +} + +static jmethodID getMethodId(VMMethod* method) { + if (!inDeadZone(method) && aligned((uintptr_t)method)) { + return method->validatedId(); + } + return NULL; +} + + +int StackWalker::walkFP(void* ucontext, const void** callchain, int max_depth, StackContext* java_ctx) { + const void* pc; + uintptr_t fp; + uintptr_t sp; + uintptr_t bottom = (uintptr_t)&sp + MAX_WALK_SIZE; + + StackFrame frame(ucontext); + if (ucontext == NULL) { + pc = callerPC(); + fp = (uintptr_t)callerFP(); + sp = (uintptr_t)callerSP(); + } else { + pc = (const void*)frame.pc(); + fp = frame.fp(); + sp = frame.sp(); + } + + int depth = 0; + + // Walk until the bottom of the stack or until the first Java frame + while (depth < max_depth) { + if (CodeHeap::contains(pc) && !(depth == 0 && frame.unwindAtomicStub(pc)) && + VMThread::current() != nullptr) { // If it is not a JVM thread, it cannot have Java frame + java_ctx->set(pc, sp, fp); + break; + } + + callchain[depth++] = pc; + + // Check if the next frame is below on the current stack + if (fp < sp || fp >= sp + MAX_FRAME_SIZE || fp >= bottom) { + break; + } + + // Frame pointer must be word aligned + if (!aligned(fp)) { + break; + } + + pc = stripPointer(SafeAccess::load((void**)fp + FRAME_PC_SLOT)); + if (inDeadZone(pc)) { + break; + } + + sp = fp + (FRAME_PC_SLOT + 1) * sizeof(void*); + fp = *(uintptr_t*)fp; + } + + return depth; +} + +int StackWalker::walkDwarf(void* ucontext, const void** callchain, int max_depth, StackContext* java_ctx) { + const void* pc; + uintptr_t fp; + uintptr_t sp; + uintptr_t bottom = (uintptr_t)&sp + MAX_WALK_SIZE; + + StackFrame frame(ucontext); + if (ucontext == NULL) { + pc = callerPC(); + fp = (uintptr_t)callerFP(); + sp = (uintptr_t)callerSP(); + } else { + pc = (const void*)frame.pc(); + fp = frame.fp(); + sp = frame.sp(); + } + + int depth = 0; + Profiler* profiler = Profiler::instance(); + + // Walk until the bottom of the stack or until the first Java frame + while (depth < max_depth) { + if (CodeHeap::contains(pc) && !(depth == 0 && frame.unwindAtomicStub(pc)) && + VMThread::current() != nullptr) { // If it is not a JVM thread, it cannot have Java frame + // Don't dereference pc as it may point to unreadable memory + // frame.adjustSP(page_start, pc, sp); + java_ctx->set(pc, sp, fp); + break; + } + + callchain[depth++] = pc; + + uintptr_t prev_sp = sp; + CodeCache* cc = profiler->findLibraryByAddress(pc); + FrameDesc f = cc != NULL ? cc->findFrameDesc(pc) : FrameDesc::default_frame; + + u8 cfa_reg = (u8)f.cfa; + int cfa_off = f.cfa >> 8; + if (cfa_reg == DW_REG_SP) { + sp = sp + cfa_off; + } else if (cfa_reg == DW_REG_FP) { + sp = fp + cfa_off; + } else if (cfa_reg == DW_REG_PLT) { + sp += ((uintptr_t)pc & 15) >= 11 ? 
cfa_off * 2 : cfa_off; + } else { + break; + } + + // Check if the next frame is below on the current stack + if (sp < prev_sp || sp >= prev_sp + MAX_FRAME_SIZE || sp >= bottom) { + break; + } + + // Stack pointer must be word aligned + if (!aligned(sp)) { + break; + } + + const void* prev_pc = pc; + if (f.fp_off & DW_PC_OFFSET) { + pc = (const char*)pc + (f.fp_off >> 1); + } else { + if (f.fp_off != DW_SAME_FP && f.fp_off < MAX_FRAME_SIZE && f.fp_off > -MAX_FRAME_SIZE) { + fp = (uintptr_t)SafeAccess::load((void**)(sp + f.fp_off)); + } + + if (EMPTY_FRAME_SIZE > 0 || f.pc_off != DW_LINK_REGISTER) { + pc = stripPointer(SafeAccess::load((void**)(sp + f.pc_off))); + } else if (depth == 1) { + pc = (const void*)frame.link(); + } else { + break; + } + + if (EMPTY_FRAME_SIZE == 0 && cfa_off == 0 && f.fp_off != DW_SAME_FP) { + // AArch64 default_frame + sp = defaultSenderSP(sp, fp); + if (sp < prev_sp || sp >= bottom || !aligned(sp)) { + break; + } + } + } + + if (inDeadZone(pc) || (pc == prev_pc && sp == prev_sp)) { + break; + } + } + + return depth; +} + +__attribute__((no_sanitize("address"))) int StackWalker::walkVM(void* ucontext, ASGCT_CallFrame* frames, int max_depth, + StackWalkFeatures features, EventType event_type) { + if (ucontext == NULL) { + return walkVM(&empty_ucontext, frames, max_depth, features, event_type, + callerPC(), (uintptr_t)callerSP(), (uintptr_t)callerFP()); + } else { + StackFrame frame(ucontext); + return walkVM(ucontext, frames, max_depth, features, event_type, + (const void*)frame.pc(), frame.sp(), frame.fp()); + } +} + +__attribute__((no_sanitize("address"))) int StackWalker::walkVM(void* ucontext, ASGCT_CallFrame* frames, int max_depth, JavaFrameAnchor* anchor, EventType event_type) { + uintptr_t sp = anchor->lastJavaSP(); + if (sp == 0) { + return 0; + } + + uintptr_t fp = anchor->lastJavaFP(); + if (fp == 0) { + fp = sp; + } + + const void* pc = anchor->lastJavaPC(); + if (pc == NULL) { + pc = ((const void**)sp)[-1]; + } + + StackWalkFeatures no_features{}; + return walkVM(ucontext, frames, max_depth, no_features, event_type, pc, sp, fp); +} + +__attribute__((no_sanitize("address"))) int StackWalker::walkVM(void* ucontext, ASGCT_CallFrame* frames, int max_depth, + StackWalkFeatures features, EventType event_type, + const void* pc, uintptr_t sp, uintptr_t fp) { + StackFrame frame(ucontext); + uintptr_t bottom = (uintptr_t)&frame + MAX_WALK_SIZE; + + Profiler* profiler = Profiler::instance(); + int bcp_offset = InterpreterFrame::bcp_offset(); + + jmp_buf crash_protection_ctx; + VMThread* vm_thread = VMThread::current(); + void* saved_exception = vm_thread != NULL ? 
vm_thread->exception() : NULL; + + // Should be preserved across setjmp/longjmp + volatile int depth = 0; + + JavaFrameAnchor* anchor = NULL; + if (vm_thread != NULL) { + anchor = vm_thread->anchor(); + vm_thread->exception() = &crash_protection_ctx; + if (setjmp(crash_protection_ctx) != 0) { + vm_thread->exception() = saved_exception; + if (depth < max_depth) { + fillFrame(frames[depth++], BCI_ERROR, "break_not_walkable"); + } + return depth; + } + } + + const void* prev_native_pc = NULL; + // Show extended frame types and stub frames for execution-type events + bool details = event_type <= MALLOC_SAMPLE || features.mixed; + + if (details && vm_thread != NULL && vm_thread->isJavaThread()) { + anchor = vm_thread->anchor(); + } + + unwind_loop: + + // Walk until the bottom of the stack or until the first Java frame + while (depth < max_depth) { + if (CodeHeap::contains(pc)) { + // If we're in JVM-generated code but don't have a VMThread, we cannot safely + // walk the Java stack because crash protection is not set up. + // + // This can occur during JNI attach/detach transitions: when a thread detaches, + // pthread_setspecific() clears the VMThread TLS, but if a profiling signal arrives + // while PC is still in JVM stubs (JavaCalls, method entry/exit), we see CodeHeap + // code without VMThread context. + // + // Without vm_thread, crash protection via setjmp/longjmp cannot work + // (checkFault() needs vm_thread->exception() to longjmp). Any memory dereference in interpreter + // frame handling or NMethod validation would crash the process with unrecoverable SEGV. + // + // The missing VMThread is a timing issue during thread lifecycle. + if (vm_thread == NULL) { + fillFrame(frames[depth++], BCI_ERROR, "break_no_vmthread"); + break; + } + prev_native_pc = NULL; // we are in JVM code, no previous 'native' PC + NMethod* nm = CodeHeap::findNMethod(pc); + if (nm == NULL) { + if (anchor == NULL) { + // Add an error frame only if we cannot recover + fillFrame(frames[depth++], BCI_ERROR, "unknown_nmethod"); + } + break; + } + + // Always prefer JavaFrameAnchor when it is available, + // since it provides reliable SP and FP. + // Do not treat the topmost stub as Java frame. + if (anchor != NULL && (depth > 0 || !nm->isStub())) { + if (anchor->getFrame(pc, sp, fp) && !nm->contains(pc)) { + anchor = NULL; + continue; // NMethod has changed as a result of correction + } + anchor = NULL; + } + + if (nm->isNMethod()) { + int level = nm->level(); + FrameTypeId type = details && level >= 1 && level <= 3 ? FRAME_C1_COMPILED : FRAME_JIT_COMPILED; + fillFrame(frames[depth++], type, 0, nm->method()->id()); + + if (nm->isFrameCompleteAt(pc)) { + if (depth == 1 && frame.unwindEpilogue(nm, (uintptr_t&)pc, sp, fp)) { + continue; + } + + int scope_offset = nm->findScopeOffset(pc); + if (scope_offset > 0) { + depth--; + ScopeDesc scope(nm); + do { + scope_offset = scope.decode(scope_offset); + if (details) { + type = scope_offset > 0 ? FRAME_INLINED : + level >= 1 && level <= 3 ? 
FRAME_C1_COMPILED : FRAME_JIT_COMPILED; + } + fillFrame(frames[depth++], type, scope.bci(), scope.method()->id()); + } while (scope_offset > 0 && depth < max_depth); + } + + // Handle situations when sp is temporarily changed in the compiled code + frame.adjustSP(nm->entry(), pc, sp); + + sp += nm->frameSize() * sizeof(void*); + fp = ((uintptr_t*)sp)[-FRAME_PC_SLOT - 1]; + pc = ((const void**)sp)[-FRAME_PC_SLOT]; + continue; + } else if (frame.unwindPrologue(nm, (uintptr_t&)pc, sp, fp)) { + continue; + } + + fillFrame(frames[depth++], BCI_ERROR, "break_compiled"); + break; + } else if (nm->isInterpreter()) { + if (vm_thread != NULL && vm_thread->inDeopt()) { + fillFrame(frames[depth++], BCI_ERROR, "break_deopt"); + break; + } + + bool is_plausible_interpreter_frame = !inDeadZone((const void*)fp) && aligned(fp) + && sp > fp - MAX_INTERPRETER_FRAME_SIZE + && sp < fp + bcp_offset * sizeof(void*); + + if (is_plausible_interpreter_frame) { + VMMethod* method = ((VMMethod**)fp)[InterpreterFrame::method_offset]; + jmethodID method_id = getMethodId(method); + if (method_id != NULL) { + const char* bytecode_start = method->bytecode(); + const char* bcp = ((const char**)fp)[bcp_offset]; + int bci = bytecode_start == NULL || bcp < bytecode_start ? 0 : bcp - bytecode_start; + fillFrame(frames[depth++], FRAME_INTERPRETED, bci, method_id); + + sp = ((uintptr_t*)fp)[InterpreterFrame::sender_sp_offset]; + pc = stripPointer(((void**)fp)[FRAME_PC_SLOT]); + fp = *(uintptr_t*)fp; + continue; + } + } + + if (depth == 0) { + VMMethod* method = (VMMethod*)frame.method(); + jmethodID method_id = getMethodId(method); + if (method_id != NULL) { + fillFrame(frames[depth++], FRAME_INTERPRETED, 0, method_id); + + if (is_plausible_interpreter_frame) { + pc = stripPointer(((void**)fp)[FRAME_PC_SLOT]); + sp = frame.senderSP(); + fp = *(uintptr_t*)fp; + } else { + pc = stripPointer(*(void**)sp); + sp = frame.senderSP(); + } + continue; + } + } + + fillFrame(frames[depth++], BCI_ERROR, "break_interpreted"); + break; + } else if (nm->isEntryFrame(pc) && !features.mixed) { + JavaFrameAnchor* next_anchor = JavaFrameAnchor::fromEntryFrame(fp); + if (next_anchor == NULL) { + fillFrame(frames[depth++], BCI_ERROR, "break_entry_frame"); + break; + } + uintptr_t prev_sp = sp; + if (!next_anchor->getFrame(pc, sp, fp)) { + // End of Java stack + break; + } + if (sp < prev_sp || sp >= bottom || !aligned(sp)) { + fillFrame(frames[depth++], BCI_ERROR, "break_entry_frame"); + break; + } + continue; + } else { + if (features.vtable_target && nm->isVTableStub() && depth == 0) { + uintptr_t receiver = frame.jarg0(); + if (receiver != 0) { + VMSymbol* symbol = VMKlass::fromOop(receiver)->name(); + u32 class_id = profiler->classMap()->lookup(symbol->body(), symbol->length()); + fillFrame(frames[depth++], BCI_ALLOC, class_id); + } + } + + CodeBlob* stub = profiler->findRuntimeStub(pc); + const void* start = stub != NULL ? stub->_start : nm->code(); + const char* name = stub != NULL ? 
stub->_name : nm->name(); + + if (details) { + fillFrame(frames[depth++], BCI_NATIVE_FRAME, name); + } + + if (frame.unwindStub((instruction_t*)start, name, (uintptr_t&)pc, sp, fp)) { + continue; + } + + if (depth > 1 && nm->frameSize() > 0) { + sp += nm->frameSize() * sizeof(void*); + fp = ((uintptr_t*)sp)[-FRAME_PC_SLOT - 1]; + pc = ((const void**)sp)[-FRAME_PC_SLOT]; + continue; + } + } + } else { + const char* method_name = profiler->findNativeMethod(pc); + char mark; + if (method_name != NULL && (mark = NativeFunc::mark(method_name)) != 0) { + if (mark == MARK_ASYNC_PROFILER && event_type == MALLOC_SAMPLE) { + // Skip all internal frames above malloc_hook functions, leave the hook itself + depth = 0; + } else if (mark == MARK_COMPILER_ENTRY && features.comp_task && vm_thread != NULL) { + // Insert current compile task as a pseudo Java frame + VMMethod* method = vm_thread->compiledMethod(); + jmethodID method_id = method != NULL ? method->id() : NULL; + if (method_id != NULL) { + fillFrame(frames[depth++], FRAME_JIT_COMPILED, 0, method_id); + } + } + } else if (method_name == NULL && details) { + // These workarounds will minimize the number of unknown frames for 'vm' + // We want to keep the 'raw' data in 'vmx', though + if (anchor) { + uintptr_t prev_sp = sp; + sp = anchor->lastJavaSP(); + fp = anchor->lastJavaFP(); + pc = anchor->lastJavaPC(); + if (sp != 0 && pc != NULL) { + // already used the anchor; disable it + anchor = NULL; + if (sp < prev_sp || sp >= bottom || !aligned(sp)) { + fillFrame(frames[depth++], BCI_ERROR, "break_no_anchor"); + break; + } + // we restored from Java frame; clean the prev_native_pc + prev_native_pc = NULL; + if (depth > 0) { + fillFrame(frames[depth++], BCI_ERROR, "[skipped frames]"); + } + continue; + } + } + const char* prev_symbol = prev_native_pc != NULL ? profiler->findNativeMethod(prev_native_pc) : NULL; + if (prev_symbol != NULL && strstr(prev_symbol, "thread_start")) { + // Unwinding from Rust 'thread_start' but not having enough info to do it correctly + // Rather, just assume that this is the root frame + break; + } + if (Symbols::isLibcOrPthreadAddress((uintptr_t)pc)) { + // We might not have the libc symbols available + // The unwinding is also not super reliable; best to jump out if this is not the leaf + fillFrame(frames[depth++], BCI_NATIVE_FRAME, "[libc/pthread]"); + break; + } + fillFrame(frames[depth++], BCI_ERROR, "break_no_anchor"); + break; + } + fillFrame(frames[depth++], BCI_NATIVE_FRAME, method_name); + } + + uintptr_t prev_sp = sp; + CodeCache* cc = profiler->findLibraryByAddress(pc); + FrameDesc f = cc != NULL ? cc->findFrameDesc(pc) : FrameDesc::default_frame; + + u8 cfa_reg = (u8)f.cfa; + int cfa_off = f.cfa >> 8; + if (cfa_reg == DW_REG_SP) { + sp = sp + cfa_off; + } else if (cfa_reg == DW_REG_FP) { + sp = fp + cfa_off; + } else if (cfa_reg == DW_REG_PLT) { + sp += ((uintptr_t)pc & 15) >= 11 ? 
cfa_off * 2 : cfa_off;
+        } else {
+            break;
+        }
+
+        // Check if the next frame is below on the current stack
+        if (sp < prev_sp || sp >= prev_sp + MAX_FRAME_SIZE || sp >= bottom) {
+            break;
+        }
+
+        // Stack pointer must be word aligned
+        if (!aligned(sp)) {
+            break;
+        }
+
+        // Store the previous pc before unwinding
+        prev_native_pc = pc;
+        if (f.fp_off & DW_PC_OFFSET) {
+            pc = (const char*)pc + (f.fp_off >> 1);
+        } else {
+            if (f.fp_off != DW_SAME_FP && f.fp_off < MAX_FRAME_SIZE && f.fp_off > -MAX_FRAME_SIZE) {
+                fp = (uintptr_t)SafeAccess::load((void**)(sp + f.fp_off));
+            }
+
+            if (EMPTY_FRAME_SIZE > 0 || f.pc_off != DW_LINK_REGISTER) {
+                pc = stripPointer(SafeAccess::load((void**)(sp + f.pc_off)));
+            } else if (depth == 1) {
+                pc = (const void*)frame.link();
+            } else {
+                break;
+            }
+
+            if (EMPTY_FRAME_SIZE == 0 && cfa_off == 0 && f.fp_off != DW_SAME_FP) {
+                // AArch64 default_frame
+                sp = defaultSenderSP(sp, fp);
+                if (sp < prev_sp || sp >= bottom || !aligned(sp)) {
+                    break;
+                }
+            }
+        }
+
+        if (inDeadZone(pc) || (pc == prev_native_pc && sp == prev_sp)) {
+            break;
+        }
+    }
+
+    // If we did not meet a Java frame but the current thread has JavaFrameAnchor set,
+    // retry stack walking from the anchor
+    if (anchor != NULL && anchor->getFrame(pc, sp, fp)) {
+        anchor = NULL;
+        while (depth > 0 && frames[depth - 1].method_id == NULL) depth--; // pop unknown frames
+        goto unwind_loop;
+    }
+
+    if (vm_thread != NULL) vm_thread->exception() = saved_exception;
+
+    return depth;
+}
+
+void StackWalker::checkFault() {
+    if (VMThread::key() < 0) {
+        // JVM has not been loaded or VMStructs have not been initialized yet
+        return;
+    }
+
+    VMThread* vm_thread = VMThread::current();
+    if (vm_thread != NULL && sameStack(vm_thread->exception(), &vm_thread)) {
+        longjmp(*(jmp_buf*)vm_thread->exception(), 1);
+    }
+}
diff --git a/ddprof-lib/src/main/cpp/stackWalker.h b/ddprof-lib/src/main/cpp/stackWalker.h
new file mode 100644
index 00000000..84025157
--- /dev/null
+++ b/ddprof-lib/src/main/cpp/stackWalker.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright The async-profiler authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef _STACKWALKER_H
+#define _STACKWALKER_H
+
+#include <stdint.h>
+#include "arguments.h"
+#include "event.h"
+#include "vmEntry.h"
+
+
+class JavaFrameAnchor;
+
+struct StackContext {
+    const void* pc;
+    uintptr_t sp;
+    uintptr_t fp;
+    u64 cpu;
+
+    void set(const void* pc, uintptr_t sp, uintptr_t fp) {
+        this->pc = pc;
+        this->sp = sp;
+        this->fp = fp;
+    }
+};
+
+// Stack walking validation helpers (used by implementation and tests)
+namespace StackWalkValidation {
+    const uintptr_t DEAD_ZONE = 0x1000;
+    const intptr_t MAX_FRAME_SIZE = 0x40000;
+
+    // Check if pointer is in dead zone (very low or very high address)
+    static inline bool inDeadZone(const void* ptr) {
+        return ptr < (const void*)DEAD_ZONE || ptr > (const void*)-DEAD_ZONE;
+    }
+
+    // Check if pointer is properly aligned
+    static inline bool aligned(uintptr_t ptr) {
+        return (ptr & (sizeof(uintptr_t) - 1)) == 0;
+    }
+}
+
+class StackWalker {
+  private:
+    static int walkVM(void* ucontext, ASGCT_CallFrame* frames, int max_depth,
+                      StackWalkFeatures features, EventType event_type,
+                      const void* pc, uintptr_t sp, uintptr_t fp);
+
+  public:
+    static int walkFP(void* ucontext, const void** callchain, int max_depth, StackContext* java_ctx);
+    static int
walkDwarf(void* ucontext, const void** callchain, int max_depth, StackContext* java_ctx); + static int walkVM(void* ucontext, ASGCT_CallFrame* frames, int max_depth, StackWalkFeatures features, EventType event_type); + static int walkVM(void* ucontext, ASGCT_CallFrame* frames, int max_depth, JavaFrameAnchor* anchor, EventType event_type); + + static void checkFault(); +}; + +#endif // _STACKWALKER_H diff --git a/ddprof-lib/src/main/cpp/symbols.h b/ddprof-lib/src/main/cpp/symbols.h index 749a2123..b315d51e 100644 --- a/ddprof-lib/src/main/cpp/symbols.h +++ b/ddprof-lib/src/main/cpp/symbols.h @@ -1,17 +1,6 @@ /* - * Copyright 2017 Andrei Pangin - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 */ #ifndef _SYMBOLS_H @@ -20,22 +9,27 @@ #include "codeCache.h" #include "mutex.h" +#include + + class Symbols { -private: - static Mutex _parse_lock; - static bool _have_kernel_symbols; - static bool _libs_limit_reported; - -public: - static void parseKernelSymbols(CodeCache *cc); - static void parseLibraries(CodeCacheArray *array, bool kernel_symbols); - // The clear function is mainly for test purposes - // There are internal caches that are not associated to the array - static void clearParsingCaches(); - static bool haveKernelSymbols() { return _have_kernel_symbols; } - - // Some symbols are always roots - eg. no unwinding should be attempted once they are encountered - static bool isRootSymbol(const void* address); + private: + static Mutex _parse_lock; + static bool _have_kernel_symbols; + static bool _libs_limit_reported; + + public: + static void initLibraryRanges(); + static void parseKernelSymbols(CodeCache* cc); + static void parseLibraries(CodeCacheArray* array, bool kernel_symbols); + + static bool haveKernelSymbols() { + return _have_kernel_symbols; + } + // Clear internal caches - mainly for test purposes + static void clearParsingCaches(); + // Fast range check: does this PC lie in libc or libpthread? 
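+    // Used by the stack walker to stop unwinding early when a sample lands in
+    // libc/pthread code, where symbols and unwind info may be unavailable.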
+ static bool isLibcOrPthreadAddress(uintptr_t pc); }; class UnloadProtection { diff --git a/ddprof-lib/src/main/cpp/symbols_linux.cpp b/ddprof-lib/src/main/cpp/symbols_linux.cpp new file mode 100644 index 00000000..8d13202e --- /dev/null +++ b/ddprof-lib/src/main/cpp/symbols_linux.cpp @@ -0,0 +1,1066 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifdef __linux__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "symbols.h" +#include "dwarf.h" +#include "fdtransferClient.h" +#include "log.h" +#include "os.h" + +// Simple address range +struct Range { + uintptr_t start; + uintptr_t end; +}; + +static bool range_valid(const Range* r) { + return r->start && r->end && r->end > r->start; +} + +static Range g_libc = {0, 0}; +static Range g_libpthread = {0, 0}; +static bool g_lib_ranges_inited = false; + +// Unified dl_iterate_phdr callback context +struct UnifiedCtx { + void* fbase; // For range_for_fbase functionality + Range* out; // For range_for_fbase functionality + const void** main_phdr; // For getMainPhdr functionality + void* libc_fbase; // For init_lib_ranges_once functionality + void* pthread_fbase; // For init_lib_ranges_once functionality + Range* libc_range; // For init_lib_ranges_once functionality + Range* pthread_range; // For init_lib_ranges_once functionality +}; + +// Unified callback for both range computation and main phdr collection +static int unified_phdr_cb(dl_phdr_info* info, size_t /*unused*/, void* data) { + UnifiedCtx* ctx = (UnifiedCtx*)data; + + // Main executable's program header (first entry) + if (ctx->main_phdr != NULL && *ctx->main_phdr == NULL) { + *ctx->main_phdr = info->dlpi_phdr; + } + + // Range computation for specific fbase (range_for_fbase functionality) + if (ctx->fbase != NULL && (void*)info->dlpi_addr == ctx->fbase) { + uintptr_t minv = (uintptr_t)-1; + uintptr_t maxv = 0; + for (int i = 0; i < info->dlpi_phnum; i++) { + const ElfW(Phdr)* ph = &info->dlpi_phdr[i]; + if (ph->p_type != PT_LOAD) continue; + uintptr_t vaddr = (uintptr_t)info->dlpi_addr + ph->p_vaddr; + uintptr_t vend = vaddr + ph->p_memsz; + if (vaddr < minv) minv = vaddr; + if (vend > maxv) maxv = vend; + } + if (minv != (uintptr_t)-1 && maxv > minv) { + ctx->out->start = minv; + ctx->out->end = maxv; + } + } + + // Library range computation (init_lib_ranges_once functionality) + if (ctx->libc_fbase != NULL && (void*)info->dlpi_addr == ctx->libc_fbase) { + uintptr_t minv = (uintptr_t)-1; + uintptr_t maxv = 0; + for (int i = 0; i < info->dlpi_phnum; i++) { + const ElfW(Phdr)* ph = &info->dlpi_phdr[i]; + if (ph->p_type != PT_LOAD) continue; + uintptr_t vaddr = (uintptr_t)info->dlpi_addr + ph->p_vaddr; + uintptr_t vend = vaddr + ph->p_memsz; + if (vaddr < minv) minv = vaddr; + if (vend > maxv) maxv = vend; + } + if (minv != (uintptr_t)-1 && maxv > minv) { + ctx->libc_range->start = minv; + ctx->libc_range->end = maxv; + } + } + + if (ctx->pthread_fbase != NULL && (void*)info->dlpi_addr == ctx->pthread_fbase) { + uintptr_t minv = (uintptr_t)-1; + uintptr_t maxv = 0; + for (int i = 0; i < info->dlpi_phnum; i++) { + const ElfW(Phdr)* ph = &info->dlpi_phdr[i]; + if (ph->p_type != PT_LOAD) continue; + uintptr_t vaddr = (uintptr_t)info->dlpi_addr + ph->p_vaddr; + uintptr_t vend = vaddr + ph->p_memsz; + if (vaddr < minv) minv = vaddr; + if (vend > maxv) maxv = vend; + } + if (minv != (uintptr_t)-1 && maxv > minv) { 
+ ctx->pthread_range->start = minv; + ctx->pthread_range->end = maxv; + } + } + + return 0; // continue iteration +} + +// Main program header - initialized lazily +static const void* _main_phdr = NULL; +static pthread_once_t _main_phdr_once = PTHREAD_ONCE_INIT; +static const char* _ld_base = (const char*)getauxval(AT_BASE); + +// Initialize main phdr once +static void init_main_phdr_once() { + UnifiedCtx ctx = {NULL, NULL, &_main_phdr, NULL, NULL, NULL, NULL}; + dl_iterate_phdr(&unified_phdr_cb, &ctx); +} + +// Ensure main phdr is initialized +static void ensure_main_phdr_initialized() { + pthread_once(&_main_phdr_once, init_main_phdr_once); +} + +static Range range_for_fbase(void* fbase) { + Range r = {0, 0}; + if (!fbase) return r; + UnifiedCtx ctx = {fbase, &r, NULL, NULL, NULL, NULL, NULL}; + dl_iterate_phdr(&unified_phdr_cb, &ctx); + return r; +} + +static void init_lib_ranges_once() { + if (g_lib_ranges_inited) return; + g_lib_ranges_inited = true; + + // libc anchor: prefer gnu_get_libc_version if present; fallback to strlen + void* libc_sym = dlsym(RTLD_DEFAULT, "gnu_get_libc_version"); + if (!libc_sym) libc_sym = (void*)&strlen; + + Dl_info di = {0}; + void* libc_fbase = NULL; + if (dladdr(libc_sym, &di) && di.dli_fbase) { + libc_fbase = di.dli_fbase; + } + + // pthread anchor: pthread_create (on glibc >= 2.34 this lives in libc) + Dl_info di2 = {0}; + void* pthread_fbase = NULL; + if (dladdr((void*)&pthread_create, &di2) && di2.dli_fbase) { + pthread_fbase = di2.dli_fbase; + } + + // Use unified dl_iterate_phdr call to get all information at once + UnifiedCtx ctx = {NULL, NULL, &_main_phdr, libc_fbase, pthread_fbase, &g_libc, &g_libpthread}; + dl_iterate_phdr(&unified_phdr_cb, &ctx); + + // If pthread couldn't be resolved separately, treat it as libc + if (!range_valid(&g_libpthread)) g_libpthread = g_libc; +} + +static bool pc_in_range(uintptr_t pc, const Range* r) { + return range_valid(r) && pc >= r->start && pc < r->end; +} + +#ifdef __x86_64__ + +#include +#include "vmEntry.h" + +// Workaround for JDK-8312065 on JDK 8: +// replace poll() implementation with ppoll() which is restartable +static int poll_hook(struct pollfd* fds, nfds_t nfds, int timeout) { + if (timeout >= 0) { + struct timespec ts; + ts.tv_sec = timeout / 1000; + ts.tv_nsec = (timeout % 1000) * 1000000; + return ppoll(fds, nfds, &ts, NULL); + } else { + return ppoll(fds, nfds, NULL, NULL); + } +} + +static void applyPatch(CodeCache* cc) { + static bool patch_libnet = VM::hotspot_version() == 8; + + if (patch_libnet) { + size_t len = strlen(cc->name()); + if (len >= 10 && strcmp(cc->name() + len - 10, "/libnet.so") == 0) { + UnloadProtection handle(cc); + if (handle.isValid()) { + cc->patchImport(im_poll, (void*)poll_hook); + patch_libnet = false; + } + } + } +} + +#else + +static void applyPatch(CodeCache* cc) {} + +#endif + + +static bool isMainExecutable(const char* image_base, const void* map_end) { + ensure_main_phdr_initialized(); + return _main_phdr != NULL && _main_phdr >= image_base && _main_phdr < map_end; +} + +static bool isLoader(const char* image_base) { + return _ld_base == image_base; +} + +class SymbolDesc { + private: + const char* _addr; + const char* _desc; + + public: + SymbolDesc(const char* s) { + _addr = s; + _desc = strchr(_addr, ' '); + } + + const char* addr() { return (const char*)strtoul(_addr, NULL, 16); } + char type() { return _desc != NULL ? 
_desc[1] : 0; } + const char* name() { return _desc + 3; } +}; + +class MemoryMapDesc { + private: + const char* _addr; + const char* _end; + const char* _perm; + const char* _offs; + const char* _dev; + const char* _inode; + const char* _file; + + public: + MemoryMapDesc(const char* s) { + _addr = s; + _end = strchr(_addr, '-') + 1; + _perm = strchr(_end, ' ') + 1; + _offs = strchr(_perm, ' ') + 1; + _dev = strchr(_offs, ' ') + 1; + _inode = strchr(_dev, ' ') + 1; + _file = strchr(_inode, ' '); + + if (_file != NULL) { + while (*_file == ' ') _file++; + } + } + + const char* file() { return _file; } + bool isReadable() { return _perm[0] == 'r'; } + bool isExecutable() { return _perm[2] == 'x'; } + const char* addr() { return (const char*)strtoul(_addr, NULL, 16); } + const char* end() { return (const char*)strtoul(_end, NULL, 16); } + unsigned long offs() { return strtoul(_offs, NULL, 16); } + unsigned long inode() { return strtoul(_inode, NULL, 10); } + + unsigned long dev() { + char* colon; + unsigned long major = strtoul(_dev, &colon, 16); + unsigned long minor = strtoul(colon + 1, NULL, 16); + return major << 8 | minor; + } +}; + +struct SharedLibrary { + char* file; + const char* map_start; + const char* map_end; + const char* image_base; +}; + + +#ifdef __LP64__ +const unsigned char ELFCLASS_SUPPORTED = ELFCLASS64; +typedef Elf64_Ehdr ElfHeader; +typedef Elf64_Shdr ElfSection; +typedef Elf64_Phdr ElfProgramHeader; +typedef Elf64_Nhdr ElfNote; +typedef Elf64_Sym ElfSymbol; +typedef Elf64_Rel ElfRelocation; +typedef Elf64_Dyn ElfDyn; +#define ELF_R_TYPE ELF64_R_TYPE +#define ELF_R_SYM ELF64_R_SYM +#else +const unsigned char ELFCLASS_SUPPORTED = ELFCLASS32; +typedef Elf32_Ehdr ElfHeader; +typedef Elf32_Shdr ElfSection; +typedef Elf32_Phdr ElfProgramHeader; +typedef Elf32_Nhdr ElfNote; +typedef Elf32_Sym ElfSymbol; +typedef Elf32_Rel ElfRelocation; +typedef Elf32_Dyn ElfDyn; +#define ELF_R_TYPE ELF32_R_TYPE +#define ELF_R_SYM ELF32_R_SYM +#endif // __LP64__ + +#if defined(__x86_64__) +# define R_GLOB_DAT R_X86_64_GLOB_DAT +# define R_ABS64 R_X86_64_64 +#elif defined(__i386__) +# define R_GLOB_DAT R_386_GLOB_DAT +# define R_ABS64 -1 +#elif defined(__arm__) || defined(__thumb__) +# define R_GLOB_DAT R_ARM_GLOB_DAT +# define R_ABS64 -1 +#elif defined(__aarch64__) +# define R_GLOB_DAT R_AARCH64_GLOB_DAT +# define R_ABS64 R_AARCH64_ABS64 +#elif defined(__PPC64__) +# define R_GLOB_DAT R_PPC64_GLOB_DAT +# define R_ABS64 -1 +#elif defined(__riscv) && (__riscv_xlen == 64) +// RISC-V does not have GLOB_DAT relocation, use something neutral, +// like the impossible relocation number. +# define R_GLOB_DAT -1 +# define R_ABS64 -1 +#elif defined(__loongarch_lp64) +// LOONGARCH does not have GLOB_DAT relocation, use something neutral, +// like the impossible relocation number. 
+# define R_GLOB_DAT -1 +# define R_ABS64 -1 +#else +# error "Compiling on unsupported arch" +#endif + + +static char _debuginfod_cache_buf[PATH_MAX] = {0}; + +class ElfParser { + private: + CodeCache* _cc; + const char* _base; + const char* _file_name; + bool _relocate_dyn; + ElfHeader* _header; + const char* _sections; + const char* _vaddr_diff; + + ElfParser(CodeCache* cc, const char* base, const void* addr, const char* file_name, bool relocate_dyn) { + _cc = cc; + _base = base; + _file_name = file_name; + _relocate_dyn = relocate_dyn; + _header = (ElfHeader*)addr; + _sections = (const char*)addr + _header->e_shoff; + } + + bool validHeader() { + unsigned char* ident = _header->e_ident; + return ident[0] == 0x7f && ident[1] == 'E' && ident[2] == 'L' && ident[3] == 'F' + && ident[4] == ELFCLASS_SUPPORTED && ident[5] == ELFDATA2LSB && ident[6] == EV_CURRENT + && _header->e_shstrndx != SHN_UNDEF; + } + + ElfSection* section(int index) { + return (ElfSection*)(_sections + index * _header->e_shentsize); + } + + const char* at(ElfSection* section) { + return (const char*)_header + section->sh_offset; + } + + const char* at(ElfProgramHeader* pheader) { + if (_header->e_type == ET_EXEC) { + return (const char*)pheader->p_vaddr; + } + return _vaddr_diff == NULL ? (const char*)pheader->p_vaddr : _vaddr_diff + pheader->p_vaddr; + } + + const char* base() { + return _header->e_type == ET_EXEC ? NULL : _vaddr_diff; + } + + char* dyn_ptr(ElfDyn* dyn) { + // GNU dynamic linker relocates pointers in the dynamic section, while musl doesn't. + // Also, [vdso] is not relocated, and its vaddr may differ from the load address. + if (_relocate_dyn || (_base != NULL && (char*)dyn->d_un.d_ptr < _base)) { + return _vaddr_diff == NULL ? (char*)dyn->d_un.d_ptr : (char*)_vaddr_diff + dyn->d_un.d_ptr; + } else { + return (char*)dyn->d_un.d_ptr; + } + } + + ElfSection* findSection(uint32_t type, const char* name); + ElfProgramHeader* findProgramHeader(uint32_t type); + + void calcVirtualLoadAddress(); + void parseDynamicSection(); + void parseDwarfInfo(); + uint32_t getSymbolCount(uint32_t* gnu_hash); + void loadSymbols(bool use_debug); + bool loadSymbolsFromDebug(const char* build_id, const int build_id_len); + bool loadSymbolsFromDebuginfodCache(const char* build_id, const int build_id_len); + bool loadSymbolsUsingBuildId(); + bool loadSymbolsUsingDebugLink(); + void loadSymbolTable(const char* symbols, size_t total_size, size_t ent_size, const char* strings); + void addRelocationSymbols(ElfSection* reltab, const char* plt); + const char* getDebuginfodCache(); + + public: + static void parseProgramHeaders(CodeCache* cc, const char* base, const char* end, bool relocate_dyn); + static bool parseFile(CodeCache* cc, const char* base, const char* file_name, bool use_debug); +}; + + +ElfSection* ElfParser::findSection(uint32_t type, const char* name) { + const char* strtab = at(section(_header->e_shstrndx)); + + for (int i = 0; i < _header->e_shnum; i++) { + ElfSection* section = this->section(i); + if (section->sh_type == type && section->sh_name != 0) { + if (strcmp(strtab + section->sh_name, name) == 0) { + return section; + } + } + } + + return NULL; +} + +ElfProgramHeader* ElfParser::findProgramHeader(uint32_t type) { + const char* pheaders = (const char*)_header + _header->e_phoff; + + for (int i = 0; i < _header->e_phnum; i++) { + ElfProgramHeader* pheader = (ElfProgramHeader*)(pheaders + i * _header->e_phentsize); + if (pheader->p_type == type) { + return pheader; + } + } + + return NULL; +} + +bool 
ElfParser::parseFile(CodeCache* cc, const char* base, const char* file_name, bool use_debug) { + int fd = open(file_name, O_RDONLY); + if (fd == -1) { + return false; + } + + size_t length = (size_t)lseek(fd, 0, SEEK_END); + void* addr = mmap(NULL, length, PROT_READ, MAP_PRIVATE, fd, 0); + close(fd); + + if (addr == MAP_FAILED) { + Log::warn("Could not parse symbols from %s: %s", file_name, strerror(errno)); + } else { + ElfParser elf(cc, base, addr, file_name, false); + if (elf.validHeader()) { + elf.calcVirtualLoadAddress(); + elf.loadSymbols(use_debug); + } + munmap(addr, length); + } + return true; +} + +void ElfParser::parseProgramHeaders(CodeCache* cc, const char* base, const char* end, bool relocate_dyn) { + ElfParser elf(cc, base, base, NULL, relocate_dyn); + if (elf.validHeader() && base + elf._header->e_phoff < end) { + cc->setTextBase(base); + elf.calcVirtualLoadAddress(); + elf.parseDynamicSection(); + elf.parseDwarfInfo(); + } +} + +void ElfParser::calcVirtualLoadAddress() { + // Find a difference between the virtual load address (often zero) and the actual DSO base + if (_base == NULL) { + _vaddr_diff = NULL; + return; + } + const char* pheaders = (const char*)_header + _header->e_phoff; + for (int i = 0; i < _header->e_phnum; i++) { + ElfProgramHeader* pheader = (ElfProgramHeader*)(pheaders + i * _header->e_phentsize); + if (pheader->p_type == PT_LOAD) { + _vaddr_diff = _base - pheader->p_vaddr; + return; + } + } + _vaddr_diff = _base; +} + +void ElfParser::parseDynamicSection() { + ElfProgramHeader* dynamic = findProgramHeader(PT_DYNAMIC); + if (dynamic != NULL) { + const char* symtab = NULL; + const char* strtab = NULL; + char* jmprel = NULL; + char* rel = NULL; + size_t pltrelsz = 0; + size_t relsz = 0; + size_t relent = 0; + size_t relcount = 0; + size_t syment = 0; + uint32_t nsyms = 0; + + const char* dyn_start = at(dynamic); + const char* dyn_end = dyn_start + dynamic->p_memsz; + for (ElfDyn* dyn = (ElfDyn*)dyn_start; dyn < (ElfDyn*)dyn_end; dyn++) { + switch (dyn->d_tag) { + case DT_SYMTAB: + symtab = dyn_ptr(dyn); + break; + case DT_STRTAB: + strtab = dyn_ptr(dyn); + break; + case DT_SYMENT: + syment = dyn->d_un.d_val; + break; + case DT_HASH: + nsyms = ((uint32_t*)dyn_ptr(dyn))[1]; + break; + case DT_GNU_HASH: + if (nsyms == 0) { + nsyms = getSymbolCount((uint32_t*)dyn_ptr(dyn)); + } + break; + case DT_JMPREL: + jmprel = dyn_ptr(dyn); + break; + case DT_PLTRELSZ: + pltrelsz = dyn->d_un.d_val; + break; + case DT_RELA: + case DT_REL: + rel = dyn_ptr(dyn); + break; + case DT_RELASZ: + case DT_RELSZ: + relsz = dyn->d_un.d_val; + break; + case DT_RELAENT: + case DT_RELENT: + relent = dyn->d_un.d_val; + break; + case DT_RELACOUNT: + case DT_RELCOUNT: + relcount = dyn->d_un.d_val; + break; + } + } + + if (symtab == NULL || strtab == NULL || syment == 0 || relent == 0) { + return; + } + + if (!_cc->hasDebugSymbols() && nsyms > 0) { + loadSymbolTable(symtab, syment * nsyms, syment, strtab); + } + + const char* base = this->base(); + if (jmprel != NULL && pltrelsz != 0) { + // Parse .rela.plt table + for (size_t offs = 0; offs < pltrelsz; offs += relent) { + ElfRelocation* r = (ElfRelocation*)(jmprel + offs); + ElfSymbol* sym = (ElfSymbol*)(symtab + ELF_R_SYM(r->r_info) * syment); + if (sym->st_name != 0) { + _cc->addImport((void**)(base + r->r_offset), strtab + sym->st_name); + } + } + } + + if (rel != NULL && relsz != 0) { + // Relocation entries for imports can be found in .rela.dyn, for example + // if a shared library is built without PLT (-fno-plt). 
However, if both + // entries exist, addImport saves them both. + for (size_t offs = relcount * relent; offs < relsz; offs += relent) { + ElfRelocation* r = (ElfRelocation*)(rel + offs); + if (ELF_R_TYPE(r->r_info) == R_GLOB_DAT || ELF_R_TYPE(r->r_info) == R_ABS64) { + ElfSymbol* sym = (ElfSymbol*)(symtab + ELF_R_SYM(r->r_info) * syment); + if (sym->st_name != 0) { + _cc->addImport((void**)(base + r->r_offset), strtab + sym->st_name); + } + } + } + } + } +} + +void ElfParser::parseDwarfInfo() { + if (!DWARF_SUPPORTED) return; + + ElfProgramHeader* eh_frame_hdr = findProgramHeader(PT_GNU_EH_FRAME); + if (eh_frame_hdr != NULL) { + if (eh_frame_hdr->p_vaddr != 0) { + DwarfParser dwarf(_cc->name(), _base, at(eh_frame_hdr)); + _cc->setDwarfTable(dwarf.table(), dwarf.count()); + } else if (strcmp(_cc->name(), "[vdso]") == 0) { + FrameDesc* table = (FrameDesc*)malloc(sizeof(FrameDesc)); + *table = FrameDesc::empty_frame; + _cc->setDwarfTable(table, 1); + } + } +} + +uint32_t ElfParser::getSymbolCount(uint32_t* gnu_hash) { + uint32_t nbuckets = gnu_hash[0]; + uint32_t* buckets = &gnu_hash[4] + gnu_hash[2] * (sizeof(size_t) / 4); + + uint32_t nsyms = 0; + for (uint32_t i = 0; i < nbuckets; i++) { + if (buckets[i] > nsyms) nsyms = buckets[i]; + } + + if (nsyms > 0) { + uint32_t* chain = &buckets[nbuckets] - gnu_hash[1]; + while (!(chain[nsyms++] & 1)); + } + return nsyms; +} + +void ElfParser::loadSymbols(bool use_debug) { + ElfSection* symtab = findSection(SHT_SYMTAB, ".symtab"); + if (symtab != NULL) { + // Parse debug symbols from the original .so + ElfSection* strtab = section(symtab->sh_link); + loadSymbolTable(at(symtab), symtab->sh_size, symtab->sh_entsize, at(strtab)); + _cc->setDebugSymbols(true); + } else if (use_debug) { + // Try to load symbols from an external debuginfo library + loadSymbolsUsingBuildId() || loadSymbolsUsingDebugLink(); + } + + if (use_debug) { + // Synthesize names for PLT stubs + ElfSection* plt = findSection(SHT_PROGBITS, ".plt"); + if (plt != NULL) { + _cc->setPlt(plt->sh_addr, plt->sh_size); + ElfSection* reltab = findSection(SHT_RELA, ".rela.plt"); + if (reltab != NULL || (reltab = findSection(SHT_REL, ".rel.plt")) != NULL) { + addRelocationSymbols(reltab, base() + plt->sh_addr + PLT_HEADER_SIZE); + } + } + } +} + +const char* ElfParser::getDebuginfodCache() { + if (_debuginfod_cache_buf[0]) { + return _debuginfod_cache_buf; + } + + const char* env_vars[] = {"DEBUGINFOD_CACHE_PATH", "XDG_CACHE_HOME", "HOME"}; + const char* suffixes[] = {"/", "debuginfod_client/", ".cache/debuginfod_client/"}; + + for (int i = 0; i < sizeof(env_vars) / sizeof(env_vars[0]); i++) { + const char* env_val = getenv(env_vars[i]); + if (!env_val || !env_val[0]) { + continue; + } + + if (snprintf(_debuginfod_cache_buf, sizeof(_debuginfod_cache_buf), "%s/%s", env_val, suffixes[i]) < sizeof(_debuginfod_cache_buf)) { + return _debuginfod_cache_buf; + } + } + + _debuginfod_cache_buf[0] = '\0'; + return _debuginfod_cache_buf; +} + +bool ElfParser::loadSymbolsFromDebug(const char* build_id, const int build_id_len) { + char path[PATH_MAX]; + char* p = path + snprintf(path, sizeof(path), "/usr/lib/debug/.build-id/%02hhx/", build_id[0]); + for (int i = 1; i < build_id_len; i++) { + p += snprintf(p, 3, "%02hhx", build_id[i]); + } + strcpy(p, ".debug"); + + return parseFile(_cc, _base, path, false); +} + +bool ElfParser::loadSymbolsFromDebuginfodCache(const char* build_id, const int build_id_len) { + const char* debuginfod_cache = getDebuginfodCache(); + if (debuginfod_cache == NULL || 
!debuginfod_cache[0]) { + return false; + } + + char path[PATH_MAX]; + const int debuginfod_cache_len = strlen(debuginfod_cache); + if (debuginfod_cache_len + build_id_len + strlen("/debuginfo") >= sizeof(path)) { + Log::warn("Path too long, skipping loading symbols: %s", debuginfod_cache); + return false; + } + + char* p = strcpy(path, debuginfod_cache); + p += debuginfod_cache_len; + for (int i = 0; i < build_id_len; i++) { + p += snprintf(p, 3, "%02hhx", build_id[i]); + } + strcpy(p, "/debuginfo"); + + return parseFile(_cc, _base, path, false); +} + +// Load symbols from the first file that exists in the following locations, in order, where abcdef1234 is Build ID. +// /usr/lib/debug/.build-id/ab/cdef1234.debug +// $DEBUGINFOD_CACHE_PATH/abcdef1234/debuginfo +// $XDG_CACHE_HOME/debuginfod_client/abcdef1234/debuginfo +// $HOME/.cache/debuginfod_client/abcdef1234/debuginfo +bool ElfParser::loadSymbolsUsingBuildId() { + ElfSection* section = findSection(SHT_NOTE, ".note.gnu.build-id"); + if (section == NULL || section->sh_size <= 16) { + return false; + } + + ElfNote* note = (ElfNote*)at(section); + if (note->n_namesz != 4 || note->n_descsz < 2 || note->n_descsz > 64) { + return false; + } + + const char* build_id = (const char*)note + sizeof(*note) + 4; + int build_id_len = note->n_descsz; + + return loadSymbolsFromDebug(build_id, build_id_len) + || loadSymbolsFromDebuginfodCache(build_id, build_id_len); +} + +// Look for debuginfo file specified in .gnu_debuglink section +bool ElfParser::loadSymbolsUsingDebugLink() { + ElfSection* section = findSection(SHT_PROGBITS, ".gnu_debuglink"); + if (section == NULL || section->sh_size <= 4) { + return false; + } + + const char* basename = strrchr(_file_name, '/'); + if (basename == NULL) { + return false; + } + + char* dirname = strndup(_file_name, basename - _file_name); + if (dirname == NULL) { + return false; + } + + const char* debuglink = at(section); + char path[PATH_MAX]; + bool result = false; + + // 1. /path/to/libjvm.so.debug + if (strcmp(debuglink, basename + 1) != 0 && + snprintf(path, PATH_MAX, "%s/%s", dirname, debuglink) < PATH_MAX) { + result = parseFile(_cc, _base, path, false); + } + + // 2. /path/to/.debug/libjvm.so.debug + if (!result && snprintf(path, PATH_MAX, "%s/.debug/%s", dirname, debuglink) < PATH_MAX) { + result = parseFile(_cc, _base, path, false); + } + + // 3. 
/usr/lib/debug/path/to/libjvm.so.debug + if (!result && snprintf(path, PATH_MAX, "/usr/lib/debug%s/%s", dirname, debuglink) < PATH_MAX) { + result = parseFile(_cc, _base, path, false); + } + + free(dirname); + return result; +} + +void ElfParser::loadSymbolTable(const char* symbols, size_t total_size, size_t ent_size, const char* strings) { + const char* base = this->base(); + for (const char* symbols_end = symbols + total_size; symbols < symbols_end; symbols += ent_size) { + ElfSymbol* sym = (ElfSymbol*)symbols; + if (sym->st_name != 0 && sym->st_value != 0) { + // Skip special AArch64 mapping symbols: $x and $d + if (sym->st_size != 0 || sym->st_info != 0 || strings[sym->st_name] != '$') { + const char* addr; + if (base != NULL) { + // Check for overflow when adding sym->st_value to base + uintptr_t base_addr = (uintptr_t)base; + uint64_t symbol_value = sym->st_value; + + // Skip this symbol if addition would overflow + // First check if symbol_value exceeds the address space + if (symbol_value > UINTPTR_MAX) { + continue; + } + // Then check if addition would overflow + if (base_addr > UINTPTR_MAX - (uintptr_t)symbol_value) { + continue; + } + + // Perform addition using integer arithmetic to avoid pointer overflow + addr = (const char*)(base_addr + (uintptr_t)symbol_value); + } else { + addr = (const char*)sym->st_value; + } + _cc->add(addr, (int)sym->st_size, strings + sym->st_name); + } + } + } +} + +void ElfParser::addRelocationSymbols(ElfSection* reltab, const char* plt) { + ElfSection* symtab = section(reltab->sh_link); + const char* symbols = at(symtab); + + ElfSection* strtab = section(symtab->sh_link); + const char* strings = at(strtab); + + const char* relocations = at(reltab); + const char* relocations_end = relocations + reltab->sh_size; + for (; relocations < relocations_end; relocations += reltab->sh_entsize) { + ElfRelocation* r = (ElfRelocation*)relocations; + ElfSymbol* sym = (ElfSymbol*)(symbols + ELF_R_SYM(r->r_info) * symtab->sh_entsize); + + char name[256]; + if (sym->st_name == 0) { + strcpy(name, "@plt"); + } else { + const char* sym_name = strings + sym->st_name; + snprintf(name, sizeof(name), "%s%cplt", sym_name, sym_name[0] == '_' && sym_name[1] == 'Z' ? '.' 
: '@'); + name[sizeof(name) - 1] = 0; + } + + _cc->add(plt, PLT_ENTRY_SIZE, name); + plt += PLT_ENTRY_SIZE; + } +} + + +Mutex Symbols::_parse_lock; +bool Symbols::_have_kernel_symbols = false; +bool Symbols::_libs_limit_reported = false; +static std::unordered_set _parsed_inodes; +static bool _in_parse_libraries = false; + +void Symbols::parseKernelSymbols(CodeCache* cc) { + int fd; + if (FdTransferClient::hasPeer()) { + fd = FdTransferClient::requestKallsymsFd(); + } else { + fd = open("/proc/kallsyms", O_RDONLY); + } + + if (fd == -1) { + Log::warn("open(\"/proc/kallsyms\"): %s", strerror(errno)); + return; + } + + FILE* f = fdopen(fd, "r"); + if (f == NULL) { + Log::warn("fdopen(): %s", strerror(errno)); + close(fd); + return; + } + + char str[256]; + while (fgets(str, sizeof(str) - 8, f) != NULL) { + size_t len = strlen(str) - 1; // trim the '\n' + strcpy(str + len, "_[k]"); + + SymbolDesc symbol(str); + char type = symbol.type(); + if (type == 'T' || type == 't' || type == 'W' || type == 'w') { + const char* addr = symbol.addr(); + if (addr != NULL) { + if (!_have_kernel_symbols) { + if (strncmp(symbol.name(), "__LOAD_PHYSICAL_ADDR", 20) == 0 || + strncmp(symbol.name(), "phys_startup", 12) == 0) { + continue; + } + _have_kernel_symbols = true; + } + cc->add(addr, 0, symbol.name()); + } + } + } + + fclose(f); +} + +static void collectSharedLibraries(std::unordered_map& libs, int max_count) { + FILE* f = fopen("/proc/self/maps", "r"); + if (f == NULL) { + return; + } + + const char* image_base = NULL; + u64 last_inode = 0; + char* str = NULL; + size_t str_size = 0; + ssize_t len; + + while (max_count > 0 && (len = getline(&str, &str_size, f)) > 0) { + str[len - 1] = 0; + + MemoryMapDesc map(str); + if (!map.isReadable() || map.file() == NULL || map.file()[0] == 0) { + continue; + } + + u64 inode = u64(map.dev()) << 32 | map.inode(); + if (_parsed_inodes.find(inode) != _parsed_inodes.end()) { + continue; // shared object is already parsed + } + if (inode == 0 && strcmp(map.file(), "[vdso]") != 0) { + continue; // all shared libraries have inode, except vDSO + } + + const char* map_start = map.addr(); + const char* map_end = map.end(); + if (inode != last_inode && map.offs() == 0) { + image_base = map_start; + last_inode = inode; + } + + if (map.isExecutable()) { + SharedLibrary& lib = libs[inode]; + if (lib.file == nullptr) { + lib.file = strdup(map.file()); + lib.map_start = map_start; + lib.map_end = map_end; + lib.image_base = inode == last_inode ? 
image_base : NULL; + max_count--; + } else { + // The same library may have multiple executable segments mapped + lib.map_end = map_end; + } + } + } + + free(str); + fclose(f); +} + +void Symbols::parseLibraries(CodeCacheArray* array, bool kernel_symbols) { + MutexLocker ml(_parse_lock); + + if (_in_parse_libraries || array->count() >= MAX_NATIVE_LIBS) { + return; + } + _in_parse_libraries = true; + + if (kernel_symbols && !haveKernelSymbols()) { + CodeCache* cc = new CodeCache("[kernel]"); + parseKernelSymbols(cc); + + if (haveKernelSymbols()) { + cc->sort(); + array->add(cc); + } else { + delete cc; + } + } + + std::unordered_map libs; + collectSharedLibraries(libs, MAX_NATIVE_LIBS - array->count()); + + for (auto& it : libs) { + u64 inode = it.first; + _parsed_inodes.insert(inode); + + SharedLibrary& lib = it.second; + CodeCache* cc = new CodeCache(lib.file, array->count(), lib.map_start, lib.map_end, lib.image_base); + + if (strchr(lib.file, ':') != NULL) { + // Do not try to parse pseudofiles like anon_inode:name, /memfd:name + } else if (strcmp(lib.file, "[vdso]") == 0) { + ElfParser::parseProgramHeaders(cc, lib.map_start, lib.map_end, true); + } else if (lib.image_base == NULL) { + // Unlikely case when image base has not been found: not safe to access program headers. + // Be careful: executable file is not always ELF, e.g. classes.jsa + ElfParser::parseFile(cc, lib.map_start, lib.file, true); + } else { + // Parse debug symbols first + ElfParser::parseFile(cc, lib.image_base, lib.file, true); + + UnloadProtection handle(cc); + if (handle.isValid()) { + ElfParser::parseProgramHeaders(cc, lib.image_base, lib.map_end, OS::isMusl()); + } + } + + free(lib.file); + + cc->sort(); + applyPatch(cc); + array->add(cc); + } + + if (array->count() >= MAX_NATIVE_LIBS && !_libs_limit_reported) { + Log::warn("Number of parsed libraries reached the limit of %d", MAX_NATIVE_LIBS); + _libs_limit_reported = true; + } + + _in_parse_libraries = false; +} + +// Check that the base address of the shared object has not changed +static bool verifyBaseAddress(const CodeCache* cc, void* lib_handle) { + Dl_info dl_info; + struct link_map* map; + + if (dlinfo(lib_handle, RTLD_DI_LINKMAP, &map) != 0 || dladdr(map->l_ld, &dl_info) == 0) { + return false; + } + + return cc->imageBase() == (const char*)dl_info.dli_fbase; +} + +UnloadProtection::UnloadProtection(const CodeCache *cc) { + if (OS::isMusl() || isMainExecutable(cc->imageBase(), cc->maxAddress()) || isLoader(cc->imageBase())) { + _lib_handle = NULL; + _valid = true; + return; + } + + // dlopen() can reopen previously loaded libraries even if the underlying file has been deleted + const char* stripped_name = cc->name(); + size_t name_len = strlen(stripped_name); + if (name_len > 10 && strcmp(stripped_name + name_len - 10, " (deleted)") == 0) { + char* buf = (char*) alloca(name_len - 9); + *stpncpy(buf, stripped_name, name_len - 10) = 0; + stripped_name = buf; + } + + // Protect library from unloading while parsing in-memory ELF program headers. + // Also, dlopen() ensures the library is fully loaded. + _lib_handle = dlopen(stripped_name, RTLD_LAZY | RTLD_NOLOAD); + _valid = _lib_handle != NULL && verifyBaseAddress(cc, _lib_handle); +} + +UnloadProtection::~UnloadProtection() { + if (_lib_handle != NULL) { + dlclose(_lib_handle); + } +} + +void Symbols::initLibraryRanges() { + init_lib_ranges_once(); +} + +bool Symbols::isLibcOrPthreadAddress(uintptr_t pc) { + // Fast, allocation-free integer checks — no strings involved. 
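+    // This can run inside a signal handler, so it must not take locks,
+    // allocate, or call into the dynamic linker.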
+ // initLibraryRanges() must have been called during profiler startup. + if (pc_in_range(pc, &g_libc)) return true; + if (pc_in_range(pc, &g_libpthread)) return true; + return false; +} + + +// Implementation of clearParsingCaches for test compatibility +void Symbols::clearParsingCaches() { + _parsed_inodes.clear(); +} + +#endif // __linux__ diff --git a/ddprof-lib/src/main/cpp/symbols_linux.h b/ddprof-lib/src/main/cpp/symbols_linux.h index 67a19183..19404483 100644 --- a/ddprof-lib/src/main/cpp/symbols_linux.h +++ b/ddprof-lib/src/main/cpp/symbols_linux.h @@ -3,7 +3,7 @@ #include "symbols.h" -// Forward declaration for ElfParser functionality from cpp-external/symbols_linux.cpp +// Forward declaration for ElfParser functionality from symbols_linux.cpp // The actual implementation will be available through the patched upstream file class ElfParser { public: diff --git a/ddprof-lib/src/main/cpp/symbols_macos.cpp b/ddprof-lib/src/main/cpp/symbols_macos.cpp new file mode 100644 index 00000000..268d0cad --- /dev/null +++ b/ddprof-lib/src/main/cpp/symbols_macos.cpp @@ -0,0 +1,231 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifdef __APPLE__ + +#include +#include +#include +#include +#include +#include +#include "symbols.h" +#include "log.h" + +UnloadProtection::UnloadProtection(const CodeCache *cc) { + // Protect library from unloading while parsing in-memory ELF program headers. + // Also, dlopen() ensures the library is fully loaded. + _lib_handle = dlopen(cc->name(), RTLD_LAZY | RTLD_NOLOAD); + _valid = _lib_handle != NULL; +} + +UnloadProtection::~UnloadProtection() { + if (_lib_handle != NULL) { + dlclose(_lib_handle); + } +} + +class MachOParser { + private: + CodeCache* _cc; + const mach_header* _image_base; + const char* _vmaddr_slide; + + static const char* add(const void* base, uint64_t offset) { + return (const char*)base + offset; + } + + void findSymbolPtrSection(const segment_command_64* sc, const section_64** section_ptr) { + const section_64* section = (const section_64*)add(sc, sizeof(segment_command_64)); + for (uint32_t i = 0; i < sc->nsects; i++) { + uint32_t section_type = section->flags & SECTION_TYPE; + if (section_type == S_NON_LAZY_SYMBOL_POINTERS) { + section_ptr[0] = section; + } else if (section_type == S_LAZY_SYMBOL_POINTERS) { + section_ptr[1] = section; + } + section++; + } + } + + const section_64* findSection(const segment_command_64* sc, const char* section_name) { + const section_64* section = (const section_64*)add(sc, sizeof(segment_command_64)); + for (uint32_t i = 0; i < sc->nsects; i++) { + if (strcmp(section->sectname, section_name) == 0) { + return section; + } + section++; + } + return NULL; + } + + void loadSymbols(const symtab_command* symtab, const char* link_base) { + const nlist_64* sym = (const nlist_64*)add(link_base, symtab->symoff); + const char* str_table = add(link_base, symtab->stroff); + bool debug_symbols = false; + + for (uint32_t i = 0; i < symtab->nsyms; i++) { + if ((sym->n_type & 0xee) == 0x0e && sym->n_value != 0) { + const char* addr = _vmaddr_slide + sym->n_value; + const char* name = str_table + sym->n_un.n_strx; + if (name[0] == '_') name++; + _cc->add(addr, 0, name); + debug_symbols = true; + } + sym++; + } + + _cc->setDebugSymbols(debug_symbols); + } + + void loadStubSymbols(const symtab_command* symtab, const dysymtab_command* dysymtab, + const section_64* stubs_section, const char* link_base) { + const nlist_64* sym = (const nlist_64*)add(link_base, symtab->symoff); + const 
+    void loadStubSymbols(const symtab_command* symtab, const dysymtab_command* dysymtab,
+                         const section_64* stubs_section, const char* link_base) {
+        const nlist_64* sym = (const nlist_64*)add(link_base, symtab->symoff);
+        const char* str_table = add(link_base, symtab->stroff);
+
+        const uint32_t* isym = (const uint32_t*)add(link_base, dysymtab->indirectsymoff) + stubs_section->reserved1;
+        uint32_t isym_count = stubs_section->size / stubs_section->reserved2;
+        const char* stubs_start = _vmaddr_slide + stubs_section->addr;
+
+        for (uint32_t i = 0; i < isym_count; i++) {
+            if ((isym[i] & (INDIRECT_SYMBOL_LOCAL | INDIRECT_SYMBOL_ABS)) == 0) {
+                const char* name = str_table + sym[isym[i]].n_un.n_strx;
+                if (name[0] == '_') name++;
+
+                char stub_name[256];
+                snprintf(stub_name, sizeof(stub_name), "stub:%s", name);
+                _cc->add(stubs_start + i * stubs_section->reserved2, stubs_section->reserved2, stub_name);
+            }
+        }
+
+        _cc->setPlt(stubs_section->addr, isym_count * stubs_section->reserved2);
+    }
+
+    void loadImports(const symtab_command* symtab, const dysymtab_command* dysymtab,
+                     const section_64* symbol_ptr_section, const char* link_base) {
+        const nlist_64* sym = (const nlist_64*)add(link_base, symtab->symoff);
+        const char* str_table = add(link_base, symtab->stroff);
+
+        const uint32_t* isym = (const uint32_t*)add(link_base, dysymtab->indirectsymoff) + symbol_ptr_section->reserved1;
+        uint32_t isym_count = symbol_ptr_section->size / sizeof(void*);
+        void** slot = (void**)(_vmaddr_slide + symbol_ptr_section->addr);
+
+        for (uint32_t i = 0; i < isym_count; i++) {
+            if ((isym[i] & (INDIRECT_SYMBOL_LOCAL | INDIRECT_SYMBOL_ABS)) == 0) {
+                const char* name = str_table + sym[isym[i]].n_un.n_strx;
+                if (name[0] == '_') name++;
+                _cc->addImport(&slot[i], name);
+            }
+        }
+    }
+
+  public:
+    MachOParser(CodeCache* cc, const mach_header* image_base, const char* vmaddr_slide) :
+        _cc(cc), _image_base(image_base), _vmaddr_slide(vmaddr_slide) {}
+
+    bool parse() {
+        if (_image_base->magic != MH_MAGIC_64) {
+            return false;
+        }
+
+        const mach_header_64* header = (const mach_header_64*)_image_base;
+        const load_command* lc = (const load_command*)(header + 1);
+
+        const char* link_base = NULL;
+        const section_64* symbol_ptr[2] = {NULL, NULL};
+        const symtab_command* symtab = NULL;
+        const dysymtab_command* dysymtab = NULL;
+        const section_64* stubs_section = NULL;
+
+        for (uint32_t i = 0; i < header->ncmds; i++) {
+            if (lc->cmd == LC_SEGMENT_64) {
+                const segment_command_64* sc = (const segment_command_64*)lc;
+                if (strcmp(sc->segname, "__TEXT") == 0) {
+                    _cc->updateBounds(_image_base, add(_image_base, sc->vmsize));
+                    stubs_section = findSection(sc, "__stubs");
+                } else if (strcmp(sc->segname, "__LINKEDIT") == 0) {
+                    link_base = _vmaddr_slide + sc->vmaddr - sc->fileoff;
+                } else if (strcmp(sc->segname, "__DATA") == 0 || strcmp(sc->segname, "__DATA_CONST") == 0) {
+                    findSymbolPtrSection(sc, symbol_ptr);
+                }
+            } else if (lc->cmd == LC_SYMTAB) {
+                symtab = (const symtab_command*)lc;
+            } else if (lc->cmd == LC_DYSYMTAB) {
+                dysymtab = (const dysymtab_command*)lc;
+            }
+            lc = (const load_command*)add(lc, lc->cmdsize);
+        }
+
+        if (symtab != NULL && link_base != NULL) {
+            loadSymbols(symtab, link_base);
+
+            if (dysymtab != NULL) {
+                if (symbol_ptr[0] != NULL) loadImports(symtab, dysymtab, symbol_ptr[0], link_base);
+                if (symbol_ptr[1] != NULL) loadImports(symtab, dysymtab, symbol_ptr[1], link_base);
+                if (stubs_section != NULL) loadStubSymbols(symtab, dysymtab, stubs_section, link_base);
+            }
+        }
+
+        return true;
+    }
+};
+
+
+Mutex Symbols::_parse_lock;
+bool Symbols::_have_kernel_symbols = false;
+bool Symbols::_libs_limit_reported = false;
+static std::unordered_set<const mach_header*> _parsed_libraries;
+
+void Symbols::parseKernelSymbols(CodeCache* cc) {
+}
+
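parseLibraries() below drives everything through dyld's image-enumeration API. A minimal standalone sketch of that walk (real functions from `<mach-o/dyld.h>`; the printf is just illustration):

```cpp
#include <mach-o/dyld.h>
#include <stdint.h>
#include <stdio.h>

int main() {
    uint32_t n = _dyld_image_count();
    for (uint32_t i = 0; i < n; i++) {
        // Header address, ASLR slide, and on-disk path of each loaded image.
        printf("%p slide=%#lx %s\n",
               (const void*)_dyld_get_image_header(i),
               (unsigned long)_dyld_get_image_vmaddr_slide(i),
               _dyld_get_image_name(i));
    }
    return 0;
}
```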
+void Symbols::parseLibraries(CodeCacheArray* array, bool kernel_symbols) {
+    MutexLocker ml(_parse_lock);
+    uint32_t images = _dyld_image_count();
+
+    for (uint32_t i = 0; i < images; i++) {
+        const mach_header* image_base = _dyld_get_image_header(i);
+        if (image_base == NULL || !_parsed_libraries.insert(image_base).second) {
+            continue;  // the library was already parsed
+        }
+
+        int count = array->count();
+        if (count >= MAX_NATIVE_LIBS) {
+            if (!_libs_limit_reported) {
+                Log::warn("Number of parsed libraries reached the limit of %d", MAX_NATIVE_LIBS);
+                _libs_limit_reported = true;
+            }
+            break;
+        }
+
+        const char* path = _dyld_get_image_name(i);
+        const char* vmaddr_slide = (const char*)_dyld_get_image_vmaddr_slide(i);
+
+        CodeCache* cc = new CodeCache(path, count);
+        cc->setTextBase(vmaddr_slide);
+
+        UnloadProtection handle(cc);
+        if (handle.isValid()) {
+            MachOParser parser(cc, image_base, vmaddr_slide);
+            if (!parser.parse()) {
+                Log::warn("Could not parse symbols from %s", path);
+            }
+            cc->sort();
+            array->add(cc);
+        } else {
+            delete cc;
+        }
+    }
+}
+
+void Symbols::initLibraryRanges() {
+    // No initialization needed on macOS
+}
+
+bool Symbols::isLibcOrPthreadAddress(uintptr_t pc) {
+    return false;
+}
+
+#endif // __APPLE__
diff --git a/ddprof-lib/src/main/cpp/trap.cpp b/ddprof-lib/src/main/cpp/trap.cpp
new file mode 100644
index 00000000..38e47fc4
--- /dev/null
+++ b/ddprof-lib/src/main/cpp/trap.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright The async-profiler authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <sys/mman.h>
+#include "trap.h"
+#include "os.h"
+
+
+uintptr_t Trap::_page_start[TRAP_COUNT] = {0};
+
+
+bool Trap::isFaultInstruction(uintptr_t pc) {
+    for (int i = 0; i < TRAP_COUNT; i++) {
+        if (pc - _page_start[i] < OS::page_size) {
+            return true;
+        }
+    }
+    return false;
+}
+
+void Trap::assign(const void* address, uintptr_t offset) {
+    _entry = (uintptr_t)address;
+    if (_entry == 0) {
+        return;
+    }
+    _entry += offset;
+
+#if defined(__arm__) || defined(__thumb__)
+    _breakpoint_insn = (_entry & 1) ? BREAKPOINT_THUMB : BREAKPOINT;
+    _entry &= ~(uintptr_t)1;
+#endif
+
+    _saved_insn = *(instruction_t*)_entry;
+    _page_start[_id] = _entry & -OS::page_size;
+}
+
+// Two allocation traps are always enabled/disabled together.
+// If both traps belong to the same page, protect/unprotect it just once.
+void Trap::pair(Trap& second) {
+    if (_page_start[_id] == _page_start[second._id]) {
+        _protect = false;
+        second._unprotect = false;
+    }
+}
+
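For orientation, a hedged usage sketch of this class; the trap id and symbol name are hypothetical, and real call sites live elsewhere in the profiler. assign() records the target address and its original instruction, install() writes the breakpoint, uninstall() restores the saved word:

```cpp
#include <dlfcn.h>
#include "trap.h"

void example() {
    Trap trap(0);                                        // hypothetical trap id
    trap.assign(dlsym(RTLD_DEFAULT, "some_function"));   // hypothetical target symbol
    if (trap.install()) {
        // ... the SIGTRAP handler recognizes the fault via trap.covers(pc) ...
        trap.uninstall();
    }
}
```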
+// Patch instruction at the entry point
+bool Trap::patch(instruction_t insn) {
+    if (_unprotect) {
+        int prot = WX_MEMORY ? (PROT_READ | PROT_WRITE) : (PROT_READ | PROT_WRITE | PROT_EXEC);
+        if (OS::mprotect((void*)(_entry & -OS::page_size), OS::page_size, prot) != 0) {
+            return false;
+        }
+    }
+
+    *(instruction_t*)_entry = insn;
+    flushCache(_entry);
+
+    if (_protect) {
+        OS::mprotect((void*)(_entry & -OS::page_size), OS::page_size, PROT_READ | PROT_EXEC);
+    }
+    return true;
+}
diff --git a/ddprof-lib/src/main/cpp/trap.h b/ddprof-lib/src/main/cpp/trap.h
new file mode 100644
index 00000000..97620e44
--- /dev/null
+++ b/ddprof-lib/src/main/cpp/trap.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright The async-profiler authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef _TRAP_H
+#define _TRAP_H
+
+#include <stdint.h>
+#include "arch.h"
+
+
+const int TRAP_COUNT = 4;
+
+
+class Trap {
+  private:
+    int _id;
+    bool _unprotect;
+    bool _protect;
+    uintptr_t _entry;
+    instruction_t _breakpoint_insn;
+    instruction_t _saved_insn;
+
+    bool patch(instruction_t insn);
+
+    static uintptr_t _page_start[TRAP_COUNT];
+
+  public:
+    Trap(int id) : _id(id), _unprotect(true), _protect(WX_MEMORY), _entry(0), _breakpoint_insn(BREAKPOINT) {
+    }
+
+    uintptr_t entry() {
+        return _entry;
+    }
+
+    bool covers(uintptr_t pc) {
+        // PC points either to BREAKPOINT instruction or to the next one
+        return pc - _entry <= sizeof(instruction_t);
+    }
+
+    void assign(const void* address, uintptr_t offset = BREAKPOINT_OFFSET);
+    void pair(Trap& second);
+
+    bool install() {
+        return _entry == 0 || patch(_breakpoint_insn);
+    }
+
+    bool uninstall() {
+        return _entry == 0 || patch(_saved_insn);
+    }
+
+    static bool isFaultInstruction(uintptr_t pc);
+};
+
+#endif // _TRAP_H
diff --git a/ddprof-lib/src/main/cpp/tsc.cpp b/ddprof-lib/src/main/cpp/tsc.cpp
new file mode 100644
index 00000000..8811d443
--- /dev/null
+++ b/ddprof-lib/src/main/cpp/tsc.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright The async-profiler authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdint.h>
+#include "tsc.h"
+#include "vmEntry.h"
+
+
+bool TSC::_initialized = false;
+bool TSC::_available = false;
+bool TSC::_enabled = false;
+u64 TSC::_offset = 0;
+u64 TSC::_frequency = NANOTIME_FREQ;
+
+void TSC::enable(Clock clock) {
+    if (!TSC_SUPPORTED || clock == CLK_MONOTONIC) {
+        _enabled = false;
+        return;
+    }
+
+    if (!_initialized) {
+        if (VM::loaded()) {
+            JNIEnv* env = VM::jni();
+
+            jfieldID jvm;
+            jmethodID getTicksFrequency, counterTime;
+            jclass cls = env->FindClass("jdk/jfr/internal/JVM");
+            if (cls != NULL
+                    && ((jvm = env->GetStaticFieldID(cls, "jvm", "Ljdk/jfr/internal/JVM;")) != NULL)
+                    && ((getTicksFrequency = env->GetMethodID(cls, "getTicksFrequency", "()J")) != NULL)
+                    && ((counterTime = env->GetStaticMethodID(cls, "counterTime", "()J")) != NULL)) {
+                u64 frequency = env->CallLongMethod(env->GetStaticObjectField(cls, jvm), getTicksFrequency);
+                if (frequency > NANOTIME_FREQ) {
+                    // Default 1GHz frequency might mean that rdtsc is not available
+                    u64 jvm_ticks = env->CallStaticLongMethod(cls, counterTime);
+                    _offset = rdtsc() - jvm_ticks;
+                    _frequency = frequency;
+                    _available = true;
+                }
+            }
+
+            env->ExceptionClear();
+        } else if (cpuHasGoodTimestampCounter()) {
+            _offset = 0;
+            _available = true;
+        }
+
+        _initialized = true;
+    }
+
+    _enabled = _available;
+}
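The calibration above anchors raw rdtsc values to JFR's ticks timeline (`_offset`) and records JFR's tick frequency. A hedged sketch of how a caller converts a sampled interval to nanoseconds with the API declared in tsc.h below:

```cpp
#include "tsc.h"

u64 elapsedNanos() {
    u64 start = TSC::ticks();
    // ... sampled work ...
    u64 end = TSC::ticks();
    // frequency() is ticks per second, so scale the delta to nanoseconds.
    // Works in both modes: when TSC is disabled, ticks() is already
    // OS::nanotime() and frequency() is NANOTIME_FREQ.
    return (end - start) * NANOTIME_FREQ / TSC::frequency();
}
```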
diff --git a/ddprof-lib/src/main/cpp/tsc.h b/ddprof-lib/src/main/cpp/tsc.h
new file mode 100644
index 00000000..1874b9ed
--- /dev/null
+++ b/ddprof-lib/src/main/cpp/tsc.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright The async-profiler authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef _TSC_H
+#define _TSC_H
+
+#include "arguments.h"
+#include "os.h"
+
+
+const u64 NANOTIME_FREQ = 1000000000;
+
+
+#if defined(__x86_64__) || defined(__i386__)
+
+#include <cpuid.h>
+
+#define TSC_SUPPORTED true
+
+static inline u64 rdtsc() {
+#if defined(__x86_64__)
+    u32 lo, hi;
+    asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
+    return ((u64)hi << 32) | lo;
+#else
+    u64 result;
+    asm volatile("rdtsc" : "=A" (result));
+    return result;
+#endif
+}
+
+// Returns true if this CPU has a good ("invariant") timestamp counter
+static bool cpuHasGoodTimestampCounter() {
+    unsigned int eax, ebx, ecx, edx;
+
+    // Check if CPUID supports misc feature flags
+    __cpuid(0x80000000, eax, ebx, ecx, edx);
+    if (eax < 0x80000007) {
+        return false;
+    }
+
+    // Get misc feature flags
+    __cpuid(0x80000007, eax, ebx, ecx, edx);
+
+    // Bit 8 of EDX indicates invariant TSC
+    return (edx & (1 << 8)) != 0;
+}
+
+#elif defined(__aarch64__)
+
+#define TSC_SUPPORTED true
+
+static inline u64 rdtsc() {
+    u64 value;
+    asm volatile("mrs %0, cntvct_el0" : "=r"(value));
+    return value;
+}
+
+static bool cpuHasGoodTimestampCounter() {
+    // AARCH64 always has a good timestamp counter.
+    return true;
+}
+
+#else
+
+#define TSC_SUPPORTED false
+#define rdtsc() 0
+
+static bool cpuHasGoodTimestampCounter() {
+    return false;
+}
+
+#endif
+
+
+class TSC {
+  private:
+    static bool _initialized;
+    static bool _available;
+    static bool _enabled;
+    static u64 _offset;
+    static u64 _frequency;
+
+  public:
+    static void enable(Clock clock);
+
+    static bool enabled() {
+        return TSC_SUPPORTED && _enabled;
+    }
+
+    static u64 ticks() {
+        return enabled() ? rdtsc() - _offset : OS::nanotime();
+    }
+
+    // Ticks per second.
+    // When the TSC is used without a JVM, no calibration happens,
+    // so this function may return an inaccurate value.
+    static u64 frequency() {
+        return enabled() ? _frequency : NANOTIME_FREQ;
+    }
+};
+
+#endif // _TSC_H
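cpuHasGoodTimestampCounter() probes CPUID leaf 0x80000007 directly. An equivalent, hedged standalone check using GCC/Clang's `__get_cpuid` helper from the same `<cpuid.h>`, which performs the max-leaf test itself:

```cpp
#include <cpuid.h>

static bool invariantTsc() {
    unsigned int eax, ebx, ecx, edx;
    // __get_cpuid returns 0 if leaf 0x80000007 is not supported.
    if (!__get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx)) {
        return false;
    }
    return (edx & (1u << 8)) != 0;  // CPUID.80000007H:EDX[8] = Invariant TSC
}
```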
diff --git a/ddprof-lib/src/main/cpp/vmStructs.cpp b/ddprof-lib/src/main/cpp/vmStructs.cpp
new file mode 100644
index 00000000..e7f3b66d
--- /dev/null
+++ b/ddprof-lib/src/main/cpp/vmStructs.cpp
@@ -0,0 +1,762 @@
+/*
+ * Copyright The async-profiler authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <pthread.h>
+#include <string.h>
+#include "vmStructs.h"
+#include "vmEntry.h"
+#include "j9Ext.h"
+#include "safeAccess.h"
+
+
+CodeCache* VMStructs::_libjvm = NULL;
+
+bool VMStructs::_has_class_names = false;
+bool VMStructs::_has_method_structs = false;
+bool VMStructs::_has_compiler_structs = false;
+bool VMStructs::_has_stack_structs = false;
+bool VMStructs::_has_class_loader_data = false;
+bool VMStructs::_has_native_thread_id = false;
+bool VMStructs::_has_perm_gen = false;
+bool VMStructs::_can_dereference_jmethod_id = false;
+bool VMStructs::_compact_object_headers = false;
+
+int VMStructs::_klass_name_offset = -1;
+int VMStructs::_symbol_length_offset = -1;
+int VMStructs::_symbol_length_and_refcount_offset = -1;
+int VMStructs::_symbol_body_offset = -1;
+int VMStructs::_oop_klass_offset = -1;
+int VMStructs::_class_loader_data_offset = -1;
+int VMStructs::_class_loader_data_next_offset = -1;
+int VMStructs::_methods_offset = -1;
+int VMStructs::_jmethod_ids_offset = -1;
+int VMStructs::_thread_osthread_offset = -1;
+int VMStructs::_thread_anchor_offset = -1;
+int VMStructs::_thread_state_offset = -1;
+int VMStructs::_thread_vframe_offset = -1;
+int VMStructs::_thread_exception_offset = -1;
+int VMStructs::_osthread_id_offset = -1;
+int VMStructs::_call_wrapper_anchor_offset = -1;
+int VMStructs::_comp_env_offset = -1;
+int VMStructs::_comp_task_offset = -1;
+int VMStructs::_comp_method_offset = -1;
+int VMStructs::_anchor_sp_offset = -1;
+int VMStructs::_anchor_pc_offset = -1;
+int VMStructs::_anchor_fp_offset = -1;
+int VMStructs::_blob_size_offset = -1;
+int VMStructs::_frame_size_offset = -1;
+int VMStructs::_frame_complete_offset = -1;
+int VMStructs::_code_offset = -1;
+int VMStructs::_data_offset = -1;
+int VMStructs::_mutable_data_offset = -1;
+int VMStructs::_relocation_size_offset = -1;
+int VMStructs::_scopes_pcs_offset = -1;
+int VMStructs::_scopes_data_offset = -1;
+int VMStructs::_nmethod_name_offset = -1;
+int VMStructs::_nmethod_method_offset = -1;
+int VMStructs::_nmethod_entry_offset = -1;
+int VMStructs::_nmethod_state_offset = -1;
+int VMStructs::_nmethod_level_offset = -1;
+int VMStructs::_nmethod_metadata_offset = -1;
+int VMStructs::_nmethod_immutable_offset = -1;
+int VMStructs::_method_constmethod_offset = -1;
+int VMStructs::_method_code_offset = -1;
+int VMStructs::_constmethod_constants_offset = -1;
+int VMStructs::_constmethod_idnum_offset = -1;
+int VMStructs::_constmethod_size = -1;
+int VMStructs::_pool_holder_offset = -1;
+int VMStructs::_array_len_offset = 0;
+int VMStructs::_array_data_offset = -1;
+int VMStructs::_code_heap_memory_offset = -1;
+int VMStructs::_code_heap_segmap_offset = -1;
+int VMStructs::_code_heap_segment_shift = -1;
+int VMStructs::_heap_block_used_offset = -1;
+int VMStructs::_vs_low_bound_offset = -1;
+int VMStructs::_vs_high_bound_offset = -1;
+int VMStructs::_vs_low_offset = -1;
+int VMStructs::_vs_high_offset = -1;
+int VMStructs::_flag_name_offset = -1;
+int VMStructs::_flag_addr_offset = -1;
+int VMStructs::_flag_origin_offset = -1;
+const char* VMStructs::_flags_addr = NULL;
+int VMStructs::_flag_count = 0;
+int VMStructs::_flag_size = 0;
+char* VMStructs::_code_heap[3] = 
{}; +const void* VMStructs::_code_heap_low = NO_MIN_ADDRESS; +const void* VMStructs::_code_heap_high = NO_MAX_ADDRESS; +char** VMStructs::_code_heap_addr = NULL; +const void** VMStructs::_code_heap_low_addr = NULL; +const void** VMStructs::_code_heap_high_addr = NULL; +int* VMStructs::_klass_offset_addr = NULL; +char** VMStructs::_narrow_klass_base_addr = NULL; +char* VMStructs::_narrow_klass_base = NULL; +int* VMStructs::_narrow_klass_shift_addr = NULL; +int VMStructs::_narrow_klass_shift = -1; +char** VMStructs::_collected_heap_addr = NULL; +char* VMStructs::_collected_heap = NULL; +int VMStructs::_collected_heap_reserved_offset = -1; +int VMStructs::_region_start_offset = -1; +int VMStructs::_region_size_offset = -1; +int VMStructs::_markword_klass_shift = -1; +int VMStructs::_markword_monitor_value = -1; +int VMStructs::_entry_frame_call_wrapper_offset = -1; +int VMStructs::_interpreter_frame_bcp_offset = 0; +unsigned char VMStructs::_unsigned5_base = 0; +const void** VMStructs::_call_stub_return_addr = NULL; +const void* VMStructs::_call_stub_return = NULL; +const void* VMStructs::_interpreted_frame_valid_start = NULL; +const void* VMStructs::_interpreted_frame_valid_end = NULL; + +jfieldID VMStructs::_eetop; +jfieldID VMStructs::_tid; +jfieldID VMStructs::_klass = NULL; +int VMStructs::_tls_index = -1; +intptr_t VMStructs::_env_offset = -1; +void* VMStructs::_java_thread_vtbl[6]; + +VMStructs::LockFunc VMStructs::_lock_func; +VMStructs::LockFunc VMStructs::_unlock_func; + + +uintptr_t VMStructs::readSymbol(const char* symbol_name) { + const void* symbol = _libjvm->findSymbol(symbol_name); + if (symbol == NULL) { + // Avoid JVM crash in case of missing symbols + return 0; + } + return *(uintptr_t*)symbol; +} + +// Run at agent load time +void VMStructs::init(CodeCache* libjvm) { + if (libjvm != NULL) { + _libjvm = libjvm; + initOffsets(); + initJvmFunctions(); + } +} + +// Run when VM is initialized and JNI is available +void VMStructs::ready() { + resolveOffsets(); + patchSafeFetch(); + initThreadBridge(); +} + +void VMStructs::initOffsets() { + uintptr_t entry = readSymbol("gHotSpotVMStructs"); + uintptr_t stride = readSymbol("gHotSpotVMStructEntryArrayStride"); + uintptr_t type_offset = readSymbol("gHotSpotVMStructEntryTypeNameOffset"); + uintptr_t field_offset = readSymbol("gHotSpotVMStructEntryFieldNameOffset"); + uintptr_t offset_offset = readSymbol("gHotSpotVMStructEntryOffsetOffset"); + uintptr_t address_offset = readSymbol("gHotSpotVMStructEntryAddressOffset"); + + if (entry != 0 && stride != 0) { + for (;; entry += stride) { + const char* type = *(const char**)(entry + type_offset); + const char* field = *(const char**)(entry + field_offset); + if (type == NULL || field == NULL) { + break; + } + + if (strcmp(type, "Klass") == 0) { + if (strcmp(field, "_name") == 0) { + _klass_name_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "Symbol") == 0) { + if (strcmp(field, "_length") == 0) { + _symbol_length_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_length_and_refcount") == 0) { + _symbol_length_and_refcount_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_body") == 0) { + _symbol_body_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "oopDesc") == 0) { + if (strcmp(field, "_metadata._klass") == 0) { + _oop_klass_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "Universe") == 0 || strcmp(type, "CompressedKlassPointers") == 0) { + if (strcmp(field, "_narrow_klass._base") 
== 0 || strcmp(field, "_base") == 0) { + _narrow_klass_base_addr = *(char***)(entry + address_offset); + } else if (strcmp(field, "_narrow_klass._shift") == 0 || strcmp(field, "_shift") == 0) { + _narrow_klass_shift_addr = *(int**)(entry + address_offset); + } else if (strcmp(field, "_collectedHeap") == 0) { + _collected_heap_addr = *(char***)(entry + address_offset); + } + } else if (strcmp(type, "CollectedHeap") == 0) { + if (strcmp(field, "_reserved") == 0) { + _collected_heap_reserved_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "MemRegion") == 0) { + if (strcmp(field, "_start") == 0) { + _region_start_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_word_size") == 0) { + _region_size_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "CompiledMethod") == 0 || strcmp(type, "nmethod") == 0) { + if (strcmp(field, "_method") == 0) { + _nmethod_method_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_verified_entry_offset") == 0) { + _nmethod_entry_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_verified_entry_point") == 0) { + _nmethod_entry_offset = - *(int*)(entry + offset_offset); + } else if (strcmp(field, "_state") == 0) { + _nmethod_state_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_comp_level") == 0) { + _nmethod_level_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_metadata_offset") == 0) { + _nmethod_metadata_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_immutable_data") == 0) { + _nmethod_immutable_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_scopes_pcs_offset") == 0) { + _scopes_pcs_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_scopes_data_offset") == 0) { + _scopes_data_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_scopes_data_begin") == 0) { + _scopes_data_offset = - *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "Method") == 0) { + if (strcmp(field, "_constMethod") == 0) { + _method_constmethod_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_code") == 0) { + _method_code_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "ConstMethod") == 0) { + if (strcmp(field, "_constants") == 0) { + _constmethod_constants_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_method_idnum") == 0) { + _constmethod_idnum_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "ConstantPool") == 0) { + if (strcmp(field, "_pool_holder") == 0) { + _pool_holder_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "InstanceKlass") == 0) { + if (strcmp(field, "_class_loader_data") == 0) { + _class_loader_data_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_methods") == 0) { + _methods_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_methods_jmethod_ids") == 0) { + _jmethod_ids_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "ClassLoaderData") == 0) { + if (strcmp(field, "_next") == 0) { + _class_loader_data_next_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "java_lang_Class") == 0) { + if (strcmp(field, "_klass_offset") == 0) { + _klass_offset_addr = *(int**)(entry + address_offset); + } + } else if (strcmp(type, "Thread") == 0) { + // Since JDK 25, _osthread field belongs to Thread rather than JavaThread + if (strcmp(field, "_osthread") == 0) { + 
_thread_osthread_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "JavaThread") == 0) { + if (strcmp(field, "_osthread") == 0) { + _thread_osthread_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_anchor") == 0) { + _thread_anchor_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_thread_state") == 0) { + _thread_state_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_vframe_array_head") == 0) { + _thread_vframe_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "ThreadShadow") == 0) { + if (strcmp(field, "_exception_file") == 0) { + _thread_exception_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "OSThread") == 0) { + if (strcmp(field, "_thread_id") == 0) { + _osthread_id_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "CompilerThread") == 0) { + if (strcmp(field, "_env") == 0) { + _comp_env_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "ciEnv") == 0) { + if (strcmp(field, "_task") == 0) { + _comp_task_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "CompileTask") == 0) { + if (strcmp(field, "_method") == 0) { + _comp_method_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "JavaCallWrapper") == 0) { + if (strcmp(field, "_anchor") == 0) { + _call_wrapper_anchor_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "JavaFrameAnchor") == 0) { + if (strcmp(field, "_last_Java_sp") == 0) { + _anchor_sp_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_last_Java_pc") == 0) { + _anchor_pc_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_last_Java_fp") == 0) { + _anchor_fp_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "CodeBlob") == 0) { + if (strcmp(field, "_size") == 0) { + _blob_size_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_frame_size") == 0) { + _frame_size_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_frame_complete_offset") == 0) { + _frame_complete_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_code_offset") == 0) { + _code_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_code_begin") == 0) { + _code_offset = - *(int*)(entry + offset_offset); + } else if (strcmp(field, "_data_offset") == 0) { + _data_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_mutable_data") == 0) { + _mutable_data_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_relocation_size") == 0) { + _relocation_size_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_name") == 0) { + _nmethod_name_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "CodeCache") == 0) { + if (strcmp(field, "_heap") == 0) { + _code_heap_addr = *(char***)(entry + address_offset); + } else if (strcmp(field, "_heaps") == 0) { + _code_heap_addr = *(char***)(entry + address_offset); + } else if (strcmp(field, "_low_bound") == 0) { + _code_heap_low_addr = *(const void***)(entry + address_offset); + } else if (strcmp(field, "_high_bound") == 0) { + _code_heap_high_addr = *(const void***)(entry + address_offset); + } + } else if (strcmp(type, "CodeHeap") == 0) { + if (strcmp(field, "_memory") == 0) { + _code_heap_memory_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_segmap") == 0) { + _code_heap_segmap_offset = *(int*)(entry + offset_offset); + } else if 
(strcmp(field, "_log2_segment_size") == 0) { + _code_heap_segment_shift = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "HeapBlock::Header") == 0) { + if (strcmp(field, "_used") == 0) { + _heap_block_used_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "VirtualSpace") == 0) { + if (strcmp(field, "_low_boundary") == 0) { + _vs_low_bound_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_high_boundary") == 0) { + _vs_high_bound_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_low") == 0) { + _vs_low_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_high") == 0) { + _vs_high_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "StubRoutines") == 0) { + if (strcmp(field, "_call_stub_return_address") == 0) { + _call_stub_return_addr = *(const void***)(entry + address_offset); + } + } else if (strcmp(type, "GrowableArrayBase") == 0 || strcmp(type, "GenericGrowableArray") == 0) { + if (strcmp(field, "_len") == 0) { + _array_len_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "GrowableArray") == 0) { + if (strcmp(field, "_data") == 0) { + _array_data_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "JVMFlag") == 0 || strcmp(type, "Flag") == 0) { + if (strcmp(field, "_name") == 0 || strcmp(field, "name") == 0) { + _flag_name_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_addr") == 0 || strcmp(field, "addr") == 0) { + _flag_addr_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_flags") == 0 || strcmp(field, "origin") == 0) { + _flag_origin_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "flags") == 0) { + _flags_addr = **(char***)(entry + address_offset); + } else if (strcmp(field, "numFlags") == 0) { + _flag_count = **(int**)(entry + address_offset); + } + } else if (strcmp(type, "PcDesc") == 0) { + // TODO + } else if (strcmp(type, "PermGen") == 0) { + _has_perm_gen = true; + } + } + } + + entry = readSymbol("gHotSpotVMTypes"); + stride = readSymbol("gHotSpotVMTypeEntryArrayStride"); + type_offset = readSymbol("gHotSpotVMTypeEntryTypeNameOffset"); + uintptr_t size_offset = readSymbol("gHotSpotVMTypeEntrySizeOffset"); + + if (entry != 0 && stride != 0) { + for (;; entry += stride) { + const char* type = *(const char**)(entry + type_offset); + if (type == NULL) { + break; + } + + if (strcmp(type, "JVMFlag") == 0 || strcmp(type, "Flag") == 0) { + _flag_size = *(int*)(entry + size_offset); + } else if (strcmp(type, "ConstMethod") == 0) { + _constmethod_size = *(int*)(entry + size_offset); + } + } + } + + entry = readSymbol("gHotSpotVMLongConstants"); + stride = readSymbol("gHotSpotVMLongConstantEntryArrayStride"); + uintptr_t name_offset = readSymbol("gHotSpotVMLongConstantEntryNameOffset"); + uintptr_t value_offset = readSymbol("gHotSpotVMLongConstantEntryValueOffset"); + + if (entry != 0 && stride != 0) { + for (;; entry += stride) { + const char* name = *(const char**)(entry + name_offset); + if (name == NULL) { + break; + } + + if (strncmp(name, "markWord::", 10) == 0) { + if (strcmp(name + 10, "klass_shift") == 0) { + _markword_klass_shift = *(long*)(entry + value_offset); + } else if (strcmp(name + 10, "monitor_value") == 0) { + _markword_monitor_value = *(long*)(entry + value_offset); + } + } + } + } + + entry = readSymbol("gHotSpotVMIntConstants"); + stride = readSymbol("gHotSpotVMIntConstantEntryArrayStride"); + name_offset = 
readSymbol("gHotSpotVMIntConstantEntryNameOffset"); + value_offset = readSymbol("gHotSpotVMIntConstantEntryValueOffset"); + + if (entry != 0 && stride != 0) { + for (;; entry += stride) { + const char* name = *(const char**)(entry + name_offset); + if (name == NULL) { + break; + } + + if (strcmp(name, "frame::entry_frame_call_wrapper_offset") == 0) { + _entry_frame_call_wrapper_offset = *(int*)(entry + value_offset) * sizeof(uintptr_t); + break; // remove it for reading more constants + } + } + } +} + +void VMStructs::resolveOffsets() { + if (VM::isOpenJ9() || VM::isZing()) { + return; + } + + if (_klass_offset_addr != NULL) { + _klass = (jfieldID)(uintptr_t)(*_klass_offset_addr << 2 | 2); + } + + JVMFlag* ccp = JVMFlag::find("UseCompressedClassPointers"); + if (ccp != NULL && ccp->get() && _narrow_klass_base_addr != NULL && _narrow_klass_shift_addr != NULL) { + _narrow_klass_base = *_narrow_klass_base_addr; + _narrow_klass_shift = *_narrow_klass_shift_addr; + } + + JVMFlag* coh = JVMFlag::find("UseCompactObjectHeaders"); + if (coh != NULL && coh->get()) { + _compact_object_headers = true; + } + + _has_class_names = _klass_name_offset >= 0 + && (_compact_object_headers ? (_markword_klass_shift >= 0 && _markword_monitor_value == MONITOR_BIT) + : _oop_klass_offset >= 0) + && (_symbol_length_offset >= 0 || _symbol_length_and_refcount_offset >= 0) + && _symbol_body_offset >= 0 + && _klass != NULL; + + _has_method_structs = _jmethod_ids_offset >= 0 + && _nmethod_method_offset >= 0 + && _nmethod_entry_offset != -1 + && _nmethod_state_offset >= 0 + && _method_constmethod_offset >= 0 + && _method_code_offset >= 0 + && _constmethod_constants_offset >= 0 + && _constmethod_idnum_offset >= 0 + && _constmethod_size >= 0 + && _pool_holder_offset >= 0; + + _has_compiler_structs = _comp_env_offset >= 0 + && _comp_task_offset >= 0 + && _comp_method_offset >= 0; + + _has_class_loader_data = _class_loader_data_offset >= 0 + && _class_loader_data_next_offset == sizeof(uintptr_t) * 8 + 8 + && _methods_offset >= 0 + && _klass != NULL + && _lock_func != NULL && _unlock_func != NULL; + +#if defined(__x86_64__) || defined(__i386__) + _interpreter_frame_bcp_offset = VM::hotspot_version() >= 11 ? -8 : VM::hotspot_version() == 8 ? -7 : 0; +#elif defined(__aarch64__) + _interpreter_frame_bcp_offset = VM::hotspot_version() >= 11 ? -9 : VM::hotspot_version() == 8 ? -7 : 0; + // The constant is missing on ARM, but fortunately, it has been stable for years across all JDK versions + _entry_frame_call_wrapper_offset = -64; +#elif defined(__arm__) || defined(__thumb__) + _interpreter_frame_bcp_offset = VM::hotspot_version() >= 11 ? -8 : 0; + _entry_frame_call_wrapper_offset = 0; +#endif + + // JDK-8292758 has slightly changed ScopeDesc encoding + if (VM::hotspot_version() >= 20) { + _unsigned5_base = 1; + } + + if (_call_stub_return_addr != NULL) { + _call_stub_return = *_call_stub_return_addr; + } + + // Since JDK 23, _metadata_offset is relative to _data_offset. 
See metadata() + if (_nmethod_immutable_offset < 0) { + _data_offset = 0; + } + + _has_stack_structs = _has_method_structs + && _call_wrapper_anchor_offset >= 0 + && _entry_frame_call_wrapper_offset != -1 + && _interpreter_frame_bcp_offset != 0 + && _code_offset != -1 + && _data_offset >= 0 + && _scopes_data_offset != -1 + && _scopes_pcs_offset >= 0 + && ((_mutable_data_offset >= 0 && _relocation_size_offset >= 0) || _nmethod_metadata_offset >= 0) + && _thread_vframe_offset >= 0 + && _thread_exception_offset >= 0 + && _constmethod_size >= 0; + + // Since JDK-8268406, it is no longer possible to get VMMethod* by dereferencing jmethodID + _can_dereference_jmethod_id = _has_method_structs && VM::hotspot_version() <= 25; + + if (_code_heap_addr != NULL && _code_heap_low_addr != NULL && _code_heap_high_addr != NULL) { + char* code_heaps = *_code_heap_addr; + unsigned int code_heap_count = *(unsigned int*)(code_heaps + _array_len_offset); + if (code_heap_count <= 3 && _array_data_offset >= 0) { + char* code_heap_array = *(char**)(code_heaps + _array_data_offset); + memcpy(_code_heap, code_heap_array, code_heap_count * sizeof(_code_heap[0])); + } + _code_heap_low = *_code_heap_low_addr; + _code_heap_high = *_code_heap_high_addr; + } else if (_code_heap_addr != NULL && _code_heap_memory_offset >= 0) { + _code_heap[0] = *_code_heap_addr; + _code_heap_low = *(const void**)(_code_heap[0] + _code_heap_memory_offset + _vs_low_bound_offset); + _code_heap_high = *(const void**)(_code_heap[0] + _code_heap_memory_offset + _vs_high_bound_offset); + } + + // Invariant: _code_heap[i] != NULL iff all CodeHeap structures are available + if (_code_heap[0] != NULL && _code_heap_segment_shift >= 0) { + _code_heap_segment_shift = *(int*)(_code_heap[0] + _code_heap_segment_shift); + } + if (_code_heap_memory_offset < 0 || _code_heap_segmap_offset < 0 || + _code_heap_segment_shift < 0 || _code_heap_segment_shift > 16 || + _heap_block_used_offset < 0) { + memset(_code_heap, 0, sizeof(_code_heap)); + } + + if (_collected_heap_addr != NULL && _collected_heap_reserved_offset >= 0 && + _region_start_offset >= 0 && _region_size_offset >= 0) { + _collected_heap = *_collected_heap_addr + _collected_heap_reserved_offset; + } +} + +void VMStructs::initJvmFunctions() { + if (VM::hotspot_version() == 8) { + _lock_func = (LockFunc)_libjvm->findSymbol("_ZN7Monitor28lock_without_safepoint_checkEv"); + _unlock_func = (LockFunc)_libjvm->findSymbol("_ZN7Monitor6unlockEv"); + } + + if (VM::hotspot_version() > 0) { + CodeBlob* blob = _libjvm->findBlob("_ZNK5frame26is_interpreted_frame_validEP10JavaThread"); + if (blob != NULL) { + _interpreted_frame_valid_start = blob->_start; + _interpreted_frame_valid_end = blob->_end; + } + } +} + +void VMStructs::patchSafeFetch() { + // Workarounds for JDK-8307549 and JDK-8321116 + if (WX_MEMORY && VM::hotspot_version() == 17) { + void** entry = (void**)_libjvm->findSymbol("_ZN12StubRoutines18_safefetch32_entryE"); + if (entry != NULL) { + *entry = (void*)SafeAccess::load32; + } + } else if (WX_MEMORY && VM::hotspot_version() == 11) { + void** entry = (void**)_libjvm->findSymbol("_ZN12StubRoutines17_safefetchN_entryE"); + if (entry != NULL) { + *entry = (void*)SafeAccess::load; + } + } +} + +void VMStructs::initTLS(void* vm_thread) { + for (int i = 0; i < 1024; i++) { + if (pthread_getspecific((pthread_key_t)i) == vm_thread) { + _tls_index = i; + break; + } + } +} + +void VMStructs::initThreadBridge() { + jthread thread; + if (VM::jvmti()->GetCurrentThread(&thread) != 0) { + return; + } + + JNIEnv* 
env = VM::jni(); + jclass thread_class = env->FindClass("java/lang/Thread"); + if (thread_class == NULL || (_tid = env->GetFieldID(thread_class, "tid", "J")) == NULL) { + env->ExceptionClear(); + return; + } + + if (VM::isOpenJ9()) { + void* j9thread = J9Ext::j9thread_self(); + if (j9thread != NULL) { + initTLS(j9thread); + } + } else { + // Get eetop field - a bridge from Java Thread to VMThread + if ((_eetop = env->GetFieldID(thread_class, "eetop", "J")) == NULL) { + // No such field - probably not a HotSpot JVM + env->ExceptionClear(); + return; + } + + VMThread* vm_thread = VMThread::fromJavaThread(env, thread); + if (vm_thread != NULL) { + _has_native_thread_id = _thread_osthread_offset >= 0 && _osthread_id_offset >= 0; + initTLS(vm_thread); + _env_offset = (intptr_t)env - (intptr_t)vm_thread; + memcpy(_java_thread_vtbl, vm_thread->vtable(), sizeof(_java_thread_vtbl)); + } + } +} + +VMThread* VMThread::current() { + return _tls_index >= 0 ? (VMThread*)pthread_getspecific((pthread_key_t)_tls_index) : NULL; +} + +int VMThread::nativeThreadId(JNIEnv* jni, jthread thread) { + if (_has_native_thread_id) { + VMThread* vm_thread = fromJavaThread(jni, thread); + return vm_thread != NULL ? vm_thread->osThreadId() : -1; + } + return VM::isOpenJ9() ? J9Ext::GetOSThreadID(thread) : -1; +} + +int VMThread::osThreadId() { + const char* osthread = *(const char**) at(_thread_osthread_offset); + if (osthread != NULL) { + // Java thread may be in the middle of termination, and its osthread structure just released + return SafeAccess::load32((int32_t*)(osthread + _osthread_id_offset), -1); + } + return -1; +} + +JNIEnv* VMThread::jni() { + if (_env_offset < 0) { + return VM::jni(); // fallback for non-HotSpot JVM + } + return isJavaThread() ? (JNIEnv*) at(_env_offset) : NULL; +} + +jmethodID VMMethod::id() { + // We may find a bogus NMethod during stack walking, it does not always point to a valid VMMethod + const char* const_method = (const char*) SafeAccess::load((void**) at(_method_constmethod_offset)); + if (!goodPtr(const_method)) { + return NULL; + } + + const char* cpool = (const char*) SafeAccess::load((void**)(const_method + _constmethod_constants_offset)); + unsigned short num = (unsigned short) SafeAccess::load32((int32_t*)(const_method + _constmethod_idnum_offset), 0); + if (goodPtr(cpool)) { + VMKlass* holder = *(VMKlass**)(cpool + _pool_holder_offset); + if (goodPtr(holder)) { + jmethodID* ids = holder->jmethodIDs(); + if (ids != NULL && num < (size_t)ids[0]) { + return ids[num + 1]; + } + } + } + return NULL; +} + +jmethodID VMMethod::validatedId() { + jmethodID method_id = id(); + if (!_can_dereference_jmethod_id || (goodPtr(method_id) && *(VMMethod**)method_id == this)) { + return method_id; + } + return NULL; +} + +NMethod* CodeHeap::findNMethod(char* heap, const void* pc) { + unsigned char* heap_start = *(unsigned char**)(heap + _code_heap_memory_offset + _vs_low_offset); + unsigned char* segmap = *(unsigned char**)(heap + _code_heap_segmap_offset + _vs_low_offset); + size_t idx = ((unsigned char*)pc - heap_start) >> _code_heap_segment_shift; + + if (segmap[idx] == 0xff) { + return NULL; + } + while (segmap[idx] > 0) { + idx -= segmap[idx]; + } + + unsigned char* block = heap_start + (idx << _code_heap_segment_shift) + _heap_block_used_offset; + return *block ? 
align<NMethod*>(block + sizeof(uintptr_t)) : NULL;
+}
+
+JVMFlag* JVMFlag::find(const char* name) {
+    if (_flags_addr != NULL && _flag_size > 0) {
+        for (int i = 0; i < _flag_count; i++) {
+            JVMFlag* f = (JVMFlag*)(_flags_addr + i * _flag_size);
+            if (f->name() != NULL && strcmp(f->name(), name) == 0 && f->addr() != NULL) {
+                return f;
+            }
+        }
+    }
+    return NULL;
+}
+
+int NMethod::findScopeOffset(const void* pc) {
+    intptr_t pc_offset = (const char*)pc - code();
+    if (pc_offset < 0 || pc_offset > 0x7fffffff) {
+        return -1;
+    }
+
+    const int* scopes_pcs = (const int*) at(_scopes_pcs_offset);
+    PcDesc* pcd = (PcDesc*) immutableDataAt(scopes_pcs[0]);
+    PcDesc* pcd_end = (PcDesc*) immutableDataAt(scopes_pcs[1]);
+    int low = 0;
+    int high = (pcd_end - pcd) - 1;
+
+    while (low <= high) {
+        int mid = (unsigned int)(low + high) >> 1;
+        if (pcd[mid]._pc < pc_offset) {
+            low = mid + 1;
+        } else if (pcd[mid]._pc > pc_offset) {
+            high = mid - 1;
+        } else {
+            return pcd[mid]._scope_offset;
+        }
+    }
+
+    return pcd + low < pcd_end ? pcd[low]._scope_offset : -1;
+}
+
+int ScopeDesc::readInt() {
+    unsigned char c = *_stream++;
+    unsigned int n = c - _unsigned5_base;
+    if (c >= 192) {
+        for (int shift = 6; ; shift += 6) {
+            c = *_stream++;
+            n += (c - _unsigned5_base) << shift;
+            if (c < 192 || shift >= 24) break;
+        }
+    }
+    return n;
+}
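readInt() above decodes HotSpot's compressed scope stream, and decode() (declared in vmStructs.h below) returns the sender scope's offset, so inlined frames can be recovered by chaining decodes. A hedged sketch of that walk; the helper name is hypothetical:

```cpp
#include "vmStructs.h"

// Walk the (possibly inlined) Java frames at `pc` inside compiled method `nm`.
void walkInlineChain(NMethod* nm, const void* pc) {
    int offset = nm->findScopeOffset(pc);   // -1 if pc is outside the scope table
    if (offset >= 0) {
        ScopeDesc scope(nm);
        do {
            offset = scope.decode(offset);  // returns the sender scope's offset
            // scope.method() and scope.bci() now describe one frame, innermost first
        } while (offset > 0);
    }
}
```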
diff --git a/ddprof-lib/src/main/cpp/vmStructs.h b/ddprof-lib/src/main/cpp/vmStructs.h
new file mode 100644
index 00000000..1afd9e52
--- /dev/null
+++ b/ddprof-lib/src/main/cpp/vmStructs.h
@@ -0,0 +1,705 @@
+/*
+ * Copyright The async-profiler authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef _VMSTRUCTS_H
+#define _VMSTRUCTS_H
+
+#include <jni.h>
+#include <stdint.h>
+#include <string.h>
+#include <type_traits>
+#include "codeCache.h"
+
+
+class VMStructs {
+  protected:
+    enum { MONITOR_BIT = 2 };
+
+    static CodeCache* _libjvm;
+
+    static bool _has_class_names;
+    static bool _has_method_structs;
+    static bool _has_compiler_structs;
+    static bool _has_stack_structs;
+    static bool _has_class_loader_data;
+    static bool _has_native_thread_id;
+    static bool _has_perm_gen;
+    static bool _can_dereference_jmethod_id;
+    static bool _compact_object_headers;
+
+    static int _klass_name_offset;
+    static int _symbol_length_offset;
+    static int _symbol_length_and_refcount_offset;
+    static int _symbol_body_offset;
+    static int _oop_klass_offset;
+    static int _class_loader_data_offset;
+    static int _class_loader_data_next_offset;
+    static int _methods_offset;
+    static int _jmethod_ids_offset;
+    static int _thread_osthread_offset;
+    static int _thread_anchor_offset;
+    static int _thread_state_offset;
+    static int _thread_vframe_offset;
+    static int _thread_exception_offset;
+    static int _osthread_id_offset;
+    static int _call_wrapper_anchor_offset;
+    static int _comp_env_offset;
+    static int _comp_task_offset;
+    static int _comp_method_offset;
+    static int _anchor_sp_offset;
+    static int _anchor_pc_offset;
+    static int _anchor_fp_offset;
+    static int _blob_size_offset;
+    static int _frame_size_offset;
+    static int _frame_complete_offset;
+    static int _code_offset;
+    static int _data_offset;
+    static int _mutable_data_offset;
+    static int _relocation_size_offset;
+    static int _scopes_pcs_offset;
+    static int _scopes_data_offset;
+    static int _nmethod_name_offset;
+    static int _nmethod_method_offset;
+    static int _nmethod_entry_offset;
+    static int _nmethod_state_offset;
+    static int _nmethod_level_offset;
+    static int _nmethod_metadata_offset;
+    static int _nmethod_immutable_offset;
+    static int _method_constmethod_offset;
+    static int _method_code_offset;
+    static int _constmethod_constants_offset;
+    static int _constmethod_idnum_offset;
+    static int _constmethod_size;
+    static int _pool_holder_offset;
+    static int _array_len_offset;
+    static int _array_data_offset;
+    static int _code_heap_memory_offset;
+    static int _code_heap_segmap_offset;
+    static int _code_heap_segment_shift;
+    static int _heap_block_used_offset;
+    static int _vs_low_bound_offset;
+    static int _vs_high_bound_offset;
+    static int _vs_low_offset;
+    static int _vs_high_offset;
+    static int _flag_name_offset;
+    static int _flag_addr_offset;
+    static int _flag_origin_offset;
+    static const char* _flags_addr;
+    static int _flag_count;
+    static int _flag_size;
+    static char* _code_heap[3];
+    static const void* _code_heap_low;
+    static const void* _code_heap_high;
+    static char** _code_heap_addr;
+    static const void** _code_heap_low_addr;
+    static const void** _code_heap_high_addr;
+    static int* _klass_offset_addr;
+    static char** _narrow_klass_base_addr;
+    static char* _narrow_klass_base;
+    static int* _narrow_klass_shift_addr;
+    static int _narrow_klass_shift;
+    static char** _collected_heap_addr;
+    static char* _collected_heap;
+    static int _collected_heap_reserved_offset;
+    static int _region_start_offset;
+    static int _region_size_offset;
+    static int _markword_klass_shift;
+    static int _markword_monitor_value;
+    static int _entry_frame_call_wrapper_offset;
+    static int _interpreter_frame_bcp_offset;
+    static unsigned char _unsigned5_base;
+    static const void** _call_stub_return_addr;
+    static const void* _call_stub_return;
+    static const void* _interpreted_frame_valid_start;
+    static const void* _interpreted_frame_valid_end;
+
+    static jfieldID _eetop;
+    static jfieldID _tid;
+    static jfieldID _klass;
+    static int _tls_index;
+    static intptr_t _env_offset;
+    static void* _java_thread_vtbl[6];
+
+    typedef void (*LockFunc)(void*);
+    static LockFunc _lock_func;
+    static LockFunc _unlock_func;
+
+    static uintptr_t readSymbol(const char* symbol_name);
+    static void initOffsets();
+    static void resolveOffsets();
+    static void patchSafeFetch();
+    static void initJvmFunctions();
+    static void initTLS(void* vm_thread);
+    static void initThreadBridge();
+
+    const char* at(int offset) {
+        return (const char*)this + offset;
+    }
+
+    static bool goodPtr(const void* ptr) {
+        return (uintptr_t)ptr >= 0x1000 && ((uintptr_t)ptr & (sizeof(uintptr_t) - 1)) == 0;
+    }
+
+    template <typename T>
+    static T align(const void* ptr) {
+        static_assert(std::is_pointer<T>::value, "T must be a pointer type");
+        return (T)((uintptr_t)ptr & ~(sizeof(T) - 1));
+    }
+
+  public:
+    static void init(CodeCache* libjvm);
+    static void ready();
+
+    static CodeCache* libjvm() {
+        return _libjvm;
+    }
+
+    static bool hasClassNames() {
+        return _has_class_names;
+    }
+
+    static bool hasMethodStructs() {
+        return _has_method_structs;
+    }
+
+    static bool hasCompilerStructs() {
+        return _has_compiler_structs;
+    }
+
+    static bool hasStackStructs() {
+        return _has_stack_structs;
+    }
+
+    static bool hasClassLoaderData() {
+        return _has_class_loader_data;
+    }
+
+    static bool hasNativeThreadId() {
+        return _has_native_thread_id;
+    }
+
+    static bool hasJavaThreadId() {
+        return _tid != NULL;
+    }
+
+    static bool isInterpretedFrameValidFunc(const void* pc) {
+        return pc >= _interpreted_frame_valid_start && pc < _interpreted_frame_valid_end;
+    }
+};
+
+
+class MethodList {
+  public:
+    enum { SIZE = 8 };
+
+  private:
+    intptr_t _method[SIZE];
+    int _ptr;
+    MethodList* _next;
+    int _padding;
+
+  public:
+    
MethodList(MethodList* next) : _ptr(0), _next(next), _padding(0) { + for (int i = 0; i < SIZE; i++) { + _method[i] = 0x37; + } + } +}; + + +class NMethod; +class VMMethod; + +class VMSymbol : VMStructs { + public: + unsigned short length() { + if (_symbol_length_offset >= 0) { + return *(unsigned short*) at(_symbol_length_offset); + } else { + return *(unsigned int*) at(_symbol_length_and_refcount_offset) >> 16; + } + } + + const char* body() { + return at(_symbol_body_offset); + } +}; + +class ClassLoaderData : VMStructs { + private: + void* mutex() { + return *(void**) at(sizeof(uintptr_t) * 3); + } + + public: + void lock() { + _lock_func(mutex()); + } + + void unlock() { + _unlock_func(mutex()); + } + + MethodList** methodList() { + return (MethodList**) at(sizeof(uintptr_t) * 6 + 8); + } +}; + +class VMKlass : VMStructs { + public: + static VMKlass* fromJavaClass(JNIEnv* env, jclass cls) { + if (_has_perm_gen) { + jobject klassOop = env->GetObjectField(cls, _klass); + return (VMKlass*)(*(uintptr_t**)klassOop + 2); + } else if (sizeof(VMKlass*) == 8) { + return (VMKlass*)(uintptr_t)env->GetLongField(cls, _klass); + } else { + return (VMKlass*)(uintptr_t)env->GetIntField(cls, _klass); + } + } + + static VMKlass* fromHandle(uintptr_t handle) { + if (_has_perm_gen) { + // On JDK 7 KlassHandle is a pointer to klassOop, hence one more indirection + return (VMKlass*)(*(uintptr_t**)handle + 2); + } else { + return (VMKlass*)handle; + } + } + + static VMKlass* fromOop(uintptr_t oop) { + if (_narrow_klass_shift >= 0) { + uintptr_t narrow_klass; + if (_compact_object_headers) { + uintptr_t mark = *(uintptr_t*)oop; + if (mark & MONITOR_BIT) { + mark = *(uintptr_t*)(mark ^ MONITOR_BIT); + } + narrow_klass = mark >> _markword_klass_shift; + } else { + narrow_klass = *(unsigned int*)(oop + _oop_klass_offset); + } + return (VMKlass*)(_narrow_klass_base + (narrow_klass << _narrow_klass_shift)); + } else { + return *(VMKlass**)(oop + _oop_klass_offset); + } + } + + VMSymbol* name() { + return *(VMSymbol**) at(_klass_name_offset); + } + + ClassLoaderData* classLoaderData() { + return *(ClassLoaderData**) at(_class_loader_data_offset); + } + + int methodCount() { + int* methods = *(int**) at(_methods_offset); + return methods == NULL ? 
0 : *methods & 0xffff;
+    }
+
+    jmethodID* jmethodIDs() {
+        return __atomic_load_n((jmethodID**) at(_jmethod_ids_offset), __ATOMIC_ACQUIRE);
+    }
+};
+
+class JavaFrameAnchor : VMStructs {
+  private:
+    enum { MAX_CALL_WRAPPER_DISTANCE = 512 };
+
+  public:
+    static JavaFrameAnchor* fromEntryFrame(uintptr_t fp) {
+        const char* call_wrapper = *(const char**)(fp + _entry_frame_call_wrapper_offset);
+        if (!goodPtr(call_wrapper) || (uintptr_t)call_wrapper - fp > MAX_CALL_WRAPPER_DISTANCE) {
+            return NULL;
+        }
+        return (JavaFrameAnchor*)(call_wrapper + _call_wrapper_anchor_offset);
+    }
+
+    uintptr_t lastJavaSP() {
+        return *(uintptr_t*) at(_anchor_sp_offset);
+    }
+
+    uintptr_t lastJavaFP() {
+        return *(uintptr_t*) at(_anchor_fp_offset);
+    }
+
+    const void* lastJavaPC() {
+        return *(const void**) at(_anchor_pc_offset);
+    }
+
+    void setLastJavaPC(const void* pc) {
+        *(const void**) at(_anchor_pc_offset) = pc;
+    }
+
+    bool getFrame(const void*& pc, uintptr_t& sp, uintptr_t& fp) {
+        if (lastJavaPC() != NULL && lastJavaSP() != 0) {
+            pc = lastJavaPC();
+            sp = lastJavaSP();
+            fp = lastJavaFP();
+            return true;
+        }
+        return false;
+    }
+};
+
+class VMThread : VMStructs {
+  public:
+    static VMThread* current();
+
+    static int key() {
+        return _tls_index;
+    }
+
+    static VMThread* fromJavaThread(JNIEnv* env, jthread thread) {
+        return (VMThread*)(uintptr_t)env->GetLongField(thread, _eetop);
+    }
+
+    static jlong javaThreadId(JNIEnv* env, jthread thread) {
+        return env->GetLongField(thread, _tid);
+    }
+
+    static int nativeThreadId(JNIEnv* jni, jthread thread);
+
+    int osThreadId();
+
+    JNIEnv* jni();
+
+    const void** vtable() {
+        return *(const void***)this;
+    }
+
+    // This thread is considered a JavaThread if at least 2 of the selected 3 vtable entries
+    // match those of a known JavaThread (which is either an application thread or AttachListener).
+    // Indexes were carefully chosen to work on OpenJDK 8 to 25, both product and debug builds.
+    bool isJavaThread() {
+        const void** vtbl = vtable();
+        return (vtbl[1] == _java_thread_vtbl[1])
+             + (vtbl[3] == _java_thread_vtbl[3])
+             + (vtbl[5] == _java_thread_vtbl[5]) >= 2;
+    }
+
+    int state() {
+        return _thread_state_offset >= 0 ? 
*(int*) at(_thread_state_offset) : 0; + } + + bool inJava() { + return state() == 8; + } + + bool inDeopt() { + return *(void**) at(_thread_vframe_offset) != NULL; + } + + void*& exception() { + return *(void**) at(_thread_exception_offset); + } + + JavaFrameAnchor* anchor() { + return (JavaFrameAnchor*) at(_thread_anchor_offset); + } + + VMMethod* compiledMethod() { + const char* env = *(const char**) at(_comp_env_offset); + if (env != NULL) { + const char* task = *(const char**) (env + _comp_task_offset); + if (task != NULL) { + return *(VMMethod**) (task + _comp_method_offset); + } + } + return NULL; + } +}; + +class VMMethod : VMStructs { + public: + jmethodID id(); + + // Performs extra validation when VMMethod comes from incomplete frame + jmethodID validatedId(); + + // Workaround for JDK-8313816 + static bool isStaleMethodId(jmethodID id) { + if (!_can_dereference_jmethod_id) return false; + VMMethod* vm_method = *(VMMethod**)id; + return vm_method == NULL || vm_method->id() == NULL; + } + + const char* bytecode() { + return *(const char**) at(_method_constmethod_offset) + _constmethod_size; + } + + NMethod* code() { + return *(NMethod**) at(_method_code_offset); + } +}; + +class NMethod : VMStructs { + public: + int size() { + return *(int*) at(_blob_size_offset); + } + + int frameSize() { + return *(int*) at(_frame_size_offset); + } + + short frameCompleteOffset() { + return *(short*) at(_frame_complete_offset); + } + + void setFrameCompleteOffset(int offset) { + if (_nmethod_immutable_offset > 0) { + // _frame_complete_offset is short on JDK 23+ + *(short*) at(_frame_complete_offset) = offset; + } else { + *(int*) at(_frame_complete_offset) = offset; + } + } + + const char* immutableDataAt(int offset) { + if (_nmethod_immutable_offset > 0) { + return *(const char**) at(_nmethod_immutable_offset) + offset; + } + return at(offset); + } + + const char* code() { + if (_code_offset > 0) { + return at(*(int*) at(_code_offset)); + } else { + return *(const char**) at(-_code_offset); + } + } + + const char* scopes() { + if (_scopes_data_offset > 0) { + return immutableDataAt(*(int*) at(_scopes_data_offset)); + } else { + return *(const char**) at(-_scopes_data_offset); + } + } + + const void* entry() { + if (_nmethod_entry_offset > 0) { + return at(*(int*) at(_code_offset) + *(unsigned short*) at(_nmethod_entry_offset)); + } else { + return *(void**) at(-_nmethod_entry_offset); + } + } + + bool contains(const void* pc) { + return pc >= this && pc < at(size()); + } + + bool isFrameCompleteAt(const void* pc) { + return pc >= code() + frameCompleteOffset(); + } + + bool isEntryFrame(const void* pc) { + return pc == _call_stub_return; + } + + const char* name() { + return *(const char**) at(_nmethod_name_offset); + } + + bool isNMethod() { + const char* n = name(); + return n != NULL && (strcmp(n, "nmethod") == 0 || strcmp(n, "native nmethod") == 0); + } + + bool isInterpreter() { + const char* n = name(); + return n != NULL && strcmp(n, "Interpreter") == 0; + } + + bool isStub() { + const char* n = name(); + return n != NULL && strncmp(n, "StubRoutines", 12) == 0; + } + + bool isVTableStub() { + const char* n = name(); + return n != NULL && strcmp(n, "vtable chunks") == 0; + } + + VMMethod* method() { + return *(VMMethod**) at(_nmethod_method_offset); + } + + char state() { + return *at(_nmethod_state_offset); + } + + bool isAlive() { + return state() >= 0 && state() <= 1; + } + + int level() { + return _nmethod_level_offset >= 0 ? 
*(signed char*) at(_nmethod_level_offset) : 0; + } + + VMMethod** metadata() { + if (_mutable_data_offset >= 0) { + // Since JDK 25 + return (VMMethod**) (*(char**) at(_mutable_data_offset) + *(int*) at(_relocation_size_offset)); + } else if (_data_offset > 0) { + // since JDK 23 + return (VMMethod**) at(*(int*) at(_data_offset) + *(unsigned short*) at(_nmethod_metadata_offset)); + } + return (VMMethod**) at(*(int*) at(_nmethod_metadata_offset)); + } + + int findScopeOffset(const void* pc); +}; + +class CodeHeap : VMStructs { + private: + static bool contains(char* heap, const void* pc) { + return heap != NULL && + pc >= *(const void**)(heap + _code_heap_memory_offset + _vs_low_offset) && + pc < *(const void**)(heap + _code_heap_memory_offset + _vs_high_offset); + } + + static NMethod* findNMethod(char* heap, const void* pc); + + public: + static bool available() { + return _code_heap_addr != NULL; + } + + static bool contains(const void* pc) { + return _code_heap_low <= pc && pc < _code_heap_high; + } + + static void updateBounds(const void* start, const void* end) { + for (const void* low = _code_heap_low; + start < low && !__sync_bool_compare_and_swap(&_code_heap_low, low, start); + low = _code_heap_low); + for (const void* high = _code_heap_high; + end > high && !__sync_bool_compare_and_swap(&_code_heap_high, high, end); + high = _code_heap_high); + } + + static NMethod* findNMethod(const void* pc) { + if (contains(_code_heap[0], pc)) return findNMethod(_code_heap[0], pc); + if (contains(_code_heap[1], pc)) return findNMethod(_code_heap[1], pc); + if (contains(_code_heap[2], pc)) return findNMethod(_code_heap[2], pc); + return NULL; + } +}; + +class CollectedHeap : VMStructs { + public: + static bool created() { + return _collected_heap_addr != NULL && *_collected_heap_addr != NULL; + } + + static CollectedHeap* heap() { + return (CollectedHeap*)_collected_heap; + } + + uintptr_t start() { + return *(uintptr_t*) at(_region_start_offset); + } + + uintptr_t size() { + return (*(uintptr_t*) at(_region_size_offset)) * sizeof(uintptr_t); + } +}; + +class JVMFlag : VMStructs { + private: + enum { + ORIGIN_DEFAULT = 0, + ORIGIN_MASK = 15, + SET_ON_CMDLINE = 1 << 17 + }; + + public: + static JVMFlag* find(const char* name); + + const char* name() { + return *(const char**) at(_flag_name_offset); + } + + char* addr() { + return *(char**) at(_flag_addr_offset); + } + + bool isDefault() { + return _flag_origin_offset < 0 || (*(int*) at(_flag_origin_offset) & ORIGIN_MASK) == ORIGIN_DEFAULT; + } + + void setCmdline() { + if (_flag_origin_offset >= 0) { + *(int*) at(_flag_origin_offset) |= SET_ON_CMDLINE; + } + } + + char get() { + return *addr(); + } + + void set(char value) { + *addr() = value; + } +}; + +class PcDesc { + public: + int _pc; + int _scope_offset; + int _obj_offset; + int _flags; +}; + +class ScopeDesc : VMStructs { + private: + const unsigned char* _scopes; + VMMethod** _metadata; + const unsigned char* _stream; + int _method_offset; + int _bci; + + int readInt(); + + public: + ScopeDesc(NMethod* nm) { + _scopes = (const unsigned char*)nm->scopes(); + _metadata = nm->metadata(); + } + + int decode(int offset) { + _stream = _scopes + offset; + int sender_offset = readInt(); + _method_offset = readInt(); + _bci = readInt() - 1; + return sender_offset; + } + + VMMethod* method() { + return _method_offset > 0 ? 
+class InterpreterFrame : VMStructs {
+  public:
+    enum {
+        sender_sp_offset = -1,
+        method_offset = -3
+    };
+
+    static int bcp_offset() {
+        return _interpreter_frame_bcp_offset;
+    }
+};
+
+#endif // _VMSTRUCTS_H
diff --git a/ddprof-lib/src/test/fuzz/fuzz_dwarf.cpp b/ddprof-lib/src/test/fuzz/fuzz_dwarf.cpp
index ceae8b48..ad844cb9 100644
--- a/ddprof-lib/src/test/fuzz/fuzz_dwarf.cpp
+++ b/ddprof-lib/src/test/fuzz/fuzz_dwarf.cpp
@@ -21,7 +21,7 @@
 #include 
 #include 
 
-// Include the DWARF parser - dwarf.h comes from cpp-external (async-profiler upstream)
+// Include the DWARF parser
 #include "dwarf.h"
 #include "dwarf_dd.h"
diff --git a/ddprof-lib/src/test/make/Makefile b/ddprof-lib/src/test/make/Makefile
index 09b62226..df716720 100644
--- a/ddprof-lib/src/test/make/Makefile
+++ b/ddprof-lib/src/test/make/Makefile
@@ -1,5 +1,5 @@
 CC := g++
-SRCDIR := ../../main/cpp-external ../../main/cpp
+SRCDIR := ../../main/cpp
 OBJDIR := ./../../../build/scanbuild_obj
 CFLAGS := -O0 -Wall -std=c++17 -fno-omit-frame-pointer -momit-leaf-frame-pointer -fvisibility=hidden
 SRCS := ${wildcard ${SRCDIR}/*.cpp }
@@ -29,4 +29,4 @@ $(OBJDIR)/%.o : ${SRCDIR}/%.cpp
 	${CC} ${CFLAGS} -DEBUG -DPROFILER_VERSION=\"snapshot\" ${INCLUDES} -c $< -o $@
 
 clean :
-	@rm -rf $(OBJDIR)
\ No newline at end of file
+	@rm -rf $(OBJDIR)
diff --git a/doc/event-type-system.md b/doc/event-type-system.md
index c14e7189..f93a978f 100644
--- a/doc/event-type-system.md
+++ b/doc/event-type-system.md
@@ -57,7 +57,7 @@ enum ASGCT_CallFrameType {
 
 Uses `EventType` consistently throughout:
 
-- **Function signature** (cpp-external/profiler.h:213):
+- **Function signature** (cpp/profiler.h:213):
   ```cpp
   u64 recordSample(void* ucontext, u64 counter, EventType event_type, Event* event);
   ```
diff --git a/gradle/lock.properties b/gradle/lock.properties
deleted file mode 100644
index a2d67147..00000000
--- a/gradle/lock.properties
+++ /dev/null
@@ -1,5 +0,0 @@
-ap.branch=dd/master
-ap.commit=1addbbddf55f00e176c1755156bb0ae40266eab7
-
-ctx_branch=main
-ctx_commit=b33673d801b85a6c38fa0e9f1a139cb246737ce8
diff --git a/gradle/patching.gradle b/gradle/patching.gradle
deleted file mode 100644
index b6aba7e3..00000000
--- a/gradle/patching.gradle
+++ /dev/null
@@ -1,288 +0,0 @@
-/*
- * Copyright 2025 Datadog, Inc
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Unified upstream patching configuration for DataDog Java Profiler
- *
- * This file defines all modifications applied to async-profiler upstream source files
- * to ensure compatibility with DataDog's requirements (ASan, memory safety, API extensions)
- *
- * CONFIGURATION SYNTAX AND SEMANTICS
- * ==================================
- *
- * Root Structure:
- * ---------------
- * ext.upstreamPatches = [
- *     "filename1.cpp": [patches for file1],
- *     "filename2.h": [patches for file2]
- * ]
- *
- * File Configuration Structure:
- * ----------------------------
- * Each file entry contains:
- *
- * "filename.ext": [
- *     validations: [                      // Optional: Pre-patch validation rules
- *         [contains: "required_text"],    // Ensures file contains specific text
- *         [contains: "another_check"]     // Multiple validations run in sequence
- *     ],
- *     operations: [                       // Required: List of patch operations
- *         [
- *             type: "patch_type",             // Required: Type of patch operation
- *             name: "Human readable name",    // Optional: Description of what this patch does
- *             description: "Detailed...",     // Optional: Extended description
- *             find: "regex_pattern",          // Required: Regex pattern to find in file
- *             replace: "replacement_text",    // Required: Text to replace matches with
- *             idempotent_check: "check_text"  // Optional: Text that indicates patch already applied
- *         ]
- *     ]
- * ]
- *
- * PATCH OPERATION TYPES
- * ====================
- *
- * 1. function_attribute:
- *    Purpose: Add attributes (like __attribute__) to function declarations
- *    Example: Add ASan no_sanitize attribute to prevent false positives
- *    find: "(bool\\s+StackFrame::unwindStub\\s*\\()"
- *    replace: "__attribute__((no_sanitize(\"address\"))) $1"
- *
- * 2. expression_replace:
- *    Purpose: Replace unsafe code patterns with safe equivalents
- *    Example: Replace direct pointer dereference with memcpy for ASan compatibility
- *    find: "\\*\\(unsigned int\\*\\)\\s*entry"
- *    replace: "([&] { unsigned int val; memcpy(&val, entry, sizeof(val)); return val; }())"
- *
- * 3. method_declaration:
- *    Purpose: Add new method declarations to class definitions
- *    Example: Add clearParsingCaches method to Symbols class
- *    find: "(static bool haveKernelSymbols\\(\\) \\{[^}]+\\})"
- *    replace: "$1\n static void clearParsingCaches();"
- *
- * 4. method_implementation:
- *    Purpose: Add complete method implementations to source files
- *    Example: Add clearParsingCaches implementation with cache clearing logic
- *    find: "(#endif \\/\\/ __linux__\\s*$)"
- *    replace: "void Symbols::clearParsingCaches() {\n _parsed_inodes.clear();\n}\n\n$1"
- *
- * REGEX PATTERNS AND REPLACEMENTS
- * ===============================
- *
- * Pattern Syntax:
- * - Use Java regex syntax (java.util.regex.Pattern)
- * - Escape special characters: \\( \\) \\{ \\} \\[ \\] \\* \\+ \\? \\. \\|
- * - Use \\s for whitespace, \\w for word characters, \\d for digits
- * - Use capture groups: (pattern) to capture parts for reuse
- * - Use non-capturing groups: (?:pattern) when grouping without capture
- *
- * Replacement Syntax:
- * - Use $1, $2, etc. to reference capture groups from find pattern
- * - Use \n for newlines in replacement text
- * - Use \t for tabs (though spaces are preferred for consistency)
- * - Escape dollar signs as \$ if literal $ needed
- *
- * IDEMPOTENT OPERATIONS
- * ====================
- *
- * Purpose: Prevent applying same patch multiple times
- * - Set idempotent_check to text that would exist after patch is applied
- * - System checks for this text before applying patch
- * - If found, patch is skipped with "already applied" message
- * - Critical for maintaining clean, predictable builds
- *
- * Example:
- *   find: "(bool\\s+StackFrame::unwindStub\\s*\\()"
- *   replace: "__attribute__((no_sanitize(\"address\"))) $1"
- *   idempotent_check: "__attribute__((no_sanitize(\"address\"))) bool StackFrame::unwindStub("
- *
- * VALIDATION RULES
- * ===============
- *
- * Purpose: Ensure upstream file structure hasn't changed in incompatible ways
- * Types:
- * - contains: "text" - File must contain this exact text
- * - Validates that expected functions, classes, or patterns exist
- * - Fails fast if upstream changes break patch assumptions
- * - Helps maintain compatibility across upstream updates
- *
- * Best Practices:
- * - Validate key function signatures that patches modify
- * - Validate class names and critical code structures
- * - Keep validations minimal but sufficient to catch breaking changes
- *
- * MAINTENANCE GUIDELINES
- * =====================
- *
- * Adding New Patches:
- * 1. Add file entry if not exists: "newfile.cpp": [...]
- * 2. Add validations to verify expected code structure
- * 3. Add operation with appropriate type, find, replace
- * 4. Always include idempotent_check to prevent double-application
- * 5. Test thoroughly with clean upstream files
- *
- * Modifying Existing Patches:
- * 1. Update find pattern if upstream code changed
- * 2. Update replace text if modification requirements changed
- * 3. Update idempotent_check to match new replacement
- * 4. Update validations if structural assumptions changed
- *
- * Removing Patches:
- * 1. Remove entire operation block
- * 2. Remove validations that are no longer needed
- * 3. Remove file entry if no operations remain
- * 4. Clean up any orphaned files that depended on removed patches
- */
-
-ext.upstreamPatches = [
-    // Stack frame unwinding patches for ASan compatibility and memory safety
-    "stackFrame_x64.cpp": [
-        validations: [
-            [contains: "StackFrame::"],
-            [contains: "StackFrame::unwindStub"],
-            [contains: "StackFrame::checkInterruptedSyscall"]
-        ],
-        operations: [
-            [
-                type: "function_attribute",
-                name: "Add ASan no_sanitize attribute to unwindStub",
-                description: "Adds __attribute__((no_sanitize(\"address\"))) to unwindStub function to prevent ASan false positives during stack unwinding",
-                find: "(bool\\s+StackFrame::unwindStub\\s*\\()",
-                replace: "__attribute__((no_sanitize(\"address\"))) \$1",
-                idempotent_check: "__attribute__((no_sanitize(\"address\"))) bool StackFrame::unwindStub("
-            ],
-            [
-                type: "expression_replace",
-                name: "Safe memory access for entry pointer check",
-                description: "Replaces unsafe pointer dereference with safe memcpy-based access to prevent ASan violations",
-                find: "entry\\s*!=\\s*NULL\\s*&&\\s*\\*\\(unsigned int\\*\\)\\s*entry\\s*==\\s*0xec8b4855",
-                replace: "entry != NULL && ([&] { unsigned int val; memcpy(&val, entry, sizeof(val)); return val; }()) == 0xec8b4855"
-            ],
-            [
-                type: "function_attribute",
-                name: "Add ASan no_sanitize attribute to checkInterruptedSyscall",
-                description: "Adds __attribute__((no_sanitize(\"address\"))) to checkInterruptedSyscall function",
-                find: "(bool\\s+StackFrame::checkInterruptedSyscall\\s*\\()",
-                replace: "__attribute__((no_sanitize(\"address\"))) \$1",
-                idempotent_check: "__attribute__((no_sanitize(\"address\"))) bool StackFrame::checkInterruptedSyscall("
-            ],
-            [
-                type: "expression_replace",
-                name: "Safe memory access for pc offset read",
-                description: "Replaces unsafe pointer dereference at pc-6 with safe memcpy-based access",
-                find: "\\*\\(int\\*\\)\\s*\\(pc\\s*-\\s*6\\)",
-                replace: "([&] { int val; memcpy(&val, (const void*)(pc - 6), sizeof(val)); return val; }())"
-            ]
-        ]
-    ],
-
-    // Stack walker patches for ASan compatibility
-    "stackWalker.cpp": [
-        validations: [[contains: "StackWalker::"], [contains: "StackWalker::walkVM"]],
-        operations: [
-            [
-                type: "function_attribute",
-                name: "Add ASan no_sanitize attribute to walkVM",
-                description: "Adds __attribute__((no_sanitize(\"address\"))) to walkVM function to prevent ASan false positives during VM stack walking",
-                find: "(int\\s+StackWalker::walkVM\\s*\\()",
-                replace: "__attribute__((no_sanitize(\"address\"))) \$1",
-                idempotent_check: "__attribute__((no_sanitize(\"address\"))) int StackWalker::walkVM("
-            ]
-        ]
-    ],
-
-    // Symbol management patches for DataDog-specific API extensions
-    "symbols.h": [
-        validations: [[contains: "class Symbols"], [contains: "static bool haveKernelSymbols"]],
-        operations: [
-            [
-                type: "method_declaration",
-                name: "Add clearParsingCaches method declaration",
-                description: "Adds clearParsingCaches static method declaration to Symbols class for test compatibility",
-                find: "(static bool haveKernelSymbols\\(\\) \\{[^}]+\\})",
-                replace: "\$1\n  // Clear internal caches - mainly for test purposes\n  static void clearParsingCaches();",
-                idempotent_check: "static void clearParsingCaches();"
-            ]
-        ]
-    ],
-
-    // Symbol implementation patches for DataDog-specific API extensions
-    "symbols_linux.cpp": [
-        validations: [[contains: "#ifdef __linux__"], [contains: "_parsed_inodes"], [contains: "loadSymbolTable"]],
-        operations: [
-            [
-                type: "method_implementation",
-                name: "Add clearParsingCaches method implementation",
-                description: "Adds clearParsingCaches static method implementation that clears internal parsing caches",
"Adds clearParsingCaches static method implementation that clears internal parsing caches", - find: "(#endif \\/\\/ __linux__\\s*\$)", - replace: "// Implementation of clearParsingCaches for test compatibility\nvoid Symbols::clearParsingCaches() {\n _parsed_inodes.clear();\n}\n\n\$1", - idempotent_check: "void Symbols::clearParsingCaches()" - ], - [ - type: "expression_replace", - name: "Add overflow protection to symbol address calculation", - description: "Replace unsafe pointer arithmetic with overflow-protected version to prevent ASAN errors from corrupted ELF symbol values", - find: "const char\\* addr = base != NULL \\? base \\+ sym->st_value : \\(const char\\*\\)sym->st_value;", - replace: "const char* addr;\n if (base != NULL) {\n // Check for overflow when adding sym->st_value to base\n uintptr_t base_addr = (uintptr_t)base;\n uint64_t symbol_value = sym->st_value;\n \n // Skip this symbol if addition would overflow\n // First check if symbol_value exceeds the address space\n if (symbol_value > UINTPTR_MAX) {\n continue;\n }\n // Then check if addition would overflow\n if (base_addr > UINTPTR_MAX - (uintptr_t)symbol_value) {\n continue;\n }\n \n // Perform addition using integer arithmetic to avoid pointer overflow\n addr = (const char*)(base_addr + (uintptr_t)symbol_value);\n } else {\n addr = (const char*)sym->st_value;\n }", - idempotent_check: "if (symbol_value > UINTPTR_MAX)" - ] - ] - ], - - // VM structures patches for safe memory access - "vmStructs.cpp": [ - validations: [ - [contains: "VMMethod::id()"], - [contains: "const_method + _constmethod_constants_offset"] - ], - operations: [ - [ - type: "expression_replace", - name: "Fix unsafe memory access in VMMethod::id", - description: "Replace direct pointer dereference with SafeAccess::load to prevent ASan errors during crash-protected memory access", - find: "const char\\* cpool = \\*\\(const char\\*\\*\\) \\(const_method \\+ _constmethod_constants_offset\\);\\s*unsigned short num = \\*\\(unsigned short\\*\\) \\(const_method \\+ _constmethod_idnum_offset\\);", - replace: "const char* cpool = (const char*) SafeAccess::load((void**)(const_method + _constmethod_constants_offset));\n unsigned short num = (unsigned short) SafeAccess::load32((int32_t*)(const_method + _constmethod_idnum_offset), 0);", - idempotent_check: "SafeAccess::load((void**)(const_method + _constmethod_constants_offset))" - ] - ] - ], - - // Stack frame header patches for DataDog-specific API extensions - "stackFrame.h": [ - validations: [ - [contains: "class StackFrame"], - [contains: "unwindStub"], - [contains: "adjustSP"] - ], - operations: [ - [ - type: "expression_replace", - name: "Make StackFrame constructor explicit", - description: "Add explicit keyword to prevent implicit conversions", - find: "StackFrame\\(void\\* ucontext\\)", - replace: "explicit StackFrame(void* ucontext)", - idempotent_check: "explicit StackFrame(void* ucontext)" - ], - [ - type: "method_declaration", - name: "Add DataDog SP baseline helper methods", - description: "Add sender_sp_baseline, read_caller_pc_from_sp, and read_saved_fp_from_sp methods for DataDog unwinding logic", - find: "(void adjustSP\\(const void\\* entry, const void\\* pc, uintptr_t& sp\\);)", - replace: "\$1\n\n // SP baseline helpers for compiled frame unwinding\n uintptr_t sender_sp_baseline(const NMethod* nm, uintptr_t sp, uintptr_t fp, const void* pc);\n const void* read_caller_pc_from_sp(uintptr_t sp_base);\n uintptr_t read_saved_fp_from_sp(uintptr_t sp_base);", - idempotent_check: "uintptr_t 
-            ]
-        ]
-    ]
-]
\ No newline at end of file

From 15d16f09218103c75d33506cf61eb9dd7df7e0a9 Mon Sep 17 00:00:00 2001
From: Roman Kennke
Date: Tue, 20 Jan 2026 15:18:10 +0100
Subject: [PATCH 2/2] Remove comment

---
 ddprof-lib/benchmarks/build.gradle | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ddprof-lib/benchmarks/build.gradle b/ddprof-lib/benchmarks/build.gradle
index a752d1a1..c6bd1db5 100644
--- a/ddprof-lib/benchmarks/build.gradle
+++ b/ddprof-lib/benchmarks/build.gradle
@@ -17,7 +17,6 @@ application {
 
 // Include the main library headers
 tasks.withType(CppCompile).configureEach {
-    // TODO: Do we need this, or is this included by default?
     includes file('../src/main/cpp').toString()
 }