From 6a3c7fe38188fee66bf342bfc3c1fc50547b57f4 Mon Sep 17 00:00:00 2001 From: Roman Kennke Date: Mon, 19 Jan 2026 16:46:18 +0100 Subject: [PATCH 1/2] [PROF-13123] Streamline upstream sources --- .github/workflows/ci.yml | 9 - .github/workflows/test_workflow.yml | 36 - .gitignore | 1 - CLAUDE.md | 20 +- README.md | 61 - ddprof-lib/benchmarks/build.gradle | 4 +- ddprof-lib/build.gradle | 241 ---- ddprof-lib/fuzz/build.gradle | 10 - ddprof-lib/gtest/build.gradle | 10 - ddprof-lib/src/main/cpp/arch.h | 211 ++++ ddprof-lib/src/main/cpp/asprof.h | 106 ++ ddprof-lib/src/main/cpp/cpuEngine.h | 52 + ddprof-lib/src/main/cpp/dwarf.h | 183 +++ ddprof-lib/src/main/cpp/incbin.h | 36 + ddprof-lib/src/main/cpp/j9StackTraces.h | 49 + ddprof-lib/src/main/cpp/mutex.cpp | 35 + ddprof-lib/src/main/cpp/mutex.h | 49 + ddprof-lib/src/main/cpp/os.h | 150 +++ ddprof-lib/src/main/cpp/os_linux.cpp | 693 +++++++++++ ddprof-lib/src/main/cpp/os_macos.cpp | 458 +++++++ ddprof-lib/src/main/cpp/stackFrame.h | 92 ++ .../src/main/cpp/stackFrame_aarch64.cpp | 405 +++++++ ddprof-lib/src/main/cpp/stackFrame_arm.cpp | 141 +++ ddprof-lib/src/main/cpp/stackFrame_i386.cpp | 162 +++ .../src/main/cpp/stackFrame_loongarch64.cpp | 116 ++ ddprof-lib/src/main/cpp/stackFrame_ppc64.cpp | 162 +++ .../src/main/cpp/stackFrame_riscv64.cpp | 118 ++ ddprof-lib/src/main/cpp/stackFrame_x64.cpp | 322 +++++ ddprof-lib/src/main/cpp/stackWalker.cpp | 590 +++++++++ ddprof-lib/src/main/cpp/stackWalker.h | 61 + ddprof-lib/src/main/cpp/symbols.h | 50 +- ddprof-lib/src/main/cpp/symbols_linux.cpp | 1066 +++++++++++++++++ ddprof-lib/src/main/cpp/symbols_linux.h | 2 +- ddprof-lib/src/main/cpp/symbols_macos.cpp | 231 ++++ ddprof-lib/src/main/cpp/trap.cpp | 64 + ddprof-lib/src/main/cpp/trap.h | 56 + ddprof-lib/src/main/cpp/tsc.cpp | 54 + ddprof-lib/src/main/cpp/tsc.h | 105 ++ ddprof-lib/src/main/cpp/vmStructs.cpp | 762 ++++++++++++ ddprof-lib/src/main/cpp/vmStructs.h | 705 +++++++++++ ddprof-lib/src/test/fuzz/fuzz_dwarf.cpp | 2 +- ddprof-lib/src/test/make/Makefile | 4 +- doc/event-type-system.md | 2 +- gradle/lock.properties | 5 - gradle/patching.gradle | 288 ----- 45 files changed, 7263 insertions(+), 716 deletions(-) create mode 100644 ddprof-lib/src/main/cpp/arch.h create mode 100644 ddprof-lib/src/main/cpp/asprof.h create mode 100644 ddprof-lib/src/main/cpp/cpuEngine.h create mode 100644 ddprof-lib/src/main/cpp/dwarf.h create mode 100644 ddprof-lib/src/main/cpp/incbin.h create mode 100644 ddprof-lib/src/main/cpp/j9StackTraces.h create mode 100644 ddprof-lib/src/main/cpp/mutex.cpp create mode 100644 ddprof-lib/src/main/cpp/mutex.h create mode 100644 ddprof-lib/src/main/cpp/os.h create mode 100644 ddprof-lib/src/main/cpp/os_linux.cpp create mode 100644 ddprof-lib/src/main/cpp/os_macos.cpp create mode 100644 ddprof-lib/src/main/cpp/stackFrame.h create mode 100644 ddprof-lib/src/main/cpp/stackFrame_aarch64.cpp create mode 100644 ddprof-lib/src/main/cpp/stackFrame_arm.cpp create mode 100644 ddprof-lib/src/main/cpp/stackFrame_i386.cpp create mode 100644 ddprof-lib/src/main/cpp/stackFrame_loongarch64.cpp create mode 100644 ddprof-lib/src/main/cpp/stackFrame_ppc64.cpp create mode 100644 ddprof-lib/src/main/cpp/stackFrame_riscv64.cpp create mode 100644 ddprof-lib/src/main/cpp/stackFrame_x64.cpp create mode 100644 ddprof-lib/src/main/cpp/stackWalker.cpp create mode 100644 ddprof-lib/src/main/cpp/stackWalker.h create mode 100644 ddprof-lib/src/main/cpp/symbols_linux.cpp create mode 100644 ddprof-lib/src/main/cpp/symbols_macos.cpp create mode 100644 
ddprof-lib/src/main/cpp/trap.cpp create mode 100644 ddprof-lib/src/main/cpp/trap.h create mode 100644 ddprof-lib/src/main/cpp/tsc.cpp create mode 100644 ddprof-lib/src/main/cpp/tsc.h create mode 100644 ddprof-lib/src/main/cpp/vmStructs.cpp create mode 100644 ddprof-lib/src/main/cpp/vmStructs.h delete mode 100644 gradle/lock.properties delete mode 100644 gradle/patching.gradle diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5390f3d5..050070a4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -110,15 +110,6 @@ jobs: restore-keys: | gradle-caches-${{ runner.os }}- - - name: Cache async-profiler - uses: actions/cache@v4 - with: - path: ddprof-lib/build/async-profiler - key: async-profiler-${{ runner.os }}-${{ hashFiles('gradle/lock.properties') }} - enableCrossOsArchive: true - restore-keys: | - async-profiler-${{ runner.os }}- - - name: Validate Javadoc run: | # Note: javadoc task depends on copyReleaseLibs which requires building native libraries diff --git a/.github/workflows/test_workflow.yml b/.github/workflows/test_workflow.yml index 62fc7575..3deb10c4 100644 --- a/.github/workflows/test_workflow.yml +++ b/.github/workflows/test_workflow.yml @@ -52,15 +52,6 @@ jobs: key: gradle-caches-${{ runner.os }}-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} restore-keys: | gradle-caches-${{ runner.os }}- - - name: Cache async-profiler - if: steps.set_enabled.outputs.enabled == 'true' - uses: actions/cache@v4 - with: - path: ddprof-lib/build/async-profiler - key: async-profiler-${{ runner.os }}-${{ hashFiles('gradle/lock.properties') }} - enableCrossOsArchive: true - restore-keys: | - async-profiler-${{ runner.os }}- - name: Setup cached JDK id: cache-jdk if: steps.set_enabled.outputs.enabled == 'true' @@ -173,15 +164,6 @@ jobs: key: gradle-caches-${{ runner.os }}-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} restore-keys: | gradle-caches-${{ runner.os }}- - - name: Cache async-profiler - if: steps.set_enabled.outputs.enabled == 'true' - uses: actions/cache@v4 - with: - path: ddprof-lib/build/async-profiler - key: async-profiler-${{ runner.os }}-${{ hashFiles('gradle/lock.properties') }} - enableCrossOsArchive: true - restore-keys: | - async-profiler-${{ runner.os }}- - name: Setup cached JDK id: cache-jdk uses: ./.github/actions/setup_cached_java @@ -311,15 +293,6 @@ jobs: with: version: ${{ matrix.java_version }} arch: 'aarch64' - - name: Cache async-profiler - if: steps.set_enabled.outputs.enabled == 'true' - uses: actions/cache@v4 - with: - path: ddprof-lib/build/async-profiler - key: async-profiler-${{ runner.os }}-${{ hashFiles('gradle/lock.properties') }} - enableCrossOsArchive: true - restore-keys: | - async-profiler-${{ runner.os }}- - name: Setup OS if: steps.set_enabled.outputs.enabled == 'true' run: | @@ -427,15 +400,6 @@ jobs: with: version: ${{ matrix.java_version }} arch: 'aarch64-musl' - - name: Cache async-profiler - if: steps.set_enabled.outputs.enabled == 'true' - uses: actions/cache@v4 - with: - path: ddprof-lib/build/async-profiler - key: async-profiler-${{ runner.os }}-${{ hashFiles('gradle/lock.properties') }} - enableCrossOsArchive: true - restore-keys: | - async-profiler-${{ runner.os }}- - name: Extract Versions uses: ./.github/actions/extract_versions - name: Test diff --git a/.gitignore b/.gitignore index 37256dda..98ebc2de 100644 --- a/.gitignore +++ b/.gitignore @@ -17,7 +17,6 @@ .tmp *.iml /ddprof-stresstest/jmh-result.* -/ddprof-lib/src/main/cpp-external/**/* **/.resources/ diff --git 
a/CLAUDE.md b/CLAUDE.md index e1ec9ca1..c0a9b6dd 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -136,17 +136,9 @@ The project supports multiple build configurations per platform: - **asan**: AddressSanitizer build for memory error detection - **tsan**: ThreadSanitizer build for thread safety validation -### Upstream Integration -The project maintains integration with async-profiler upstream: -- `cloneAsyncProfiler`: Clones DataDog's async-profiler fork -- `copyUpstreamFiles`: Copies selected upstream files to `ddprof-lib/src/main/cpp-external` -- `patchStackFrame`/`patchStackWalker`: Applies necessary patches for ASAN compatibility -- Lock file: `gradle/ap-lock.properties` specifies branch/commit - ### Key Source Locations - Java API: `ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java` - C++ engine: `ddprof-lib/src/main/cpp/` -- Upstream C++ code: `ddprof-lib/src/main/cpp-external/` (generated) - Native libraries: `ddprof-lib/build/lib/main/{config}/{os}/{arch}/` - Test resources: `ddprof-test/src/test/java/` @@ -221,9 +213,7 @@ The profiler uses a sophisticated double-buffered storage system for call traces - **Buffer Management**: Thread-local recording buffers with configurable flush thresholds ### Native Integration Patterns -- **Upstream Sync**: Uses DataDog fork of async-profiler with branch `dd/master` - **Adapter Pattern**: `*_dd.h` files adapt upstream code for Datadog needs -- **External Code**: Upstream files copied to `cpp-external/` with minimal patches - **Signal Handler Safety**: Careful memory management in signal handler contexts ### Multi-Engine Profiling System @@ -256,7 +246,7 @@ The profiler uses a sophisticated double-buffered storage system for call traces ### Code Organization Principles - **Namespace Separation**: Use `ddprof` namespace for adapted upstream classes - **File Naming**: Datadog adaptations use `*_dd` suffix (e.g., `stackWalker_dd.h`) -- **External Dependencies**: Upstream code in `cpp-external/`, local code in `cpp/` +- **External Dependencies**: Local code in `cpp/` ### Performance Constraints - **Algorithmic Complexity**: Use O(N) or better, max 256 elements for linear scans @@ -275,14 +265,6 @@ The profiler uses a sophisticated double-buffered storage system for call traces - **Static Analysis**: `scanBuild` for additional code quality checks - **Test Logging**: Use `TEST_LOG` macro for debug output in tests -### Upstream Integration Workflow -The project maintains a carefully managed relationship with async-profiler upstream: -1. **Lock File**: `gradle/ap-lock.properties` specifies exact upstream commit -2. **Branch Tracking**: `dd/master` branch contains safe upstream changes -3. **File Copying**: `copyUpstreamFiles` task selectively imports upstream code -4. **Minimal Patching**: Only essential patches for ASan compatibility -5. **Cherry-pick Strategy**: Rare cherry-picks only for critical fixes - ## Build System Architecture ### Gradle Multi-project Structure diff --git a/README.md b/README.md index 7455d5a3..2f141132 100644 --- a/README.md +++ b/README.md @@ -37,16 +37,6 @@ cd java-profiler The resulting artifact will be in `ddprof-lib/build/libs/ddprof-.jar` #### Gritty details -To smoothen the absorption of the upstream changes, we are using parts of the upstream codebase in (mostly) vanilla form. 
- -For this, we have several gradle tasks in [ddprof-lib/build.gradle](ddprof-lib/build.gradle): -- `cloneAsyncProfiler` - clones the [DataDog/async-profiler](https://github.com/DataDog/async-profiler) repository into `ddprof-lib/build/async-profiler` using the commit lock specified in [gradle/lock.properties](gradle/lock.properties) - - in that repository, we are maintaining a branch called `dd/master` where we keep the upstream code in sync with the 'safe' changes from the upstream `master` branch - - cherry-picks into that branch should be rare and only done for critical fixes that are needed in the project - - otherwise, we should wait for the next upstream release to avoid conflicts -- `copyUpstreamFiles` - copies the selected upstream source files into the `ddprof-lib/src/main/cpp-external` directory -- `patchUpstreamFiles` - applies unified patches to upstream files for ASan compatibility, memory safety, and API extensions - Since the upstream code might not be 100% compatible with the current version of the project, we need to provide adapters. The adapters are sharing the same file name as the upstream files but are suffixed with `_dd` (e.g. `arch_dd.h`). @@ -55,57 +45,6 @@ conflicts with the upstream code. This allows us to use the upstream code as-is See [ddprof-lib/src/main/cpp/stackWalker_dd.h](ddprof-lib/src/main/cpp/stackWalker_dd.h) for an example of how we adapt the upstream code to fit our needs. -### Unified Patching System - -The project uses a unified configuration-driven patching system to apply modifications to upstream source files: - -- **Configuration File**: All patches are defined in `gradle/patching.gradle` using structured Gradle DSL -- **Direct Source Modification**: Patches are applied directly to upstream source files using regex-based find/replace -- **Idempotent Operations**: Each patch includes checks to prevent double-application -- **Validation System**: Pre-patch validation ensures upstream structure hasn't changed incompatibly -- **Single Unified Task**: One `patchUpstreamFiles` task replaces multiple fragmented patch tasks - -## Patch Configuration Structure - -Patches are defined in `gradle/patching.gradle` with this structure: - -```groovy -ext.upstreamPatches = [ - "filename.cpp": [ - validations: [ - [contains: "expected_function"], - [contains: "expected_class"] - ], - operations: [ - [ - type: "function_attribute", - name: "Add ASan compatibility attribute", - find: "(bool\\s+StackFrame::unwindStub\\s*\\()", - replace: "__attribute__((no_sanitize(\"address\"))) \$1", - idempotent_check: "__attribute__((no_sanitize(\"address\"))) bool StackFrame::unwindStub(" - ] - ] - ] -] -``` - -### Patch Operation Types - -1. **function_attribute**: Add attributes (like `__attribute__`) to function declarations -2. **expression_replace**: Replace unsafe code patterns with safe equivalents -3. **method_declaration**: Add new method declarations to class definitions -4. **method_implementation**: Add complete method implementations to source files - -### Adding New Patches - -1. **Edit Configuration**: Add patch definition to `gradle/patching.gradle` -2. **Add Validations**: Ensure expected code structure exists -3. **Define Operations**: Specify find/replace patterns with appropriate type -4. **Include Idempotency**: Add `idempotent_check` to prevent double-application -5. **Test Thoroughly**: Verify patch works with clean upstream files - -For detailed syntax documentation, see the comprehensive comments in `gradle/patching.gradle`. 
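As an illustration of the adapter pattern described above, here is a minimal hypothetical sketch (the real adapters, such as `stackWalker_dd.h`, are more involved): the adapter includes the vanilla upstream header unchanged and adds Datadog-specific helpers inside the `ddprof` namespace, so nothing can collide with upstream symbols. The helper shown here is invented for illustration and is not part of the actual codebase.

```cpp
// arch_dd.h -- hypothetical adapter sketch, not the actual file contents
#ifndef _ARCH_DD_H
#define _ARCH_DD_H

#include "arch.h"  // vanilla upstream header, used as-is

namespace ddprof {

// Illustrative Datadog-specific helper built on the upstream u64 typedef;
// living in the ddprof namespace keeps it from clashing with upstream code.
static inline u64 atomicMax(volatile u64& var, u64 value) {
    u64 old = __atomic_load_n(&var, __ATOMIC_ACQUIRE);
    while (old < value &&
           !__atomic_compare_exchange_n(&var, &old, value, false,
                                        __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)) {
        // on failure, 'old' is refreshed with the current value of var
    }
    return old;
}

} // namespace ddprof

#endif // _ARCH_DD_H
```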
- ## Claude Code Integration This project includes Claude Code commands for streamlined development workflows when using [Claude Code](https://claude.ai/code): diff --git a/ddprof-lib/benchmarks/build.gradle b/ddprof-lib/benchmarks/build.gradle index 5b57b3bc..a752d1a1 100644 --- a/ddprof-lib/benchmarks/build.gradle +++ b/ddprof-lib/benchmarks/build.gradle @@ -17,10 +17,8 @@ application { // Include the main library headers tasks.withType(CppCompile).configureEach { - dependsOn ':ddprof-lib:patchUpstreamFiles' - + // TODO: Do we need this, or is this included by default? includes file('../src/main/cpp').toString() - includes file('../src/main/cpp-external').toString() } // Add a task to run the benchmark diff --git a/ddprof-lib/build.gradle b/ddprof-lib/build.gradle index 10e3866d..b826a56e 100644 --- a/ddprof-lib/build.gradle +++ b/ddprof-lib/build.gradle @@ -180,14 +180,6 @@ description = "Datadog Java Profiler Library" def component_version = project.hasProperty("ddprof_version") ? project.ddprof_version : project.version -def props = new Properties() -file("${rootDir}/gradle/lock.properties").withInputStream { stream -> - props.load(stream) -} - -def ap_branch_lock = props.getProperty("ap.branch") -def ap_commit_lock = props.getProperty("ap.commit") - // this feels weird but it is the only way invoking `./gradlew :ddprof-lib:*` tasks will work if (rootDir.toString().endsWith("ddprof-lib")) { apply from: rootProject.file('../common.gradle') @@ -289,224 +281,6 @@ tasks.register('copyExternalLibs', Copy) { } } -def cloneAPTask = tasks.register('cloneAsyncProfiler') { - description = 'Clones async-profiler repo if directory is missing or updates it if commit hash differs' - inputs.file("${rootDir}/gradle/lock.properties") - outputs.dir("${projectDir}/build/async-profiler") - outputs.upToDateWhen { - def targetDir = file("${projectDir}/build/async-profiler") - if (!targetDir.exists()) { - return false - } - def currentCommit = "" - try { - new ByteArrayOutputStream().withStream { os -> - exec { - workingDir targetDir.absolutePath - commandLine 'git', 'rev-parse', 'HEAD' - standardOutput = os - } - currentCommit = os.toString().trim() - } - return currentCommit == ap_commit_lock - } catch (Exception e) { - return false - } - } - doLast { - // Fix for CI environments where git detects dubious ownership - exec { - commandLine 'git', 'config', '--global', '--add', 'safe.directory', projectDir.parentFile.absolutePath - ignoreExitValue = true // Don't fail if this command fails - } - - def targetDir = file("${projectDir}/build/async-profiler") - if (!targetDir.exists()) { - println "Cloning missing async-profiler git subdirectory..." - exec { - commandLine 'git', 'clone', '--branch', ap_branch_lock, 'https://github.com/datadog/async-profiler.git', targetDir.absolutePath - } - exec { - workingDir targetDir.absolutePath - commandLine 'git', 'checkout', ap_commit_lock - } - } else { - // Also fix git ownership for existing directory - exec { - workingDir targetDir.absolutePath - commandLine 'git', 'config', '--global', '--add', 'safe.directory', targetDir.absolutePath - ignoreExitValue = true - } - - def currentCommit = "" - new ByteArrayOutputStream().withStream { os -> - exec { - workingDir targetDir.absolutePath - commandLine 'git', 'rev-parse', 'HEAD' - standardOutput = os - } - currentCommit = os.toString().trim() - } - - if (currentCommit != ap_commit_lock) { - println "async-profiler commit hash differs (current: ${currentCommit}, expected: ${ap_commit_lock}), updating..." 
- exec { - workingDir targetDir.absolutePath - commandLine 'rm', '-rf', targetDir.absolutePath - } - exec { - commandLine 'git', 'clone', '--branch', ap_branch_lock, 'https://github.com/datadog/async-profiler.git', targetDir.absolutePath - } - exec { - workingDir targetDir.absolutePath - commandLine 'git', 'checkout', ap_commit_lock - } - } else { - println "async-profiler git subdirectory present with correct commit hash." - } - } - } -} - -def copyUpstreamFiles = tasks.register('copyUpstreamFiles', Copy) { - configure { - dependsOn cloneAPTask - } - onlyIf { - !project.hasProperty("debug-ap") - } - description = 'Copy shared upstream files' - from("${projectDir}/build/async-profiler/src") { - include "arch.h" - include "asprof.h" - include "cpuEngine.h" - include "dwarf.h" - include "incbin.h" - include "j9StackTraces.h" - include "log.h" - include "mutex.h" - include "mutex.cpp" - include "os.h" - include "os_*.cpp" - include "spinLock.h" - include "stackFrame.h" - include "stackWalker.h" - include "stackWalker.cpp" - include "stackFrame*.cpp" - include "symbols.h" - include "symbols_*.cpp" - include "trap.h" - include "trap.cpp" - include "tsc.h" - include "tsc.cpp" - include "vmStructs.h" - include "vmStructs.cpp" - } - into "${projectDir}/src/main/cpp-external" -} - -tasks.named("spotlessMisc") { - configure { - dependsOn patchUpstreamFiles - } -} - -// Load patch configuration from external file -apply from: "${rootDir}/gradle/patching.gradle" - -def patchUpstreamFiles = tasks.register("patchUpstreamFiles") { - description = 'Apply all upstream patches via unified configuration system' - configure { - dependsOn copyUpstreamFiles - } - - inputs.file("${rootDir}/gradle/patching.gradle") - inputs.files(fileTree("${projectDir}/src/main/cpp-external").include("*.cpp", "*.h")) - outputs.files(fileTree("${projectDir}/src/main/cpp-external").include("*.cpp", "*.h")) - - doLast { - try { - // Use configuration from gradle/patching.gradle - def patches = upstreamPatches - - // Apply patches using simplified inline logic - def totalFiles = patches.size() - def totalOperations = 0 - patches.each { fileName, fileConfig -> - totalOperations += fileConfig.operations?.size() ?: 0 - } - - logger.quiet("Unified patching system: processing ${totalFiles} files with ${totalOperations} total operations") - - // Apply patches to all configured files - patches.each { fileName, fileConfig -> - def filePath = "${projectDir}/src/main/cpp-external/${fileName}" - def targetFile = file(filePath) - - if (targetFile.exists()) { - def content = targetFile.getText('UTF-8') - def originalContent = content - def patchCount = 0 - - // Run validations first - fileConfig.validations?.each { validation -> - if (validation.contains && !content.contains(validation.contains)) { - throw new RuntimeException("Validation failed for ${fileName}: required text '${validation.contains}' not found. 
Upstream structure may have changed.") - } - } - - // Apply operations in order - fileConfig.operations?.each { operation -> - // Check if already applied (idempotent check) - if (operation.idempotent_check && content.contains(operation.idempotent_check)) { - logger.quiet("Skipped patch '${operation.name ?: operation.type}' for ${fileName} (already applied)") - return - } - - // Apply regex pattern - def pattern = java.util.regex.Pattern.compile(operation.find) - def matcher = pattern.matcher(content) - - if (matcher.find()) { - def newContent = matcher.replaceAll(operation.replace) - - if (newContent != content) { - content = newContent - patchCount++ - logger.quiet("Applied patch '${operation.name ?: operation.type}' to ${fileName}") - } - } else { - logger.warn("Pattern '${operation.find}' not found in ${fileName} for operation: ${operation.name ?: operation.type}") - } - } - - // Write back if any modifications were made - if (patchCount > 0) { - targetFile.write(content, 'UTF-8') - logger.quiet("Patched ${fileName} with ${patchCount} operations") - } else { - logger.quiet("No patches applied to ${fileName} (all already present)") - } - } else { - logger.warn("Patch target file not found: ${fileName}") - } - } - - logger.quiet("Unified patching completed successfully") - - } catch (Exception e) { - throw new GradleException("Unified patching failed: ${e.message}", e) - } - } -} - - -def initSubrepoTask = tasks.register('initSubrepo') { - configure { - dependsOn patchUpstreamFiles - } -} - tasks.register('assembleAll') {} // use the build config names to create configurations, copy lib and asemble jar tasks @@ -568,10 +342,6 @@ configurations { // added by the cpp-library plugin tasks.whenTaskAdded { task -> if (task instanceof CppCompile) { - configure { - dependsOn patchUpstreamFiles - } - if (!task.name.startsWith('compileLib') && task.name.contains('Release')) { buildConfigurations.each { config -> if (config.os == osIdentifier() && config.arch == archIdentifier()) { @@ -589,7 +359,6 @@ tasks.whenTaskAdded { task -> toolChain = task.toolChain targetPlatform = task.targetPlatform includes task.includes - includes project(':ddprof-lib').file('src/main/cpp-external').toString() includes project(':ddprof-lib').file('src/main/cpp').toString() includes "${javaHome()}/include" includes project(':malloc-shim').file('src/main/public').toString() @@ -673,9 +442,7 @@ tasks.withType(LinkSharedLibrary).configureEach { library { baseName = "javaProfiler" source.from file('src/main/cpp') - source.from file('src/main/cpp-external') privateHeaders.from file('src/main/cpp') - privateHeaders.from file('src/main/cpp-external') // aarch64 support is still incubating // for the time being an aarch64 linux machine will match 'machines.linux.x86_64' @@ -758,10 +525,6 @@ gradle.projectsEvaluated { if (javadocTask != null && copyReleaseLibs != null) { javadocTask.dependsOn copyReleaseLibs } - def initTask = tasks.findByName("initSubrepo") - if (initTask != null) { - compileTask.dependsOn initTask - } } } @@ -858,7 +621,3 @@ tasks.withType(AbstractPublishToMaven).configureEach { mustRunAfter tasks.matching { it instanceof VerificationTask } } } - -clean { - delete "${projectDir}/src/main/cpp-external" -} diff --git a/ddprof-lib/fuzz/build.gradle b/ddprof-lib/fuzz/build.gradle index 27c3607b..ed706cb0 100644 --- a/ddprof-lib/fuzz/build.gradle +++ b/ddprof-lib/fuzz/build.gradle @@ -168,7 +168,6 @@ tasks.whenTaskAdded { task -> toolChain = task.toolChain targetPlatform = task.targetPlatform includes 
task.includes - includes project(':ddprof-lib').file('src/main/cpp-external').toString() includes project(':ddprof-lib').file('src/main/cpp').toString() includes "${javaHome()}/include" includes project(':malloc-shim').file('src/main/public').toString() @@ -182,9 +181,6 @@ tasks.whenTaskAdded { task -> source project(':ddprof-lib').fileTree('src/main/cpp') { include '**/*' } - source project(':ddprof-lib').fileTree('src/main/cpp-external') { - include '**/*' - } // Compile the fuzz target itself source fuzzFile @@ -195,12 +191,6 @@ tasks.whenTaskAdded { task -> if (linkTask != null) { linkTask.dependsOn fuzzCompileTask } - def subrepoInitTask = project(':ddprof-lib').tasks.named("initSubrepo") - if (subrepoInitTask != null) { - fuzzCompileTask.configure { - dependsOn subrepoInitTask - } - } } } } diff --git a/ddprof-lib/gtest/build.gradle b/ddprof-lib/gtest/build.gradle index d474eec0..d7ee2027 100644 --- a/ddprof-lib/gtest/build.gradle +++ b/ddprof-lib/gtest/build.gradle @@ -109,7 +109,6 @@ tasks.whenTaskAdded { task -> toolChain = task.toolChain targetPlatform = task.targetPlatform includes task.includes - includes project(':ddprof-lib').file('src/main/cpp-external').toString() includes project(':ddprof-lib').file('src/main/cpp').toString() includes "${javaHome()}/include" includes project(':malloc-shim').file('src/main/public').toString() @@ -123,9 +122,6 @@ tasks.whenTaskAdded { task -> source project(':ddprof-lib').fileTree('src/main/cpp') { include '**/*' } - source project(':ddprof-lib').fileTree('src/main/cpp-external') { - include '**/*' - } source testFile inputs.files source @@ -135,12 +131,6 @@ tasks.whenTaskAdded { task -> if (linkTask != null) { linkTask.get().dependsOn gtestCompileTask } - def subrepoInitTask = project(':ddprof-lib').tasks.named("initSubrepo") - if (subrepoInitTask != null) { - gtestCompileTask.configure { - dependsOn subrepoInitTask - } - } } } } diff --git a/ddprof-lib/src/main/cpp/arch.h b/ddprof-lib/src/main/cpp/arch.h new file mode 100644 index 00000000..8d7701a6 --- /dev/null +++ b/ddprof-lib/src/main/cpp/arch.h @@ -0,0 +1,211 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _ARCH_H +#define _ARCH_H + + +#ifndef likely +# define likely(x) (__builtin_expect(!!(x), 1)) +#endif + +#ifndef unlikely +# define unlikely(x) (__builtin_expect(!!(x), 0)) +#endif + +#ifdef _LP64 +# define LP64_ONLY(code) code +#else // !_LP64 +# define LP64_ONLY(code) +#endif // _LP64 + + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long long u64; + +static inline u64 atomicInc(volatile u64& var, u64 increment = 1) { + return __sync_fetch_and_add(&var, increment); +} + +static inline int atomicInc(volatile u32& var, int increment = 1) { + return __sync_fetch_and_add(&var, increment); +} + +static inline int atomicInc(volatile int& var, int increment = 1) { + return __sync_fetch_and_add(&var, increment); +} + +static inline u64 loadAcquire(u64& var) { + return __atomic_load_n(&var, __ATOMIC_ACQUIRE); +} + +static inline void storeRelease(u64& var, u64 value) { + return __atomic_store_n(&var, value, __ATOMIC_RELEASE); +} + + +#if defined(__x86_64__) || defined(__i386__) + +typedef unsigned char instruction_t; +const instruction_t BREAKPOINT = 0xcc; +const int BREAKPOINT_OFFSET = 0; + +const int SYSCALL_SIZE = 2; +const int FRAME_PC_SLOT = 1; +const int PROBE_SP_LIMIT = 4; +const int PLT_HEADER_SIZE = 16; +const int PLT_ENTRY_SIZE = 16; +const int PERF_REG_PC = 8; // 
PERF_REG_X86_IP + +#define spinPause() asm volatile("pause") +#define rmb() asm volatile("lfence" : : : "memory") +#define flushCache(addr) asm volatile("mfence; clflush (%0); mfence" : : "r" (addr) : "memory") + +#define callerPC() __builtin_return_address(0) +#define callerFP() __builtin_frame_address(1) +#define callerSP() ((void**)__builtin_frame_address(0) + 2) + +#elif defined(__arm__) || defined(__thumb__) + +typedef unsigned int instruction_t; +const instruction_t BREAKPOINT = 0xe7f001f0; +const instruction_t BREAKPOINT_THUMB = 0xde01de01; +const int BREAKPOINT_OFFSET = 0; + +const int SYSCALL_SIZE = sizeof(instruction_t); +const int FRAME_PC_SLOT = 1; +const int PROBE_SP_LIMIT = 0; +const int PLT_HEADER_SIZE = 20; +const int PLT_ENTRY_SIZE = 12; +const int PERF_REG_PC = 15; // PERF_REG_ARM_PC + +#define spinPause() asm volatile("yield") +#define rmb() asm volatile("dmb ish" : : : "memory") +#define flushCache(addr) __builtin___clear_cache((char*)(addr), (char*)(addr) + sizeof(instruction_t)) + +#define callerPC() __builtin_return_address(0) +#define callerFP() __builtin_frame_address(1) +#define callerSP() __builtin_frame_address(1) + +#elif defined(__aarch64__) + +typedef unsigned int instruction_t; +const instruction_t BREAKPOINT = 0xd4200000; +const int BREAKPOINT_OFFSET = 0; + +const int SYSCALL_SIZE = sizeof(instruction_t); +const int FRAME_PC_SLOT = 1; +const int PROBE_SP_LIMIT = 0; +const int PLT_HEADER_SIZE = 32; +const int PLT_ENTRY_SIZE = 16; +const int PERF_REG_PC = 32; // PERF_REG_ARM64_PC + +#define spinPause() asm volatile("isb") +#define rmb() asm volatile("dmb ish" : : : "memory") +#define flushCache(addr) __builtin___clear_cache((char*)(addr), (char*)(addr) + sizeof(instruction_t)) + +#define callerPC() ({ void* pc; asm volatile("adr %0, ." : "=r"(pc)); pc; }) +#define callerFP() ({ void* fp; asm volatile("mov %0, fp" : "=r"(fp)); fp; }) +#define callerSP() ({ void* sp; asm volatile("mov %0, sp" : "=r"(sp)); sp; }) + +#elif defined(__PPC64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +typedef unsigned int instruction_t; +const instruction_t BREAKPOINT = 0x7fe00008; +// We place the break point in the third instruction slot on PPCLE as the first two are skipped if +// the call comes from within the same compilation unit according to the LE ABI. 
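+// (Background, for clarity: the PPC64 ELFv2 ABI gives each function a global entry point,
+// whose first two instructions materialize the TOC pointer, and a local entry point right
+// after them; calls from within the same compilation unit enter at the local entry point,
+// so a trap placed in the first two slots could be bypassed.)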
+const int BREAKPOINT_OFFSET = 8; + +const int SYSCALL_SIZE = sizeof(instruction_t); +const int FRAME_PC_SLOT = 2; +const int PROBE_SP_LIMIT = 0; +const int PLT_HEADER_SIZE = 24; +const int PLT_ENTRY_SIZE = 24; +const int PERF_REG_PC = 32; // PERF_REG_POWERPC_NIP + +#define spinPause() asm volatile("yield") // does nothing, but using or 1,1,1 would lead to other problems +#define rmb() asm volatile ("sync" : : : "memory") // lwsync would do but better safe than sorry +#define flushCache(addr) __builtin___clear_cache((char*)(addr), (char*)(addr) + sizeof(instruction_t)) + +#define callerPC() __builtin_return_address(0) +#define callerFP() __builtin_frame_address(1) +#define callerSP() __builtin_frame_address(0) + +#elif defined(__riscv) && (__riscv_xlen == 64) + +typedef unsigned int instruction_t; +#if defined(__riscv_compressed) +const instruction_t BREAKPOINT = 0x9002; // EBREAK (compressed form) +#else +const instruction_t BREAKPOINT = 0x00100073; // EBREAK +#endif +const int BREAKPOINT_OFFSET = 0; + +const int SYSCALL_SIZE = sizeof(instruction_t); +const int FRAME_PC_SLOT = 1; // return address is at -1 from FP +const int PROBE_SP_LIMIT = 0; +const int PLT_HEADER_SIZE = 24; // Best guess from examining readelf +const int PLT_ENTRY_SIZE = 24; // ...same... +const int PERF_REG_PC = 0; // PERF_REG_RISCV_PC + +#define spinPause() // No architecture support +#define rmb() asm volatile ("fence" : : : "memory") +#define flushCache(addr) __builtin___clear_cache((char*)(addr), (char*)(addr) + sizeof(instruction_t)) + +#define callerPC() __builtin_return_address(0) +#define callerFP() __builtin_frame_address(1) +#define callerSP() __builtin_frame_address(0) + +#elif defined(__loongarch_lp64) + +typedef unsigned int instruction_t; +const instruction_t BREAKPOINT = 0x002a0005; // EBREAK +const int BREAKPOINT_OFFSET = 0; + +const int SYSCALL_SIZE = sizeof(instruction_t); +const int FRAME_PC_SLOT = 1; +const int PROBE_SP_LIMIT = 0; +const int PLT_HEADER_SIZE = 32; +const int PLT_ENTRY_SIZE = 16; +const int PERF_REG_PC = 0; // PERF_REG_LOONGARCH_PC + +#define spinPause() asm volatile("ibar 0x0") +#define rmb() asm volatile("dbar 0x0" : : : "memory") +#define flushCache(addr) __builtin___clear_cache((char*)(addr), (char*)(addr) + sizeof(instruction_t)) + +#define callerPC() __builtin_return_address(0) +#define callerFP() __builtin_frame_address(1) +#define callerSP() __builtin_frame_address(0) + +#else + +#error "Compiling on unsupported arch" + +#endif + + +// On Apple M1 and later processors, memory is either writable or executable (W^X) +#if defined(__aarch64__) && defined(__APPLE__) +# define WX_MEMORY true +#else +# define WX_MEMORY false +#endif + +// Pointer authentication (PAC) support. +// Only 48-bit virtual addresses are currently supported. +#ifdef __aarch64__ +const unsigned long PAC_MASK = WX_MEMORY ? 
0x7fffffffffffUL : 0xffffffffffffUL; + +static inline const void* stripPointer(const void* p) { + return (const void*) ((unsigned long)p & PAC_MASK); +} +#else +# define stripPointer(p) (p) +#endif + + +#endif // _ARCH_H diff --git a/ddprof-lib/src/main/cpp/asprof.h b/ddprof-lib/src/main/cpp/asprof.h new file mode 100644 index 00000000..3f6cbfdc --- /dev/null +++ b/ddprof-lib/src/main/cpp/asprof.h @@ -0,0 +1,106 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _ASPROF_H +#define _ASPROF_H + +#include <stddef.h> +#include <stdint.h> + +#ifdef __clang__ +# define DLLEXPORT __attribute__((visibility("default"))) +#else +# define DLLEXPORT __attribute__((visibility("default"),externally_visible)) +#endif + +#define WEAK __attribute__((weak)) + +#ifdef __cplusplus +extern "C" { +#endif + + +typedef const char* asprof_error_t; +typedef void (*asprof_writer_t)(const char* buf, size_t size); + +// Should be called once prior to any other API functions +DLLEXPORT void asprof_init(); +typedef void (*asprof_init_t)(); + +// Returns an error message for the given error code or NULL if there is no error +DLLEXPORT const char* asprof_error_str(asprof_error_t err); +typedef const char* (*asprof_error_str_t)(asprof_error_t err); + +// Executes async-profiler command using output_callback as an optional sink +// for the profiler output. Returns an error code or NULL on success. +DLLEXPORT asprof_error_t asprof_execute(const char* command, asprof_writer_t output_callback); +typedef asprof_error_t (*asprof_execute_t)(const char* command, asprof_writer_t output_callback); + +// This API is UNSTABLE and might change or be removed in the next version of async-profiler. +typedef struct { + // A thread-local sample counter, which increments (not necessarily by 1) every time a + // stack profiling sample is taken using a profiling signal. + // + // The counter might be initialized lazily, only starting counting from 0 the first time + // `asprof_get_thread_local_data` is called on a given thread. Further calls to + // `asprof_get_thread_local_data` on a given thread will of course not reset the counter. + volatile uint64_t sample_counter; +} asprof_thread_local_data; + +// This API is UNSTABLE and might change or be removed in the next version of async-profiler. +// +// Gets a pointer to asprof's thread-local data structure, see `asprof_thread_local_data`'s +// documentation for the details of each field. This function might lazily initialize that +// structure. +// +// This function can return NULL either if the profiler is not yet initialized, or in +// case of an allocation failure. +// +// This function is *not* async-signal-safe. However, it is safe to call concurrently +// with async-profiler operations, including initialization. +DLLEXPORT asprof_thread_local_data* asprof_get_thread_local_data(void); +typedef asprof_thread_local_data* (*asprof_get_thread_local_data_t)(void); + + +typedef int asprof_jfr_event_key; + +// This API is UNSTABLE and might change or be removed in the next version of async-profiler. +// +// Returns an asprof_jfr_event_key identifier for a user-defined JFR key. +// That identifier can then be used in `asprof_emit_jfr_event`. +// +// The name is required to be valid (since it's a C string, NUL-free) UTF-8. +// +// Returns -1 on failure.
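+//
+// A hypothetical usage sketch (illustrative only; assumes the profiler library has
+// been loaded dynamically and the symbol is resolved through dlsym):
+//
+//   void* lib = dlopen("libjavaProfiler.so", RTLD_NOW);
+//   asprof_register_jfr_event_t register_event =
+//       (asprof_register_jfr_event_t)dlsym(lib, "asprof_register_jfr_event");
+//   asprof_jfr_event_key key = register_event("datadog.ExampleEvent");
+//   if (key == -1) { /* registration failed */ }
+//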
+DLLEXPORT asprof_jfr_event_key asprof_register_jfr_event(const char* name); +typedef asprof_jfr_event_key (*asprof_register_jfr_event_t)(const char* name); + + +#define ASPROF_MAX_JFR_EVENT_LENGTH 2048 + +// This API is UNSTABLE and might change or be removed in the next version of async-profiler. +// +// Emits a custom, user-defined JFR event. The key should be created via `asprof_register_jfr_event`. +// The data can be arbitrary binary data, with size <= ASPROF_MAX_JFR_EVENT_LENGTH. +// +// User-defined events are included in the JFR under a `profiler.UserEvent` event type. That type will contain +// (at least) the following fields: +// 1. `startTime` [Long] - the emitted event's time in ticks. +// 2. `eventThread` [java.lang.Thread] - the thread that emitted the event. +// 3. `type` [profiler.types.UserEventType] - the event's type, +// where `profiler.types.UserEventType` is an indexed string from the JFR constant pool. +// 4. `data` [String] - the event data. This is the Latin-1 encoded version of the input data. +// The Latin-1 encoding is used as a way to stuff the arbitrary byte input into something +// that JFR supports (JFR technically supports byte arrays, but `jfr print` doesn't). +// +// Returns an error code or NULL on success. +DLLEXPORT asprof_error_t asprof_emit_jfr_event(asprof_jfr_event_key type, const uint8_t* data, size_t len); +typedef asprof_error_t (*asprof_emit_jfr_event_t)(asprof_jfr_event_key type, const uint8_t* data, size_t len); + +#ifdef __cplusplus +} +#endif + +#endif // _ASPROF_H diff --git a/ddprof-lib/src/main/cpp/cpuEngine.h b/ddprof-lib/src/main/cpp/cpuEngine.h new file mode 100644 index 00000000..da8becbd --- /dev/null +++ b/ddprof-lib/src/main/cpp/cpuEngine.h @@ -0,0 +1,52 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _CPUENGINE_H +#define _CPUENGINE_H + +#include <signal.h> +#include "engine.h" + + +// Base class for CPU sampling engines: PerfEvents, CTimer, ITimer +class CpuEngine : public Engine { + protected: + static void** _pthread_entry; + static CpuEngine* _current; + + static long _interval; + static CStack _cstack; + static int _signal; + static bool _count_overrun; + + static void signalHandler(int signo, siginfo_t* siginfo, void* ucontext); + static void signalHandlerJ9(int signo, siginfo_t* siginfo, void* ucontext); + + static bool setupThreadHook(); + + void enableThreadHook(); + void disableThreadHook(); + + bool isResourceLimit(int err); + + int createForAllThreads(); + + virtual int createForThread(int tid) { return -1; } + virtual void destroyForThread(int tid) {} + + public: + const char* title() { + return "CPU profile"; + } + + const char* units() { + return "ns"; + } + + static void onThreadStart(); + static void onThreadEnd(); +}; + +#endif // _CPUENGINE_H diff --git a/ddprof-lib/src/main/cpp/dwarf.h b/ddprof-lib/src/main/cpp/dwarf.h new file mode 100644 index 00000000..6cc3a483 --- /dev/null +++ b/ddprof-lib/src/main/cpp/dwarf.h @@ -0,0 +1,183 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _DWARF_H +#define _DWARF_H + +#include <stddef.h> +#include <string.h> +#include "arch.h" + + +const int DW_REG_PLT = 128; // denotes special rule for PLT entries +const int DW_REG_INVALID = 255; // denotes unsupported configuration + +const int DW_PC_OFFSET = 1; +const int DW_SAME_FP = 0x80000000; +const int DW_LINK_REGISTER = 0x80000000; +const int DW_STACK_SLOT = sizeof(void*); + + +#if defined(__x86_64__) + +#define DWARF_SUPPORTED true + +const int
DW_REG_FP = 6; +const int DW_REG_SP = 7; +const int DW_REG_PC = 16; +const int EMPTY_FRAME_SIZE = DW_STACK_SLOT; +const int LINKED_FRAME_SIZE = 2 * DW_STACK_SLOT; +const int INITIAL_PC_OFFSET = -EMPTY_FRAME_SIZE; + +#elif defined(__i386__) + +#define DWARF_SUPPORTED true + +const int DW_REG_FP = 5; +const int DW_REG_SP = 4; +const int DW_REG_PC = 8; +const int EMPTY_FRAME_SIZE = DW_STACK_SLOT; +const int LINKED_FRAME_SIZE = 2 * DW_STACK_SLOT; +const int INITIAL_PC_OFFSET = -EMPTY_FRAME_SIZE; + +#elif defined(__aarch64__) + +#define DWARF_SUPPORTED true + +const int DW_REG_FP = 29; +const int DW_REG_SP = 31; +const int DW_REG_PC = 30; +const int EMPTY_FRAME_SIZE = 0; +const int LINKED_FRAME_SIZE = 0; +const int INITIAL_PC_OFFSET = DW_LINK_REGISTER; + +#else + +#define DWARF_SUPPORTED false + +const int DW_REG_FP = 0; +const int DW_REG_SP = 1; +const int DW_REG_PC = 2; +const int EMPTY_FRAME_SIZE = 0; +const int LINKED_FRAME_SIZE = 0; +const int INITIAL_PC_OFFSET = DW_LINK_REGISTER; + +#endif + + +struct FrameDesc { + u32 loc; + int cfa; + int fp_off; + int pc_off; + + static FrameDesc empty_frame; + static FrameDesc default_frame; + + static int comparator(const void* p1, const void* p2) { + FrameDesc* fd1 = (FrameDesc*)p1; + FrameDesc* fd2 = (FrameDesc*)p2; + return (int)(fd1->loc - fd2->loc); + } +}; + + +class DwarfParser { + private: + const char* _name; + const char* _image_base; + const char* _ptr; + + int _capacity; + int _count; + FrameDesc* _table; + FrameDesc* _prev; + + u32 _code_align; + int _data_align; + + const char* add(size_t size) { + const char* ptr = _ptr; + _ptr = ptr + size; + return ptr; + } + + u8 get8() { + return *_ptr++; + } + + u16 get16() { + const char* ptr = add(2); + u16 result; + memcpy(&result, ptr, sizeof(u16)); + return result; + } + + u32 get32() { + const char* ptr = add(4); + u32 result; + memcpy(&result, ptr, sizeof(u32)); + return result; + } + + u32 getLeb() { + u32 result = 0; + for (u32 shift = 0; ; shift += 7) { + u8 b = *_ptr++; + result |= (b & 0x7f) << shift; + if ((b & 0x80) == 0) { + return result; + } + } + } + + int getSLeb() { + int result = 0; + for (u32 shift = 0; ; shift += 7) { + u8 b = *_ptr++; + result |= (b & 0x7f) << shift; + if ((b & 0x80) == 0) { + if ((b & 0x40) != 0 && (shift += 7) < 32) { + result |= ~0U << shift; + } + return result; + } + } + } + + void skipLeb() { + while (*_ptr++ & 0x80) {} + } + + const char* getPtr() { + const char* ptr = _ptr; + const char* offset_ptr = add(4); + int offset; + memcpy(&offset, offset_ptr, sizeof(int)); + return ptr + offset; + } + + void parse(const char* eh_frame_hdr); + void parseCie(); + void parseFde(); + void parseInstructions(u32 loc, const char* end); + int parseExpression(); + + void addRecord(u32 loc, u32 cfa_reg, int cfa_off, int fp_off, int pc_off); + FrameDesc* addRecordRaw(u32 loc, int cfa, int fp_off, int pc_off); + + public: + DwarfParser(const char* name, const char* image_base, const char* eh_frame_hdr); + + FrameDesc* table() const { + return _table; + } + + int count() const { + return _count; + } +}; + +#endif // _DWARF_H diff --git a/ddprof-lib/src/main/cpp/incbin.h b/ddprof-lib/src/main/cpp/incbin.h new file mode 100644 index 00000000..afbc7629 --- /dev/null +++ b/ddprof-lib/src/main/cpp/incbin.h @@ -0,0 +1,36 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _INCBIN_H +#define _INCBIN_H + +#ifdef __APPLE__ +# define INCBIN_SECTION ".const_data" +# define INCBIN_SYMBOL "_" +#else +# define INCBIN_SECTION 
".section \".rodata\", \"a\"" +# define INCBIN_SYMBOL +#endif + +#define INCBIN(NAME, FILE) \ + extern "C" const char NAME[];\ + extern "C" const char NAME##_END[];\ + asm(INCBIN_SECTION "\n"\ + ".globl " INCBIN_SYMBOL #NAME "\n"\ + INCBIN_SYMBOL #NAME ":\n"\ + ".incbin \"" FILE "\"\n"\ + ".globl " INCBIN_SYMBOL #NAME "_END\n"\ + INCBIN_SYMBOL #NAME "_END:\n"\ + ".byte 0x00\n"\ + ".previous\n"\ + ); + +#define INCBIN_SIZEOF(NAME) (NAME##_END - NAME) + +#define INCLUDE_HELPER_CLASS(NAME_VAR, DATA_VAR, NAME) \ + static const char* const NAME_VAR = NAME;\ + INCBIN(DATA_VAR, "src/helper/" NAME ".class") + +#endif // _INCBIN_H diff --git a/ddprof-lib/src/main/cpp/j9StackTraces.h b/ddprof-lib/src/main/cpp/j9StackTraces.h new file mode 100644 index 00000000..c0703253 --- /dev/null +++ b/ddprof-lib/src/main/cpp/j9StackTraces.h @@ -0,0 +1,49 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _J9STACKTRACES_H +#define _J9STACKTRACES_H + +#include +#include "arch.h" +#include "arguments.h" + + +const int MAX_J9_NATIVE_FRAMES = 128; + +struct J9StackTraceNotification { + void* env; + u64 counter; + int num_frames; + int reserved; + const void* addr[MAX_J9_NATIVE_FRAMES]; + + size_t size() { + return sizeof(*this) - sizeof(this->addr) + num_frames * sizeof(const void*); + } +}; + + +class J9StackTraces { + private: + static pthread_t _thread; + static int _max_stack_depth; + static int _pipe[2]; + + static void* threadEntry(void* unused) { + timerLoop(); + return NULL; + } + + static void timerLoop(); + + public: + static Error start(Arguments& args); + static void stop(); + + static void checkpoint(u64 counter, J9StackTraceNotification* notif); +}; + +#endif // _J9STACKTRACES_H diff --git a/ddprof-lib/src/main/cpp/mutex.cpp b/ddprof-lib/src/main/cpp/mutex.cpp new file mode 100644 index 00000000..85228dbf --- /dev/null +++ b/ddprof-lib/src/main/cpp/mutex.cpp @@ -0,0 +1,35 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "mutex.h" + + +Mutex::Mutex() { + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); + pthread_mutex_init(&_mutex, &attr); +} + +void Mutex::lock() { + pthread_mutex_lock(&_mutex); +} + +void Mutex::unlock() { + pthread_mutex_unlock(&_mutex); +} + +WaitableMutex::WaitableMutex() : Mutex() { + pthread_cond_init(&_cond, NULL); +} + +bool WaitableMutex::waitUntil(u64 wall_time) { + struct timespec ts = {(time_t)(wall_time / 1000000), (long)(wall_time % 1000000) * 1000}; + return pthread_cond_timedwait(&_cond, &_mutex, &ts) != 0; +} + +void WaitableMutex::notify() { + pthread_cond_signal(&_cond); +} diff --git a/ddprof-lib/src/main/cpp/mutex.h b/ddprof-lib/src/main/cpp/mutex.h new file mode 100644 index 00000000..7d017536 --- /dev/null +++ b/ddprof-lib/src/main/cpp/mutex.h @@ -0,0 +1,49 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _MUTEX_H +#define _MUTEX_H + +#include +#include "arch.h" + + +class Mutex { + protected: + pthread_mutex_t _mutex; + + public: + Mutex(); + + void lock(); + void unlock(); +}; + +class WaitableMutex : public Mutex { + protected: + pthread_cond_t _cond; + + public: + WaitableMutex(); + + bool waitUntil(u64 wall_time); + void notify(); +}; + +class MutexLocker { + private: + Mutex* _mutex; + + public: + MutexLocker(Mutex& mutex) : _mutex(&mutex) { + _mutex->lock(); + } + + ~MutexLocker() { + _mutex->unlock(); + } +}; + +#endif // 
_MUTEX_H diff --git a/ddprof-lib/src/main/cpp/os.h b/ddprof-lib/src/main/cpp/os.h new file mode 100644 index 00000000..0ad8b1e2 --- /dev/null +++ b/ddprof-lib/src/main/cpp/os.h @@ -0,0 +1,150 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _OS_H +#define _OS_H + +#include <signal.h> +#include <stddef.h> +#include <sys/types.h> +#include "arch.h" + + +typedef void (*SigAction)(int, siginfo_t*, void*); +typedef void (*SigHandler)(int); +typedef void (*TimerCallback)(void*); + +// Interrupt threads with this signal. The same signal is used inside JDK to interrupt I/O operations. +const int WAKEUP_SIGNAL = SIGIO; + +enum ThreadState { + THREAD_UNKNOWN, + THREAD_RUNNING, + THREAD_SLEEPING +}; + +struct ProcessInfo { + int pid = 0; + int ppid = 0; + char name[16]; // Process name from /proc/{pid}/stat + char cmdline[2048]; // Command line from /proc/{pid}/cmdline + unsigned int uid = 0; // User ID + unsigned char state = 0; // Process state (R, S, D, Z, T, etc.) + u64 start_time = 0; // Process start time (milliseconds since epoch) + + // CPU & thread stats + float cpu_user = 0; // User CPU time (seconds) + float cpu_system = 0; // System CPU time (seconds) + float cpu_percent = 0; // CPU utilization percentage + int threads = 0; // Number of threads + + // Memory stats (in bytes) + u64 vm_size = 0; // Total virtual memory size + u64 vm_rss = 0; // Resident memory size + u64 rss_anon = 0; // Resident anonymous memory + u64 rss_files = 0; // Resident file mappings + u64 rss_shmem = 0; // Resident shared memory + + // Page fault stats + u64 minor_faults = 0; // Minor page faults (no I/O required) + u64 major_faults = 0; // Major page faults (I/O required) + + // I/O stats + u64 io_read = 0; // KB read from storage + u64 io_write = 0; // KB written to storage +}; + + +class ThreadList { + protected: + u32 _index; + u32 _count; + + ThreadList() : _index(0), _count(0) { + } + + public: + virtual ~ThreadList() {} + + u32 index() const { return _index; } + u32 count() const { return _count; } + + bool hasNext() const { + return _index < _count; + } + + virtual int next() = 0; + virtual void update() = 0; +}; + + +// W^X memory support +class JitWriteProtection { + private: + u64 _prev; + bool _restore; + + public: + JitWriteProtection(bool enable); + ~JitWriteProtection(); +}; + + +class OS { + public: + static const size_t page_size; + static const size_t page_mask; + static const long clock_ticks_per_sec; + + static u64 nanotime(); + static u64 micros(); + static u64 processStartTime(); + static void sleep(u64 nanos); + static void uninterruptibleSleep(u64 nanos, volatile bool* flag); + static u64 overrun(siginfo_t* siginfo); + + static u64 hton64(u64 x); + static u64 ntoh64(u64 x); + + static int getMaxThreadId(); + static int processId(); + static int threadId(); + static const char* schedPolicy(int thread_id); + static bool threadName(int thread_id, char* name_buf, size_t name_len); + static ThreadState threadState(int thread_id); + static u64 threadCpuTime(int thread_id); + static ThreadList* listThreads(); + + static bool isLinux(); + static bool isMusl(); + + static SigAction installSignalHandler(int signo, SigAction action, SigHandler handler = NULL); + static SigAction replaceCrashHandler(SigAction action); + static int getProfilingSignal(int mode); + static bool sendSignalToThread(int thread_id, int signo); + + static void* safeAlloc(size_t size); + static void safeFree(void* addr, size_t size); + + static bool getCpuDescription(char* buf, size_t size); + static int
getCpuCount(); + static u64 getProcessCpuTime(u64* utime, u64* stime); + static u64 getTotalCpuTime(u64* utime, u64* stime); + + static int createMemoryFile(const char* name); + static void copyFile(int src_fd, int dst_fd, off_t offset, size_t size); + static void freePageCache(int fd, off_t start_offset); + static int mprotect(void* addr, size_t size, int prot); + + static bool checkPreloaded(); + + static u64 getSystemBootTime(); + static u64 getRamSize(); + static int getProcessIds(int* pids, int max_pids); + static bool getBasicProcessInfo(int pid, ProcessInfo* info); + static bool getDetailedProcessInfo(ProcessInfo* info); +}; + +#endif // _OS_H diff --git a/ddprof-lib/src/main/cpp/os_linux.cpp b/ddprof-lib/src/main/cpp/os_linux.cpp new file mode 100644 index 00000000..8e2cd7dd --- /dev/null +++ b/ddprof-lib/src/main/cpp/os_linux.cpp @@ -0,0 +1,693 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifdef __linux__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "os.h" + + +#ifdef __LP64__ +# define MMAP_SYSCALL __NR_mmap +#else +# define MMAP_SYSCALL __NR_mmap2 +#endif + +#define COMM_LEN 16 + +class LinuxThreadList : public ThreadList { + private: + DIR* _dir; + int* _thread_array; + u32 _capacity; + + void addThread(int thread_id) { + if (_count >= _capacity) { + _capacity = _count * 2; + _thread_array = (int*)realloc(_thread_array, _capacity * sizeof(int)); + } + _thread_array[_count++] = thread_id; + } + + void fillThreadArray() { + if (_dir != NULL) { + rewinddir(_dir); + struct dirent* entry; + while ((entry = readdir(_dir)) != NULL) { + if (entry->d_name[0] != '.') { + addThread(atoi(entry->d_name)); + } + } + } + } + + public: + LinuxThreadList() : ThreadList() { + _dir = opendir("/proc/self/task"); + _capacity = 128; + _thread_array = (int*)malloc(_capacity * sizeof(int)); + fillThreadArray(); + } + + ~LinuxThreadList() { + free(_thread_array); + if (_dir != NULL) { + closedir(_dir); + } + } + + int next() { + return _thread_array[_index++]; + } + + void update() { + _index = _count = 0; + fillThreadArray(); + } +}; + + +JitWriteProtection::JitWriteProtection(bool enable) { + // Not used on Linux +} + +JitWriteProtection::~JitWriteProtection() { + // Not used on Linux +} + + +static SigAction installed_sigaction[64]; + +const size_t OS::page_size = sysconf(_SC_PAGESIZE); +const size_t OS::page_mask = OS::page_size - 1; +const long OS::clock_ticks_per_sec = sysconf(_SC_CLK_TCK); + + +u64 OS::nanotime() { + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return (u64)ts.tv_sec * 1000000000 + ts.tv_nsec; +} + +u64 OS::micros() { + struct timeval tv; + gettimeofday(&tv, NULL); + return (u64)tv.tv_sec * 1000000 + tv.tv_usec; +} + +u64 OS::processStartTime() { + static u64 start_time = 0; + + if (start_time == 0) { + char buf[64]; + snprintf(buf, sizeof(buf), "/proc/%d", processId()); + + struct stat st; + if (stat(buf, &st) == 0) { + start_time = (u64)st.st_mtim.tv_sec * 1000 + st.st_mtim.tv_nsec / 1000000; + } + } + + return start_time; +} + +void OS::sleep(u64 nanos) { + struct timespec ts = {(time_t)(nanos / 1000000000), (long)(nanos % 1000000000)}; + nanosleep(&ts, NULL); +} + +void OS::uninterruptibleSleep(u64 nanos, volatile bool* flag) { + // Workaround nanosleep bug: https://man7.org/linux/man-pages/man2/nanosleep.2.html#BUGS + u64 deadline = 
OS::nanotime() + nanos; + struct timespec ts = {(time_t)(deadline / 1000000000), (long)(deadline % 1000000000)}; + while (*flag && clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &ts, &ts) == EINTR); +} + +u64 OS::overrun(siginfo_t* siginfo) { + return siginfo->si_overrun; +} + +u64 OS::hton64(u64 x) { + return htonl(1) == 1 ? x : bswap_64(x); +} + +u64 OS::ntoh64(u64 x) { + return ntohl(1) == 1 ? x : bswap_64(x); +} + +int OS::getMaxThreadId() { + char buf[16] = "65536"; + int fd = open("/proc/sys/kernel/pid_max", O_RDONLY); + if (fd != -1) { + ssize_t r = read(fd, buf, sizeof(buf) - 1); + (void) r; + close(fd); + } + return atoi(buf); +} + +int OS::processId() { + static const int self_pid = getpid(); + + return self_pid; +} + +int OS::threadId() { + return syscall(__NR_gettid); +} + +const char* OS::schedPolicy(int thread_id) { + int sched_policy = sched_getscheduler(thread_id); + if (sched_policy >= SCHED_BATCH) { + return sched_policy >= SCHED_IDLE ? "SCHED_IDLE" : "SCHED_BATCH"; + } + return "SCHED_OTHER"; +} + +bool OS::threadName(int thread_id, char* name_buf, size_t name_len) { + char buf[64]; + snprintf(buf, sizeof(buf), "/proc/self/task/%d/comm", thread_id); + int fd = open(buf, O_RDONLY); + if (fd == -1) { + return false; + } + + ssize_t r = read(fd, name_buf, name_len); + close(fd); + + if (r > 0) { + name_buf[r - 1] = 0; + return true; + } + return false; +} + +ThreadState OS::threadState(int thread_id) { + char buf[512]; + snprintf(buf, sizeof(buf), "/proc/self/task/%d/stat", thread_id); + int fd = open(buf, O_RDONLY); + if (fd == -1) { + return THREAD_UNKNOWN; + } + + ThreadState state = THREAD_UNKNOWN; + if (read(fd, buf, sizeof(buf)) > 0) { + char* s = strchr(buf, ')'); + state = s != NULL && (s[2] == 'R' || s[2] == 'D') ? THREAD_RUNNING : THREAD_SLEEPING; + } + + close(fd); + return state; +} + +u64 OS::threadCpuTime(int thread_id) { + clockid_t thread_cpu_clock; + if (thread_id) { + thread_cpu_clock = ((~(unsigned int)(thread_id)) << 3) | 6; // CPUCLOCK_SCHED | CPUCLOCK_PERTHREAD_MASK + } else { + thread_cpu_clock = CLOCK_THREAD_CPUTIME_ID; + } + + struct timespec ts; + if (clock_gettime(thread_cpu_clock, &ts) == 0) { + return (u64)ts.tv_sec * 1000000000 + ts.tv_nsec; + } + return 0; +} + +ThreadList* OS::listThreads() { + return new LinuxThreadList(); +} + +bool OS::isLinux() { + return true; +} + +// _CS_GNU_LIBC_VERSION is not defined on musl +const static bool musl = confstr(_CS_GNU_LIBC_VERSION, NULL, 0) == 0 && errno != 0; + +bool OS::isMusl() { + return musl; +} + +SigAction OS::installSignalHandler(int signo, SigAction action, SigHandler handler) { + struct sigaction sa; + struct sigaction oldsa; + sigemptyset(&sa.sa_mask); + + if (handler != NULL) { + sa.sa_handler = handler; + sa.sa_flags = 0; + } else { + sa.sa_sigaction = action; + sa.sa_flags = SA_SIGINFO | SA_RESTART; + if (signo > 0 && signo < sizeof(installed_sigaction) / sizeof(installed_sigaction[0])) { + installed_sigaction[signo] = action; + } + } + + sigaction(signo, &sa, &oldsa); + return oldsa.sa_sigaction; +} + +static void restoreSignalHandler(int signo, siginfo_t* siginfo, void* ucontext) { + signal(signo, SIG_DFL); +} + +SigAction OS::replaceCrashHandler(SigAction action) { + struct sigaction sa; + sigaction(SIGSEGV, NULL, &sa); + SigAction old_action = sa.sa_handler == SIG_DFL ? 
restoreSignalHandler : sa.sa_sigaction; + sigemptyset(&sa.sa_mask); + sa.sa_sigaction = action; + sa.sa_flags |= SA_SIGINFO | SA_RESTART | SA_NODEFER; + sigaction(SIGSEGV, &sa, NULL); + return old_action; +} + +int OS::getProfilingSignal(int mode) { + static int preferred_signals[2] = {SIGPROF, SIGVTALRM}; + + const u64 allowed_signals = + 1ULL << SIGPROF | 1ULL << SIGVTALRM | 1ULL << SIGSTKFLT | 1ULL << SIGPWR | -(1ULL << SIGRTMIN); + + int& signo = preferred_signals[mode]; + int initial_signo = signo; + int other_signo = preferred_signals[1 - mode]; + + do { + struct sigaction sa; + if ((allowed_signals & (1ULL << signo)) != 0 && signo != other_signo && sigaction(signo, NULL, &sa) == 0) { + if (sa.sa_handler == SIG_DFL || sa.sa_handler == SIG_IGN || sa.sa_sigaction == installed_sigaction[signo]) { + return signo; + } + } + } while ((signo = (signo + 53) & 63) != initial_signo); + + return signo; +} + +bool OS::sendSignalToThread(int thread_id, int signo) { + return syscall(__NR_tgkill, processId(), thread_id, signo) == 0; +} + +void* OS::safeAlloc(size_t size) { + // Naked syscall can be used inside a signal handler. + // Also, we don't want to catch our own calls when profiling mmap. + intptr_t result = syscall(MMAP_SYSCALL, NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (result < 0 && result > -4096) { + return NULL; + } + return (void*)result; +} + +void OS::safeFree(void* addr, size_t size) { + syscall(__NR_munmap, addr, size); +} + +bool OS::getCpuDescription(char* buf, size_t size) { + int fd = open("/proc/cpuinfo", O_RDONLY); + if (fd == -1) { + return false; + } + + ssize_t r = read(fd, buf, size); + close(fd); + if (r <= 0) { + return false; + } + buf[r < size ? r : size - 1] = 0; + + char* c; + do { + c = strchr(buf, '\n'); + } while (c != NULL && *(buf = c + 1) != '\n'); + + *buf = 0; + return true; +} + +int OS::getCpuCount() { + return sysconf(_SC_NPROCESSORS_ONLN); +} + +u64 OS::getProcessCpuTime(u64* utime, u64* stime) { + struct tms buf; + clock_t real = times(&buf); + *utime = buf.tms_utime; + *stime = buf.tms_stime; + return real; +} + +u64 OS::getTotalCpuTime(u64* utime, u64* stime) { + int fd = open("/proc/stat", O_RDONLY); + if (fd == -1) { + return (u64)-1; + } + + u64 real = (u64)-1; + char buf[128] = {0}; + if (read(fd, buf, sizeof(buf)) >= 12) { + u64 user, nice, system, idle; + if (sscanf(buf + 4, "%llu %llu %llu %llu", &user, &nice, &system, &idle) == 4) { + *utime = user + nice; + *stime = system; + real = user + nice + system + idle; + } + } + + close(fd); + return real; +} + +int OS::createMemoryFile(const char* name) { + return syscall(__NR_memfd_create, name, 0); +} + +void OS::copyFile(int src_fd, int dst_fd, off_t offset, size_t size) { + // copy_file_range() is probably better, but not supported on all kernels + while (size > 0) { + ssize_t bytes = sendfile(dst_fd, src_fd, &offset, size); + if (bytes <= 0) { + break; + } + size -= (size_t)bytes; + } +} + +void OS::freePageCache(int fd, off_t start_offset) { + posix_fadvise(fd, start_offset & ~page_mask, 0, POSIX_FADV_DONTNEED); +} + +int OS::mprotect(void* addr, size_t size, int prot) { + return ::mprotect(addr, size, prot); +} + +static int checkPreloadedCallback(dl_phdr_info* info, size_t size, void* data) { + Dl_info* dl_info = (Dl_info*)data; + + Dl_info libprofiler = dl_info[0]; + Dl_info libc = dl_info[1]; + + if ((void*)info->dlpi_addr == libprofiler.dli_fbase) { + // async-profiler found first + return 1; + } else if ((void*)info->dlpi_addr == libc.dli_fbase) { + 
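+ // dl_iterate_phdr() visits objects in load order, so reaching libc before
+ // the profiler means the profiler was not preloaded; any non-zero return
+ // value stops the iteration.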
// libc found first + return -1; + } + + return 0; +} + +// Checks if async-profiler is preloaded through the LD_PRELOAD mechanism. +// This is done by analyzing the order of loaded dynamic libraries. +bool OS::checkPreloaded() { + if (getenv("LD_PRELOAD") == NULL) { + return false; + } + + // Find async-profiler shared object + Dl_info libprofiler; + if (dladdr((const void*)OS::checkPreloaded, &libprofiler) == 0) { + return false; + } + + // Find libc shared object + Dl_info libc; + if (dladdr((const void*)exit, &libc) == 0) { + return false; + } + + Dl_info info[2] = {libprofiler, libc}; + return dl_iterate_phdr(checkPreloadedCallback, (void*)info) == 1; +} + +u64 OS::getRamSize() { + static u64 mem_total = 0; + + if (mem_total == 0) { + FILE* file = fopen("/proc/meminfo", "r"); + if (!file) return 0; + + char line[1024]; + while (fgets(line, sizeof(line), file)) { + if (strncmp(line, "MemTotal:", 9) == 0) { + mem_total = strtoull(line + 9, NULL, 10) * 1024; + break; + } + } + + fclose(file); + } + + return mem_total; +} + +u64 OS::getSystemBootTime() { + static u64 system_boot_time = 0; + + if (system_boot_time == 0) { + FILE* file = fopen("/proc/stat", "r"); + if (!file) return 0; + + char line[1024]; + while (fgets(line, sizeof(line), file)) { + if (strncmp(line, "btime", 5) == 0) { + system_boot_time = strtoull(line + 5, NULL, 10); + break; + } + } + + fclose(file); + } + + return system_boot_time; +} + +int OS::getProcessIds(int* pids, int max_pids) { + int count = 0; + DIR* proc = opendir("/proc"); + if (!proc) return 0; + + for (dirent* de; (de = readdir(proc)) && count < max_pids;) { + int pid = atoi(de->d_name); + if (pid > 0) { + pids[count++] = pid; + } + } + + closedir(proc); + return count; +} + +static bool readProcessCmdline(int pid, ProcessInfo* info) { + char path[64]; + snprintf(path, sizeof(path), "/proc/%d/cmdline", pid); + + int fd = open(path, O_RDONLY); + if (fd == -1) { + return false; + } + + const size_t max_read = sizeof(info->cmdline) - 1; + size_t len = 0; + + ssize_t r; + while (r = read(fd, info->cmdline + len, max_read - len)) { + if (r > 0) { + len += (size_t)r; + if (len == max_read) break; + } else { + if (errno == EINTR) continue; + close(fd); + return false; + } + } + + close(fd); + + // Replace null bytes with spaces (arguments are separated by null bytes) + for (size_t i = 0; i < len; i++) { + if (info->cmdline[i] == '\0') { + info->cmdline[i] = ' '; + } + } + + // Ensure null termination + info->cmdline[len] = '\0'; + + // Remove trailing space if present + while (len > 0 && info->cmdline[len - 1] == ' ') { + info->cmdline[--len] = '\0'; + } + + return true; +} + +static bool readProcessStats(int pid, ProcessInfo* info) { + char path[64]; + snprintf(path, sizeof(path), "/proc/%d/stat", pid); + + int fd = open(path, O_RDONLY); + if (fd == -1) return false; + + char buffer[4096]; + size_t len = 0; + + ssize_t r; + while (r = read(fd, buffer + len, sizeof(buffer) - 1 - len)) { + if (r > 0) { + len += (size_t)r; + if (len == sizeof(buffer) - 1) break; + } else { + if (errno == EINTR) continue; + close(fd); + return false; + } + } + close(fd); + + if (len == 0) return false; + buffer[len] = '\0'; + + int parsed_pid, ppid; + char comm[COMM_LEN] = {0}; + char state; + u64 minflt, majflt, utime, stime; + u64 starttime; + u64 vsize, rss; + int threads; + int parsed = + sscanf(buffer, + "%d " /* 1 pid */ + "(%15[^)]) " /* 2 comm (read until ')') */ + "%c %d " /* 3 state, 4 ppid */ + "%*d %*d %*d %*d %*u " /* 5-9 skip */ + "%llu %*u %llu %*u " /* 10-13 
minflt,-,majflt,- */ + "%llu %llu " /* 14-15 utime, stime */ + "%*d %*d %*d %*d " /* 16-19 skip */ + "%d " /* 20 threads */ + "%*d " /* 21 skip */ + "%llu " /* 22 starttime */ + "%llu " /* 23 vsize */ + "%llu", /* 24 rss */ + &parsed_pid, comm, &state, &ppid, &minflt, &majflt, &utime, &stime, &threads, &starttime, &vsize, &rss); + + if (parsed < 12) return false; + + memcpy(info->name, comm, COMM_LEN); + info->pid = parsed_pid; + info->ppid = ppid; + info->state = (unsigned char)state; + info->minor_faults = minflt; + info->major_faults = majflt; + info->cpu_user = (float)utime / OS::clock_ticks_per_sec; + info->cpu_system = (float)stime / OS::clock_ticks_per_sec; + info->threads = threads; + info->vm_size = vsize; + // (24) rss - convert from number of pages to bytes + info->vm_rss = rss * OS::page_size; + info->start_time = (OS::getSystemBootTime() + starttime / OS::clock_ticks_per_sec) * 1000; + return true; +} + +static bool readProcessStatus(int pid, ProcessInfo* info) { + char path[64]; + snprintf(path, sizeof(path), "/proc/%d/status", pid); + FILE* file = fopen(path, "r"); + if (!file) { + return false; + } + + int read_count = 0; + char line[1024]; + char key[32]; + u64 value; + while (fgets(line, sizeof(line), file) && read_count < 6) { + if (sscanf(line, "%31s %llu", key, &value) != 2) { + continue; + } + + if (strncmp(key, "Uid", 3) == 0) { + read_count++; + info->uid = (unsigned int)value; + } else if (strncmp(key, "RssAnon", 7) == 0) { + read_count++; + info->rss_anon = value * 1024; + } else if (strncmp(key, "RssFile", 7) == 0) { + read_count++; + info->rss_files = value * 1024; + } else if (strncmp(key, "RssShmem", 8) == 0) { + read_count++; + info->rss_shmem = value * 1024; + } else if (strncmp(key, "VmSize", 6) == 0) { + read_count++; + info->vm_size = value * 1024; + } else if (strncmp(key, "VmRSS", 5) == 0) { + read_count++; + info->vm_rss = value * 1024; + } + } + + fclose(file); + return true; +} + +static bool readProcessIO(int pid, ProcessInfo* info) { + char path[64]; + snprintf(path, sizeof(path), "/proc/%d/io", pid); + FILE* file = fopen(path, "r"); + if (!file) return false; + + int read_count = 0; + char line[1024]; + while (fgets(line, sizeof(line), file) && read_count < 2) { + if (strncmp(line, "read_bytes:", 11) == 0) { + u64 read_bytes = strtoull(line + 11, NULL, 10); + info->io_read = read_bytes >> 10; + read_count++; + } else if (strncmp(line, "write_bytes:", 12) == 0) { + u64 write_bytes = strtoull(line + 12, NULL, 10); + info->io_write = write_bytes >> 10; + read_count++; + } + } + + fclose(file); + return true; +} + +bool OS::getBasicProcessInfo(int pid, ProcessInfo* info) { + return readProcessStats(pid, info); +} + +bool OS::getDetailedProcessInfo(ProcessInfo* info) { + readProcessStatus(info->pid, info); + readProcessIO(info->pid, info); + readProcessCmdline(info->pid, info); + return true; +} + +#endif // __linux__ diff --git a/ddprof-lib/src/main/cpp/os_macos.cpp b/ddprof-lib/src/main/cpp/os_macos.cpp new file mode 100644 index 00000000..e9410d71 --- /dev/null +++ b/ddprof-lib/src/main/cpp/os_macos.cpp @@ -0,0 +1,458 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifdef __APPLE__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "os.h" + + +class MacThreadList : public ThreadList { + private: + task_t _task; + thread_array_t _thread_array; + + void deallocate() 
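+ // task_threads() hands the caller one send right per thread plus the array
+ // that holds them, so every port must be released with mach_port_deallocate()
+ // and the array itself unmapped with vm_deallocate().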
{ + if (_thread_array != NULL) { + for (u32 i = 0; i < _count; i++) { + mach_port_deallocate(_task, _thread_array[i]); + } + vm_deallocate(_task, (vm_address_t)_thread_array, _count * sizeof(thread_t)); + _thread_array = NULL; + } + } + + public: + MacThreadList() { + _task = mach_task_self(); + _thread_array = NULL; + task_threads(_task, &_thread_array, &_count); + } + + ~MacThreadList() { + deallocate(); + } + + int next() { + return (int)_thread_array[_index++]; + } + + void update() { + deallocate(); + _index = _count = 0; + task_threads(_task, &_thread_array, &_count); + } +}; + + +JitWriteProtection::JitWriteProtection(bool enable) { +#ifdef __aarch64__ + // Mimic pthread_jit_write_protect_np(), but save the previous state + if (*(volatile char*)0xfffffc10c) { + u64 val = enable ? *(volatile u64*)0xfffffc118 : *(volatile u64*)0xfffffc110; + u64 prev; + asm volatile("mrs %0, s3_6_c15_c1_5" : "=r" (prev) : : ); + if (prev != val) { + _prev = prev; + _restore = true; + asm volatile("msr s3_6_c15_c1_5, %0\n" + "isb" + : "+r" (val) : : "memory"); + return; + } + } + // Already in the required mode, or write protection is not supported + _restore = false; +#endif +} + +JitWriteProtection::~JitWriteProtection() { +#ifdef __aarch64__ + if (_restore) { + u64 prev = _prev; + asm volatile("msr s3_6_c15_c1_5, %0\n" + "isb" + : "+r" (prev) : : "memory"); + } +#endif +} + + +static SigAction installed_sigaction[32]; +static SigAction orig_sigbus_handler; +static SigAction orig_sigsegv_handler; + +const size_t OS::page_size = sysconf(_SC_PAGESIZE); +const size_t OS::page_mask = OS::page_size - 1; +const long OS::clock_ticks_per_sec = sysconf(_SC_CLK_TCK); + +static mach_timebase_info_data_t timebase = {0, 0}; + +u64 OS::nanotime() { + if (timebase.denom == 0) { + mach_timebase_info(&timebase); + } + return (u64)mach_absolute_time() * timebase.numer / timebase.denom; +} + +u64 OS::micros() { + struct timeval tv; + gettimeofday(&tv, NULL); + return (u64)tv.tv_sec * 1000000 + tv.tv_usec; +} + +void OS::sleep(u64 nanos) { + struct timespec ts = {(time_t)(nanos / 1000000000), (long)(nanos % 1000000000)}; + nanosleep(&ts, NULL); +} + +void OS::uninterruptibleSleep(u64 nanos, volatile bool* flag) { + struct timespec ts = {(time_t)(nanos / 1000000000), (long)(nanos % 1000000000)}; + while (*flag && nanosleep(&ts, &ts) < 0 && errno == EINTR); +} + +u64 OS::overrun(siginfo_t* siginfo) { + return 0; +} + +u64 OS::processStartTime() { + static u64 start_time = 0; + + if (start_time == 0) { + struct proc_bsdinfo info; + if (proc_pidinfo(processId(), PROC_PIDTBSDINFO, 0, &info, sizeof(info)) > 0) { + start_time = (u64)info.pbi_start_tvsec * 1000 + info.pbi_start_tvusec / 1000; + } + } + + return start_time; +} + +u64 OS::hton64(u64 x) { + return OSSwapHostToBigInt64(x); +} + +u64 OS::ntoh64(u64 x) { + return OSSwapBigToHostInt64(x); +} + +int OS::getMaxThreadId() { + return 0x7fffffff; +} + +int OS::processId() { + static const int self_pid = getpid(); + + return self_pid; +} + +int OS::threadId() { + // Used to be pthread_mach_thread_np(pthread_self()), + // but pthread_mach_thread_np is not async signal safe + mach_port_t port = mach_thread_self(); + mach_port_deallocate(mach_task_self(), port); + return (int)port; +} + +const char* OS::schedPolicy(int thread_id) { + // Not used on macOS + return "SCHED_OTHER"; +} + +bool OS::threadName(int thread_id, char* name_buf, size_t name_len) { + pthread_t thread = pthread_from_mach_thread_np(thread_id); + return thread && pthread_getname_np(thread, name_buf, name_len) 
== 0 && name_buf[0] != 0; + +ThreadState OS::threadState(int thread_id) { + struct thread_basic_info info; + mach_msg_type_number_t size = sizeof(info); + if (thread_info((thread_act_t)thread_id, THREAD_BASIC_INFO, (thread_info_t)&info, &size) != 0) { + return THREAD_UNKNOWN; + } + return info.run_state == TH_STATE_RUNNING ? THREAD_RUNNING : THREAD_SLEEPING; +} + +u64 OS::threadCpuTime(int thread_id) { + if (thread_id == 0) thread_id = threadId(); + + struct thread_basic_info info; + mach_msg_type_number_t size = sizeof(info); + if (thread_info((thread_act_t)thread_id, THREAD_BASIC_INFO, (thread_info_t)&info, &size) != 0) { + return 0; + } + return u64(info.user_time.seconds + info.system_time.seconds) * 1000000000 + + u64(info.user_time.microseconds + info.system_time.microseconds) * 1000; +} + +ThreadList* OS::listThreads() { + return new MacThreadList(); +} + +bool OS::isLinux() { + return false; +} + +bool OS::isMusl() { + return false; +} + +SigAction OS::installSignalHandler(int signo, SigAction action, SigHandler handler) { + struct sigaction sa; + struct sigaction oldsa; + sigemptyset(&sa.sa_mask); + + if (handler != NULL) { + sa.sa_handler = handler; + sa.sa_flags = 0; + } else { + sa.sa_sigaction = action; + sa.sa_flags = SA_SIGINFO | SA_RESTART; + if (signo > 0 && signo < sizeof(installed_sigaction) / sizeof(installed_sigaction[0])) { + installed_sigaction[signo] = action; + } + } + + sigaction(signo, &sa, &oldsa); + return oldsa.sa_sigaction; +} + +static void restoreSignalHandler(int signo, siginfo_t* siginfo, void* ucontext) { + signal(signo, SIG_DFL); +} + +SigAction OS::replaceCrashHandler(SigAction action) { + // It is not well specified when macOS raises SIGBUS and when SIGSEGV. + // HotSpot handles both similarly, and so do we. + struct sigaction sa; + + sigaction(SIGBUS, NULL, &sa); + orig_sigbus_handler = sa.sa_handler == SIG_DFL ? restoreSignalHandler : sa.sa_sigaction; + sigemptyset(&sa.sa_mask); + sa.sa_sigaction = action; + sa.sa_flags |= SA_SIGINFO | SA_RESTART | SA_NODEFER; + sigaction(SIGBUS, &sa, NULL); + + sigaction(SIGSEGV, NULL, &sa); + orig_sigsegv_handler = sa.sa_handler == SIG_DFL ? restoreSignalHandler : sa.sa_sigaction; + sigemptyset(&sa.sa_mask); + sa.sa_sigaction = action; + sa.sa_flags |= SA_SIGINFO | SA_RESTART | SA_NODEFER; + sigaction(SIGSEGV, &sa, NULL); + + // Return an action that dispatches to one of the original handlers depending on signo, + // so that the caller does not need to deal with multiple handlers. + return [](int signo, siginfo_t* siginfo, void* ucontext) { + (signo == SIGBUS ? 
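+ // A captureless lambda converts to a plain function pointer, which is what
+ // SigAction is; the original handlers are therefore kept in globals rather
+ // than in captures.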
orig_sigbus_handler : orig_sigsegv_handler)(signo, siginfo, ucontext); + }; +} + +int OS::getProfilingSignal(int mode) { + static int preferred_signals[2] = {SIGPROF, SIGVTALRM}; + + const u64 allowed_signals = + 1ULL << SIGPROF | 1ULL << SIGVTALRM | 1ULL << SIGEMT | 1ULL << SIGSYS; + + int& signo = preferred_signals[mode]; + int initial_signo = signo; + int other_signo = preferred_signals[1 - mode]; + + do { + struct sigaction sa; + if ((allowed_signals & (1ULL << signo)) != 0 && signo != other_signo && sigaction(signo, NULL, &sa) == 0) { + if (sa.sa_handler == SIG_DFL || sa.sa_handler == SIG_IGN || sa.sa_sigaction == installed_sigaction[signo]) { + return signo; + } + } + } while ((signo = (signo + 1) & 31) != initial_signo); + + return signo; +} + +bool OS::sendSignalToThread(int thread_id, int signo) { +#ifdef __aarch64__ + register long x0 asm("x0") = thread_id; + register long x1 asm("x1") = signo; + register long x16 asm("x16") = 328; + asm volatile("svc #0x80" + : "+r" (x0) + : "r" (x1), "r" (x16) + : "memory"); + return x0 == 0; +#else + int result; + asm volatile("syscall" + : "=a" (result) + : "a" (0x2000148), "D" (thread_id), "S" (signo) + : "rcx", "r11", "memory"); + return result == 0; +#endif +} + +void* OS::safeAlloc(size_t size) { + // mmap() is not guaranteed to be async-signal-safe, but in practice, it is. + // There is no reasonable alternative anyway. + void* result = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (result == MAP_FAILED) { + return NULL; + } + return result; +} + +void OS::safeFree(void* addr, size_t size) { + munmap(addr, size); +} + +bool OS::getCpuDescription(char* buf, size_t size) { + return sysctlbyname("machdep.cpu.brand_string", buf, &size, NULL, 0) == 0; +} + +int OS::getCpuCount() { + int cpu_count; + size_t size = sizeof(cpu_count); + return sysctlbyname("hw.logicalcpu", &cpu_count, &size, NULL, 0) == 0 ? cpu_count : 1; +} + +u64 OS::getProcessCpuTime(u64* utime, u64* stime) { + struct tms buf; + clock_t real = times(&buf); + *utime = buf.tms_utime; + *stime = buf.tms_stime; + return real; +} + +u64 OS::getTotalCpuTime(u64* utime, u64* stime) { + natural_t cpu_count; + processor_info_array_t cpu_info_array; + mach_msg_type_number_t cpu_info_count; + + host_name_port_t host = mach_host_self(); + kern_return_t ret = host_processor_info(host, PROCESSOR_CPU_LOAD_INFO, &cpu_count, &cpu_info_array, &cpu_info_count); + mach_port_deallocate(mach_task_self(), host); + if (ret != 0) { + return (u64)-1; + } + + processor_cpu_load_info_data_t* cpu_load = (processor_cpu_load_info_data_t*)cpu_info_array; + u64 user = 0; + u64 system = 0; + u64 idle = 0; + for (natural_t i = 0; i < cpu_count; i++) { + user += cpu_load[i].cpu_ticks[CPU_STATE_USER] + cpu_load[i].cpu_ticks[CPU_STATE_NICE]; + system += cpu_load[i].cpu_ticks[CPU_STATE_SYSTEM]; + idle += cpu_load[i].cpu_ticks[CPU_STATE_IDLE]; + } + vm_deallocate(mach_task_self(), (vm_address_t)cpu_info_array, cpu_info_count * sizeof(int)); + + *utime = user; + *stime = system; + return user + system + idle; +} + +int OS::createMemoryFile(const char* name) { + // Not supported on macOS + return -1; +} + +void OS::copyFile(int src_fd, int dst_fd, off_t offset, size_t size) { + char* buf = (char*)mmap(NULL, size + offset, PROT_READ, MAP_PRIVATE, src_fd, 0); + if (buf == MAP_FAILED) { + return; + } + + while (size > 0) { + ssize_t bytes = write(dst_fd, buf + offset, size < 262144 ? 
size : 262144); + if (bytes <= 0) { + break; + } + offset += (size_t)bytes; + size -= (size_t)bytes; + } + + munmap(buf, offset); +} + +void OS::freePageCache(int fd, off_t start_offset) { + // Not supported on macOS +} + +int OS::mprotect(void* addr, size_t size, int prot) { + if (prot & PROT_WRITE) prot |= VM_PROT_COPY; + return vm_protect(mach_task_self(), (vm_address_t)addr, size, 0, prot); +} + +// Checks if async-profiler is preloaded through the DYLD_INSERT_LIBRARIES mechanism. +// This is done by analyzing the order of loaded dynamic libraries. +bool OS::checkPreloaded() { + if (getenv("DYLD_INSERT_LIBRARIES") == NULL) { + return false; + } + + // Find async-profiler shared object + Dl_info libprofiler; + if (dladdr((const void*)OS::checkPreloaded, &libprofiler) == 0) { + return false; + } + + // Find libc shared object + Dl_info libc; + if (dladdr((const void*)exit, &libc) == 0) { + return false; + } + + uint32_t images = _dyld_image_count(); + for (uint32_t i = 0; i < images; i++) { + void* image_base = (void*)_dyld_get_image_header(i); + + if (image_base == libprofiler.dli_fbase) { + // async-profiler found first + return true; + } else if (image_base == libc.dli_fbase) { + // libc found first + return false; + } + } + + return false; +} + +u64 OS::getSystemBootTime() { + return 0; +} + +u64 OS::getRamSize() { + return 0; +} + +int OS::getProcessIds(int* pids, int max_pids) { + return 0; +} + +bool OS::getBasicProcessInfo(int pid, ProcessInfo* info) { + return false; +} + +bool OS::getDetailedProcessInfo(ProcessInfo* info) { + return false; +} + +#endif // __APPLE__ diff --git a/ddprof-lib/src/main/cpp/stackFrame.h b/ddprof-lib/src/main/cpp/stackFrame.h new file mode 100644 index 00000000..a8d5b6fa --- /dev/null +++ b/ddprof-lib/src/main/cpp/stackFrame.h @@ -0,0 +1,92 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _STACKFRAME_H +#define _STACKFRAME_H + +#include +#include +#include +#include "arch.h" + + +class NMethod; + +class StackFrame { + private: + ucontext_t* _ucontext; + + static bool withinCurrentStack(uintptr_t address) { + // Check that the address is not too far from the stack pointer of current context + void* real_sp; + return address - (uintptr_t)&real_sp <= 0xffff; + } + + public: + explicit StackFrame(void* ucontext) { + _ucontext = (ucontext_t*)ucontext; + } + + void restore(uintptr_t saved_pc, uintptr_t saved_sp, uintptr_t saved_fp) { + if (_ucontext != nullptr) { + pc() = saved_pc; + sp() = saved_sp; + fp() = saved_fp; + } + } + + uintptr_t stackAt(int slot) { + return ((uintptr_t*)sp())[slot]; + } + + uintptr_t& pc(); + uintptr_t& sp(); + uintptr_t& fp(); + + uintptr_t& retval(); + uintptr_t link(); + uintptr_t arg0(); + uintptr_t arg1(); + uintptr_t arg2(); + uintptr_t arg3(); + uintptr_t jarg0(); + uintptr_t method(); + uintptr_t senderSP(); + + void ret(); + + bool unwindStub(instruction_t* entry, const char* name) { + return unwindStub(entry, name, pc(), sp(), fp()); + } + + bool unwindCompiled(NMethod* nm) { + return unwindCompiled(nm, pc(), sp(), fp()); + } + + bool unwindStub(instruction_t* entry, const char* name, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp); + bool unwindAtomicStub(const void*& pc); + + // TODO: this function will be removed once `vm` becomes the default stack walking mode + bool unwindCompiled(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp); + + bool unwindPrologue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp); + bool unwindEpilogue(NMethod* nm, 
uintptr_t& pc, uintptr_t& sp, uintptr_t& fp); + + void adjustSP(const void* entry, const void* pc, uintptr_t& sp); + + // SP baseline helpers for compiled frame unwinding + uintptr_t sender_sp_baseline(const NMethod* nm, uintptr_t sp, uintptr_t fp, const void* pc); + const void* read_caller_pc_from_sp(uintptr_t sp_base); + uintptr_t read_saved_fp_from_sp(uintptr_t sp_base); + + bool skipFaultInstruction(); + + bool checkInterruptedSyscall(); + + // Check if PC points to a syscall instruction + static bool isSyscall(instruction_t* pc); +}; + +#endif // _STACKFRAME_H diff --git a/ddprof-lib/src/main/cpp/stackFrame_aarch64.cpp b/ddprof-lib/src/main/cpp/stackFrame_aarch64.cpp new file mode 100644 index 00000000..12d17611 --- /dev/null +++ b/ddprof-lib/src/main/cpp/stackFrame_aarch64.cpp @@ -0,0 +1,405 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifdef __aarch64__ + +#include +#include +#include +#include "stackFrame.h" +#include "safeAccess.h" +#include "vmStructs.h" + + +#ifdef __APPLE__ +# define REG(l, m) _ucontext->uc_mcontext->__ss.__##m +#else +# define REG(l, m) _ucontext->uc_mcontext.l +#endif + + +uintptr_t& StackFrame::pc() { + return (uintptr_t&)REG(pc, pc); +} + +uintptr_t& StackFrame::sp() { + return (uintptr_t&)REG(sp, sp); +} + +uintptr_t& StackFrame::fp() { + return (uintptr_t&)REG(regs[29], fp); +} + +uintptr_t& StackFrame::retval() { + return (uintptr_t&)REG(regs[0], x[0]); +} + +uintptr_t StackFrame::link() { + return (uintptr_t)REG(regs[30], lr); +} + +uintptr_t StackFrame::arg0() { + return (uintptr_t)REG(regs[0], x[0]); +} + +uintptr_t StackFrame::arg1() { + return (uintptr_t)REG(regs[1], x[1]); +} + +uintptr_t StackFrame::arg2() { + return (uintptr_t)REG(regs[2], x[2]); +} + +uintptr_t StackFrame::arg3() { + return (uintptr_t)REG(regs[3], x[3]); +} + +uintptr_t StackFrame::jarg0() { + return arg1(); +} + +uintptr_t StackFrame::method() { + return (uintptr_t)REG(regs[12], x[12]); +} + +uintptr_t StackFrame::senderSP() { + return (uintptr_t)REG(regs[19], x[19]); +} + +void StackFrame::ret() { + pc() = link(); +} + +static inline bool isSTP(instruction_t insn) { + // stp xn, xm, [sp, #-imm]! + // stp dn, dm, [sp, #-imm]! 
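+ // Example: "stp x29, x30, [sp, #-16]!" encodes as 0xa9bf7bfd, and
+ // 0xa9bf7bfd & 0xffe003e0 == 0xa9a003e0: the mask keeps the opcode, the
+ // addressing mode, the sign bit of imm7 (pre-decrement only) and Rn = sp,
+ // while ignoring the register pair and the exact frame size.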
+ return (insn & 0xffe003e0) == 0xa9a003e0 || (insn & 0xffe003e0) == 0x6da003e0; +} + +// Check if this is a well-known leaf stub with a constant size frame +static inline bool isFixedSizeFrame(const char* name) { + // Dispatch by the first character to optimize lookup + switch (name[0]) { + case 'i': + return strncmp(name, "indexof_linear_", 15) == 0; + case 'm': + return strncmp(name, "md5_implCompress", 16) == 0; + case 's': + return strncmp(name, "sha256_implCompress", 19) == 0 + || strncmp(name, "string_indexof_linear_", 22) == 0 + || strncmp(name, "slow_subtype_check", 18) == 0; + default: + return false; + } +} + +// Check if this is a well-known leaf stub that does not change stack pointer +static inline bool isZeroSizeFrame(const char* name) { + // Dispatch by the first character to optimize lookup + switch (name[0]) { + case 'I': + return strcmp(name, "InlineCacheBuffer") == 0; + case 'S': + return strncmp(name, "SafeFetch", 9) == 0; + case 'a': + return strncmp(name, "atomic", 6) == 0; + case 'b': + return strncmp(name, "bigInteger", 10) == 0 + || strcmp(name, "base64_encodeBlock") == 0; + case 'c': + return strncmp(name, "copy_", 5) == 0 + || strncmp(name, "compare_long_string_", 20) == 0; + case 'e': + return strcmp(name, "encodeBlock") == 0; + case 'f': + return strcmp(name, "f2hf") == 0; + case 'g': + return strcmp(name, "ghash_processBlocks") == 0; + case 'h': + return strcmp(name, "hf2f") == 0; + case 'i': + return strncmp(name, "itable", 6) == 0; + case 'l': + return strcmp(name, "large_byte_array_inflate") == 0 + || strncmp(name, "lookup_secondary_supers_", 24) == 0; + case 'm': + return strncmp(name, "md5_implCompress", 16) == 0; + case 's': + return strncmp(name, "sha1_implCompress", 17) == 0 + || strncmp(name, "compare_long_string_same_encoding", 33) == 0 + || strcmp(name, "compare_long_string_LL") == 0 + || strcmp(name, "compare_long_string_UU") == 0; + case 'u': + return strcmp(name, "updateBytesAdler32") == 0; + case 'v': + return strncmp(name, "vtable", 6) == 0; + case 'z': + return strncmp(name, "zero_", 5) == 0; + default: + return false; + } +} + +bool StackFrame::unwindStub(instruction_t* entry, const char* name, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + instruction_t* ip = (instruction_t*)pc; + if (ip == entry || *ip == 0xd65f03c0) { + pc = link(); + return true; + } else if (entry != NULL && entry[0] == 0xa9bf7bfd) { + // The stub begins with + // stp x29, x30, [sp, #-16]! + // mov x29, sp + if (ip == entry + 1) { + sp += 16; + pc = ((uintptr_t*)sp)[-1]; + return true; + } else if (entry[1] == 0x910003fd && withinCurrentStack(fp)) { + sp = fp + 16; + fp = ((uintptr_t*)sp)[-2]; + pc = ((uintptr_t*)sp)[-1]; + return true; + } + } else if (entry != NULL && isSTP(entry[0]) && isFixedSizeFrame(name)) { + // The stub begins with + // stp xn, xm, [sp, #-imm]! 
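+ // imm7 occupies bits 21:15 and is scaled by 8: "<< 10" moves it to the top
+ // bits and the arithmetic ">> 25" sign-extends it. For example,
+ // "stp d8, d9, [sp, #-96]!" carries imm7 = -12, so sp - (-12 * 8) rewinds
+ // the 96-byte frame.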
+ int offset = int(entry[0] << 10) >> 25; + sp = (intptr_t)sp - offset * 8; + pc = link(); + return true; + } else if (isZeroSizeFrame(name)) { + // Should be done after isSTP check, since frame size may vary between JVM versions + pc = link(); + return true; + } else if (strcmp(name, "forward_copy_longs") == 0 + || strcmp(name, "backward_copy_longs") == 0 + // There is a typo in JDK 8 + || strcmp(name, "foward_copy_longs") == 0) { + // These are called from arraycopy stub that maintains the regular frame link + if (&pc == &this->pc() && withinCurrentStack(fp)) { + // Unwind both stub frames for AsyncGetCallTrace + sp = fp + 16; + fp = ((uintptr_t*)sp)[-2]; + pc = ((uintptr_t*)sp)[-1] - sizeof(instruction_t); + } else { + // When cstack=vm, unwind stub frames one by one + pc = link(); + } + return true; + } + return false; +} + +static inline bool isEntryBarrier(instruction_t* ip) { + // ldr w9, [x28, #32] + // cmp x8, x9 + return ip[0] == 0xb9402389 && ip[1] == 0xeb09011f; +} + +bool StackFrame::unwindCompiled(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + instruction_t* ip = (instruction_t*)pc; + instruction_t* entry = (instruction_t*)nm->entry(); + if ((*ip & 0xffe07fff) == 0xa9007bfd) { + // stp x29, x30, [sp, #offset] + // SP has been adjusted, but FP not yet stored in a new frame + unsigned int offset = (*ip >> 12) & 0x1f8; + sp += offset + 16; + pc = link(); + } else if (ip > entry && ip[0] == 0x910003fd && ip[-1] == 0xa9bf7bfd) { + // stp x29, x30, [sp, #-16]! + // mov x29, sp + sp += 16; + pc = ((uintptr_t*)sp)[-1]; + } else if (ip > entry + 3 && !nm->isFrameCompleteAt(ip) && + (isEntryBarrier(ip) || isEntryBarrier(ip + 1))) { + // Frame should be complete at this point + sp += nm->frameSize() * sizeof(void*); + fp = ((uintptr_t*)sp)[-2]; + pc = ((uintptr_t*)sp)[-1]; + } else { + // Just try + pc = link(); + } + return true; +} + +static inline bool isFrameComplete(instruction_t* entry, instruction_t* ip) { + // Frame is fully constructed after sp is decremented by the frame size. + // Check if there is such an instruction anywhere between + // the method entry and the current instruction pointer. + while (--ip >= entry) { + if ((*ip & 0xff8003ff) == 0xd10003ff) { // sub sp, sp, #frame_size + return true; + } + } + return false; +} + +bool StackFrame::unwindPrologue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + // C1/C2 methods: + // {stack_bang} + // sub sp, sp, #0x40 + // stp x29, x30, [sp, #48] + // + // Native wrappers: + // {stack_bang} + // stp x29, x30, [sp, #-16]! + // mov x29, sp + // sub sp, sp, #0x50 + // + instruction_t* ip = (instruction_t*)pc; + instruction_t* entry = (instruction_t*)nm->entry(); + if (ip <= entry) { + pc = link(); + } else if ((*ip & 0xffe07fff) == 0xa9007bfd) { + // stp x29, x30, [sp, #offset] + // SP has been adjusted, but FP not yet stored in a new frame + unsigned int offset = (*ip >> 12) & 0x1f8; + sp += offset + 16; + pc = link(); + } else if (ip[0] == 0x910003fd && ip[-1] == 0xa9bf7bfd) { + // stp x29, x30, [sp, #-16]! 
+ // mov x29, sp + sp += 16; + pc = ((uintptr_t*)sp)[-1]; + } else if (ip <= entry + 16 && isFrameComplete(entry, ip)) { + sp += nm->frameSize() * sizeof(void*); + fp = ((uintptr_t*)sp)[-2]; + pc = ((uintptr_t*)sp)[-1]; + } else { + pc = link(); + } + return true; +} + +static inline bool isPollReturn(instruction_t* ip) { + // JDK 17+ + // add sp, sp, #0x30 + // ldr x8, [x28, #832] + // cmp sp, x8 + // b.hi offset + // ret + // + // JDK 11 + // add sp, sp, #0x30 + // ldr x8, [x28, #264] + // ldr wzr, [x8] + // ret + // + // JDK 8 + // add sp, sp, #0x30 + // adrp x8, polling_page + // ldr wzr, [x8] + // ret + // + if ((ip[0] & 0xffc003ff) == 0xf9400388 && (ip[-1] & 0xff8003ff) == 0x910003ff) { + // ldr x8, preceded by add sp + return true; + } else if ((ip[0] & 0x9f00001f) == 0x90000008 && (ip[-1] & 0xff8003ff) == 0x910003ff) { + // adrp x8, preceded by add sp + return true; + } else if (ip[0] == 0xeb2863ff && ip[2] == 0xd65f03c0) { + // cmp sp, x8, followed by ret + return true; + } else if ((ip[0] & 0xff000010) == 0x54000000 && ip[1] == 0xd65f03c0) { + // b.cond, followed by ret + return true; + } else if (ip[0] == 0xb940011f && ip[1] == 0xd65f03c0) { + // ldr wzr, followed by ret + return true; + } + return false; +} + +bool StackFrame::unwindEpilogue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + // ldp x29, x30, [sp, #32] + // add sp, sp, #0x30 + // {poll_return} + // ret + instruction_t* ip = (instruction_t*)pc; + if (*ip == 0xd65f03c0 || isPollReturn(ip)) { // ret + pc = link(); + return true; + } + return false; +} + +bool StackFrame::unwindAtomicStub(const void*& pc) { + // VM threads may call generated atomic stubs, which are not normally walkable + const void* lr = (const void*)link(); + if (VMStructs::libjvm()->contains(lr)) { + NMethod* nm = CodeHeap::findNMethod(pc); + if (nm != NULL && strncmp(nm->name(), "Stub", 4) == 0) { + pc = lr; + return true; + } + } + return false; +} + +void StackFrame::adjustSP(const void* entry, const void* pc, uintptr_t& sp) { + instruction_t* ip = (instruction_t*)pc; + if (ip > entry && (ip[-1] == 0xa9bf27ff || (ip[-1] == 0xd63f0100 && ip[-2] == 0xa9bf27ff))) { + // When calling a leaf native from Java, JVM puts a dummy frame link onto the stack, + // thus breaking the invariant: sender_sp == current_sp + frame_size. + // Since JDK 21, there are more instructions between `blr` and `add`, + // ignore them now for the sake of simplicity. + // stp xzr, x9, [sp, #-16]! + // blr x8 + // ... + // add sp, sp, #0x10 + sp += 16; + } +} + +bool StackFrame::skipFaultInstruction() { + return false; +} + +bool StackFrame::checkInterruptedSyscall() { +#ifdef __APPLE__ + // We are not interested in syscalls that do not check error code, e.g. 
semaphore_wait_trap + if (*(instruction_t*)pc() == 0xd65f03c0) { + return true; + } + // If carry flag is set, the error code is in low byte of x0 + if (REG(pstate, cpsr) & (1 << 29)) { + return (retval() & 0xff) == EINTR || (retval() & 0xff) == ETIMEDOUT; + } else { + return retval() == (uintptr_t)-EINTR; + } +#else + if (retval() == (uintptr_t)-EINTR) { + // Workaround for JDK-8237858: restart the interrupted poll / epoll_wait manually + uintptr_t nr = (uintptr_t)REG(regs[8], x[8]); + if (nr == SYS_ppoll || (nr == SYS_epoll_pwait && (int)arg3() == -1)) { + // Check against unreadable page for the loop below + const uintptr_t max_distance = 24; + if ((pc() & 0xfff) < max_distance && SafeAccess::load32((int32_t*)(pc() - max_distance)) == 0) { + return true; + } + // Try to restore the original value of x0 saved in another register + for (uintptr_t prev_pc = pc() - 4; pc() - prev_pc <= max_distance; prev_pc -= 4) { + instruction_t insn = *(instruction_t*)prev_pc; + unsigned int reg = (insn >> 16) & 31; + if ((insn & 0xffe0ffff) == 0xaa0003e0 && reg >= 6) { + // mov x0, reg + REG(regs[0], x[0]) = REG(regs[reg], x[reg]); + pc() -= sizeof(instruction_t); + break; + } + } + } + return true; + } + return false; +#endif +} + +bool StackFrame::isSyscall(instruction_t* pc) { + // svc #0 or svc #80 + return (*pc & 0xffffefff) == 0xd4000001; +} + +#endif // __aarch64__ diff --git a/ddprof-lib/src/main/cpp/stackFrame_arm.cpp b/ddprof-lib/src/main/cpp/stackFrame_arm.cpp new file mode 100644 index 00000000..e175c964 --- /dev/null +++ b/ddprof-lib/src/main/cpp/stackFrame_arm.cpp @@ -0,0 +1,141 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#if defined(__arm__) || defined(__thumb__) + +#include +#include +#include "stackFrame.h" +#include "vmStructs.h" + + +uintptr_t& StackFrame::pc() { + return (uintptr_t&)_ucontext->uc_mcontext.arm_pc; +} + +uintptr_t& StackFrame::sp() { + return (uintptr_t&)_ucontext->uc_mcontext.arm_sp; +} + +uintptr_t& StackFrame::fp() { + return (uintptr_t&)_ucontext->uc_mcontext.arm_fp; +} + +uintptr_t& StackFrame::retval() { + return (uintptr_t&)_ucontext->uc_mcontext.arm_r0; +} + +uintptr_t StackFrame::link() { + return (uintptr_t)_ucontext->uc_mcontext.arm_lr; +} + +uintptr_t StackFrame::arg0() { + return (uintptr_t)_ucontext->uc_mcontext.arm_r0; +} + +uintptr_t StackFrame::arg1() { + return (uintptr_t)_ucontext->uc_mcontext.arm_r1; +} + +uintptr_t StackFrame::arg2() { + return (uintptr_t)_ucontext->uc_mcontext.arm_r2; +} + +uintptr_t StackFrame::arg3() { + return (uintptr_t)_ucontext->uc_mcontext.arm_r3; +} + +uintptr_t StackFrame::jarg0() { + // Unimplemented + return 0; +} + +uintptr_t StackFrame::method() { + return (uintptr_t)_ucontext->uc_mcontext.arm_r9; +} + +uintptr_t StackFrame::senderSP() { + return (uintptr_t)_ucontext->uc_mcontext.arm_r4; +} + +void StackFrame::ret() { + pc() = link(); +} + + +bool StackFrame::unwindStub(instruction_t* entry, const char* name, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + instruction_t* ip = (instruction_t*)pc; + if (ip == entry || *ip == 0xe12fff1e + || strncmp(name, "itable", 6) == 0 + || strncmp(name, "vtable", 6) == 0 + || strcmp(name, "InlineCacheBuffer") == 0) + { + pc = link(); + return true; + } + return false; +} + +bool StackFrame::unwindCompiled(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + instruction_t* ip = (instruction_t*)pc; + instruction_t* entry = (instruction_t*)nm->entry(); + if (ip > entry && ip <= entry + 4 && (*ip & 0xffffff00) == 0xe24dd000) 
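+ // 0xe24dd0NN encodes "sub sp, sp, #imm" with an 8-bit immediate, i.e. the
+ // frame allocation itself; since it has not executed yet, the pair pushed
+ // by "push {r11, lr}" is still at the top of the stack.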
{ + // push {r11, lr} + // mov r11, sp (optional) + // -> sub sp, sp, #offs + fp = ((uintptr_t*)sp)[0]; + pc = ((uintptr_t*)sp)[1]; + sp += 8; + return true; + } else if (*ip == 0xe8bd4800) { + // add sp, sp, #offs + // -> pop {r11, lr} + fp = ((uintptr_t*)sp)[0]; + pc = ((uintptr_t*)sp)[1]; + sp += 8; + return true; + } + pc = link(); + return true; +} + +bool StackFrame::unwindPrologue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + instruction_t* ip = (instruction_t*)pc; + instruction_t* entry = (instruction_t*)nm->entry(); + if (ip <= entry) { + pc = link(); + return true; + } + return false; +} + +bool StackFrame::unwindEpilogue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + // Not yet implemented + return false; +} + +bool StackFrame::unwindAtomicStub(const void*& pc) { + // Not needed + return false; +} + +void StackFrame::adjustSP(const void* entry, const void* pc, uintptr_t& sp) { + // Not needed +} + +bool StackFrame::skipFaultInstruction() { + return false; +} + +bool StackFrame::checkInterruptedSyscall() { + return retval() == (uintptr_t)-EINTR; +} + +bool StackFrame::isSyscall(instruction_t* pc) { + // swi #0 + return *pc == 0xef000000; +} + +#endif // defined(__arm__) || defined(__thumb__) diff --git a/ddprof-lib/src/main/cpp/stackFrame_i386.cpp b/ddprof-lib/src/main/cpp/stackFrame_i386.cpp new file mode 100644 index 00000000..a30d16f4 --- /dev/null +++ b/ddprof-lib/src/main/cpp/stackFrame_i386.cpp @@ -0,0 +1,162 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifdef __i386__ + +#include +#include +#include "stackFrame.h" +#include "vmStructs.h" + + +uintptr_t& StackFrame::pc() { + return (uintptr_t&)_ucontext->uc_mcontext.gregs[REG_EIP]; +} + +uintptr_t& StackFrame::sp() { + return (uintptr_t&)_ucontext->uc_mcontext.gregs[REG_ESP]; +} + +uintptr_t& StackFrame::fp() { + return (uintptr_t&)_ucontext->uc_mcontext.gregs[REG_EBP]; +} + +uintptr_t& StackFrame::retval() { + return (uintptr_t&)_ucontext->uc_mcontext.gregs[REG_EAX]; +} + +uintptr_t StackFrame::link() { + // No link register on x86 + return 0; +} + +uintptr_t StackFrame::arg0() { + return stackAt(1); +} + +uintptr_t StackFrame::arg1() { + return stackAt(2); +} + +uintptr_t StackFrame::arg2() { + return stackAt(3); +} + +uintptr_t StackFrame::arg3() { + return stackAt(4); +} + +uintptr_t StackFrame::jarg0() { + // Unimplemented + return 0; +} + +uintptr_t StackFrame::method() { + return _ucontext->uc_mcontext.gregs[REG_ESP]; +} + +uintptr_t StackFrame::senderSP() { + return _ucontext->uc_mcontext.gregs[REG_ESI]; +} + +void StackFrame::ret() { + pc() = stackAt(0); + sp() += 4; +} + + +bool StackFrame::unwindStub(instruction_t* entry, const char* name, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + instruction_t* ip = (instruction_t*)pc; + if (ip == entry || *ip == 0xc3 + || strncmp(name, "itable", 6) == 0 + || strncmp(name, "vtable", 6) == 0 + || strcmp(name, "InlineCacheBuffer") == 0) + { + pc = *(uintptr_t*)sp; + sp += 4; + return true; + } else if (entry != NULL && entry[0] == 0x55 && entry[1] == 0x8b && entry[2] == 0xec) { + // The stub begins with + // push ebp + // mov ebp, esp + if (ip == entry + 1) { + pc = ((uintptr_t*)sp)[1]; + sp += 8; + return true; + } else if (withinCurrentStack(fp)) { + sp = fp + 8; + fp = ((uintptr_t*)sp)[-2]; + pc = ((uintptr_t*)sp)[-1]; + return true; + } + } + return false; +} + +bool StackFrame::unwindCompiled(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + instruction_t* ip = 
(instruction_t*)pc; + instruction_t* entry = (instruction_t*)nm->entry(); + if (ip <= entry + || *ip == 0xc3 // ret + || *ip == 0x55 // push ebp + || ip[-1] == 0x5d) // after pop ebp + { + pc = *(uintptr_t*)sp; + sp += 4; + return true; + } else if (*ip == 0x5d) { + // pop ebp + fp = ((uintptr_t*)sp)[0]; + pc = ((uintptr_t*)sp)[1]; + sp += 8; + return true; + } + return false; +} + +bool StackFrame::unwindPrologue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + instruction_t* ip = (instruction_t*)pc; + instruction_t* entry = (instruction_t*)nm->entry(); + if (ip <= entry || *ip == 0x55) { // push ebp + pc = *(uintptr_t*)sp; + sp += 4; + return true; + } + return false; +} + +bool StackFrame::unwindEpilogue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + instruction_t* ip = (instruction_t*)pc; + if (*ip == 0xc3) { // ret + pc = *(uintptr_t*)sp; + sp += 4; + return true; + } + return false; +} + +bool StackFrame::unwindAtomicStub(const void*& pc) { + // Not needed + return false; +} + +void StackFrame::adjustSP(const void* entry, const void* pc, uintptr_t& sp) { + // Not needed +} + +bool StackFrame::skipFaultInstruction() { + return false; +} + +bool StackFrame::checkInterruptedSyscall() { + return retval() == (uintptr_t)-EINTR; +} + +bool StackFrame::isSyscall(instruction_t* pc) { + // int 0x80 + return pc[0] == 0xcd && pc[1] == 0x80; +} + +#endif // __i386__ diff --git a/ddprof-lib/src/main/cpp/stackFrame_loongarch64.cpp b/ddprof-lib/src/main/cpp/stackFrame_loongarch64.cpp new file mode 100644 index 00000000..99c15ace --- /dev/null +++ b/ddprof-lib/src/main/cpp/stackFrame_loongarch64.cpp @@ -0,0 +1,116 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifdef __loongarch_lp64 + +#include +#include +#include +#include "stackFrame.h" + +#define REG(l) _ucontext->uc_mcontext.__gregs[l] + +uintptr_t& StackFrame::pc() { + return (uintptr_t&)_ucontext->uc_mcontext.__pc; +} + +uintptr_t& StackFrame::sp() { + return (uintptr_t&)REG(LARCH_REG_SP); +} + +uintptr_t& StackFrame::fp() { + return (uintptr_t&)REG(22); +} + +uintptr_t& StackFrame::retval() { + return (uintptr_t&)REG(LARCH_REG_A0); +} + +uintptr_t StackFrame::link() { + return (uintptr_t)REG(LARCH_REG_RA); +} + +uintptr_t StackFrame::arg0() { + return (uintptr_t)REG(LARCH_REG_A0); +} + +uintptr_t StackFrame::arg1() { + return (uintptr_t)REG(LARCH_REG_A0 + 1); +} + +uintptr_t StackFrame::arg2() { + return (uintptr_t)REG(LARCH_REG_A0 + 2); +} + +uintptr_t StackFrame::arg3() { + return (uintptr_t)REG(LARCH_REG_A0 + 3); +} + +uintptr_t StackFrame::jarg0() { + return (uintptr_t)REG(12); +} + +uintptr_t StackFrame::method() { + return (uintptr_t)REG(26); +} + +uintptr_t StackFrame::senderSP() { + return (uintptr_t)REG(27); +} + +void StackFrame::ret() { + pc() = link(); +} + +bool StackFrame::unwindStub(instruction_t* entry, const char* name, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + instruction_t* ip = (instruction_t*)pc; + if (ip == entry + || strncmp(name, "itable", 6) == 0 + || strncmp(name, "vtable", 6) == 0 + || strcmp(name, "InlineCacheBuffer") == 0) + { + pc = link(); + return true; + } + return false; +} + +bool StackFrame::unwindCompiled(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + // Not yet implemented + return false; +} + +bool StackFrame::unwindPrologue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + // Not yet implemented + return false; +} + +bool StackFrame::unwindEpilogue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, 
uintptr_t& fp) { + // Not yet implemented + return false; +} + +bool StackFrame::unwindAtomicStub(const void*& pc) { + // Not needed + return false; +} + +void StackFrame::adjustSP(const void* entry, const void* pc, uintptr_t& sp) { + // Not yet implemented +} + +bool StackFrame::skipFaultInstruction() { + return false; +} + +bool StackFrame::checkInterruptedSyscall() { + return retval() == (uintptr_t)-EINTR; +} + +bool StackFrame::isSyscall(instruction_t* pc) { + return (*pc) == 0x002b0000; +} + +#endif // __loongarch_lp64 diff --git a/ddprof-lib/src/main/cpp/stackFrame_ppc64.cpp b/ddprof-lib/src/main/cpp/stackFrame_ppc64.cpp new file mode 100644 index 00000000..ad20ed5b --- /dev/null +++ b/ddprof-lib/src/main/cpp/stackFrame_ppc64.cpp @@ -0,0 +1,162 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#if defined(__PPC64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +#include +#include +#include "stackFrame.h" + + +uintptr_t& StackFrame::pc() { + return (uintptr_t&)_ucontext->uc_mcontext.regs->nip; +} + +uintptr_t& StackFrame::sp() { + return (uintptr_t&)_ucontext->uc_mcontext.regs->gpr[1]; +} + +uintptr_t& StackFrame::fp() { + return *((uintptr_t*)_ucontext->uc_mcontext.regs->gpr[1]); +} + +uintptr_t& StackFrame::retval() { + return (uintptr_t&)_ucontext->uc_mcontext.regs->gpr[3]; +} + +uintptr_t StackFrame::link() { + return (uintptr_t)_ucontext->uc_mcontext.regs->link; +} + +uintptr_t StackFrame::arg0() { + return (uintptr_t)_ucontext->uc_mcontext.regs->gpr[3]; +} + +uintptr_t StackFrame::arg1() { + return (uintptr_t)_ucontext->uc_mcontext.regs->gpr[4]; +} + +uintptr_t StackFrame::arg2() { + return (uintptr_t)_ucontext->uc_mcontext.regs->gpr[5]; +} + +uintptr_t StackFrame::arg3() { + return (uintptr_t)_ucontext->uc_mcontext.regs->gpr[6]; +} + +uintptr_t StackFrame::jarg0() { + // Unimplemented + return 0; +} + +uintptr_t StackFrame::method() { + // Unimplemented + return 0; +} + +uintptr_t StackFrame::senderSP() { + // Unimplemented + return 0; +} + +void StackFrame::ret() { + pc() = link(); +} + +static inline bool inC1EpilogueCrit(uintptr_t pc) { + if (!(pc & 0xfff)) { + // Make sure we are not at the page boundary, so that reading [pc - 1] is safe + return false; + } + // C1 epilogue and critical section (posX) + // 3821**** add r1,r1,xx + // pos3 xxxxxxxx + // pos2 1000e1eb ld r31,16(r1) + // pos1 a603e87f mtlr r31 + // xxxxxxxx + // 2000804e blr + instruction_t* inst = (instruction_t*)pc; + if (inst[ 1] == 0xebe10010 && inst[2] == 0x7fe803a6 || + inst[ 0] == 0xebe10010 && inst[1] == 0x7fe803a6 || + inst[-1] == 0xebe10010 && inst[0] == 0x7fe803a6) { + return true; + } + + return false; // not in critical section +} + +static inline bool inC2PrologueCrit(uintptr_t pc) { + // C2 prologue and critical section + // f821**** stdu r1, (xx)r1 + // pos1 fa950010 std r20,16(r21) + instruction_t* inst = (instruction_t*)pc; + if (inst[0] == 0xfa950010 && (inst[-1] & 0xffff0000) == 0xf8210000) { + return true; + } + + return false; // not in critical section +} + + +bool StackFrame::unwindStub(instruction_t* entry, const char* name, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + pc = link(); + return true; +} + +bool StackFrame::unwindCompiled(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + // On PPC there is a valid back link to the previous frame at all times. The callee stores + // the return address in the caller's frame before it constructs its own frame. 
After it + has destroyed its frame it restores the link register and returns. A problematic sequence + is the prologue/epilogue of a compiled method before/after frame construction/destruction. + Therefore, popping the frame would not help here, as it is not yet (or no longer) present; + instead, adjusting the pc to the caller's pc does the trick. There are two exceptions to this: + one in the prologue of C2-compiled methods and one in the epilogue of C1-compiled methods. + if (inC1EpilogueCrit(pc)) { + // lr not yet set: use the value stored in the frame + pc = ((uintptr_t*)sp)[2]; + } else if (inC2PrologueCrit(pc)) { + // frame constructed but lr not yet stored in it: just do it here + *(((unsigned long *) _ucontext->uc_mcontext.regs->gpr[21]) + 2) = (unsigned long) _ucontext->uc_mcontext.regs->gpr[20]; + } else { + // most probably the caller's frame is still on top but pc is already in the callee: use the caller's pc + pc = link(); + } + + return true; +} + +bool StackFrame::unwindPrologue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + // Not yet implemented + return false; +} + +bool StackFrame::unwindEpilogue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + // Not yet implemented + return false; +} + +bool StackFrame::unwindAtomicStub(const void*& pc) { + // Not needed + return false; +} + +void StackFrame::adjustSP(const void* entry, const void* pc, uintptr_t& sp) { + // Not needed +} + +bool StackFrame::skipFaultInstruction() { + return false; +} + +bool StackFrame::checkInterruptedSyscall() { + return retval() == (uintptr_t)-EINTR; +} + +bool StackFrame::isSyscall(instruction_t* pc) { + // sc/svc + return (*pc & 0x1f) == 17; +} + +#endif // defined(__PPC64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) diff --git a/ddprof-lib/src/main/cpp/stackFrame_riscv64.cpp b/ddprof-lib/src/main/cpp/stackFrame_riscv64.cpp new file mode 100644 index 00000000..54454177 --- /dev/null +++ b/ddprof-lib/src/main/cpp/stackFrame_riscv64.cpp @@ -0,0 +1,118 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#if defined(__riscv) && (__riscv_xlen == 64) + +#include +#include +#include +#include "stackFrame.h" + +#define REG(l) _ucontext->uc_mcontext.__gregs[l] + +uintptr_t& StackFrame::pc() { + return (uintptr_t&)REG(REG_PC); +} + +uintptr_t& StackFrame::sp() { + return (uintptr_t&)REG(REG_SP); +} + +uintptr_t& StackFrame::fp() { + return (uintptr_t&)REG(REG_S0); +} + +uintptr_t& StackFrame::retval() { + return (uintptr_t&)REG(REG_A0); +} + +uintptr_t StackFrame::link() { + return (uintptr_t)REG(REG_RA); +} + +uintptr_t StackFrame::arg0() { + return (uintptr_t)REG(REG_A0); +} + +uintptr_t StackFrame::arg1() { + return (uintptr_t)REG(REG_A0 + 1); +} + +uintptr_t StackFrame::arg2() { + return (uintptr_t)REG(REG_A0 + 2); +} + +uintptr_t StackFrame::arg3() { + return (uintptr_t)REG(REG_A0 + 3); +} + +uintptr_t StackFrame::jarg0() { + return arg1(); +} + +uintptr_t StackFrame::method() { + return (uintptr_t)REG(31); +} + +uintptr_t StackFrame::senderSP() { + return (uintptr_t)REG(19); +} + +void StackFrame::ret() { + pc() = link(); +} + +bool StackFrame::unwindStub(instruction_t* entry, const char* name, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + instruction_t* ip = (instruction_t*)pc; + if (ip == entry + || strncmp(name, "itable", 6) == 0 + || strncmp(name, "vtable", 6) == 0 + || strcmp(name, "InlineCacheBuffer") == 0) + { + pc = link(); + return true; + } + return false; +} + +bool StackFrame::unwindCompiled(NMethod* nm, 
uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + // Not yet implemented + return false; +} + +bool StackFrame::unwindPrologue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + // Not yet implemented + return false; +} + +bool StackFrame::unwindEpilogue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + // Not yet implemented + return false; +} + +bool StackFrame::unwindAtomicStub(const void*& pc) { + // Not needed + return false; +} + +void StackFrame::adjustSP(const void* entry, const void* pc, uintptr_t& sp) { + // Not yet implemented +} + +bool StackFrame::skipFaultInstruction() { + return false; +} + +bool StackFrame::checkInterruptedSyscall() { + return retval() == (uintptr_t)-EINTR; +} + +bool StackFrame::isSyscall(instruction_t* pc) { + // RISC-V ISA uses ECALL for doing both syscalls and debugger + // calls, so this might technically mismatch. + return (*pc) == 0x00000073; +} + +#endif // riscv diff --git a/ddprof-lib/src/main/cpp/stackFrame_x64.cpp b/ddprof-lib/src/main/cpp/stackFrame_x64.cpp new file mode 100644 index 00000000..7e61a266 --- /dev/null +++ b/ddprof-lib/src/main/cpp/stackFrame_x64.cpp @@ -0,0 +1,322 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifdef __x86_64__ + +#include +#include +#include +#include "stackFrame.h" +#include "vmStructs.h" + + +#ifdef __APPLE__ +# define REG(l, m) _ucontext->uc_mcontext->__ss.__##m +#else +# define REG(l, m) _ucontext->uc_mcontext.gregs[REG_##l] +#endif + + +uintptr_t& StackFrame::pc() { + return (uintptr_t&)REG(RIP, rip); +} + +uintptr_t& StackFrame::sp() { + return (uintptr_t&)REG(RSP, rsp); +} + +uintptr_t& StackFrame::fp() { + return (uintptr_t&)REG(RBP, rbp); +} + +uintptr_t& StackFrame::retval() { + return (uintptr_t&)REG(RAX, rax); +} + +uintptr_t StackFrame::link() { + // No link register on x86 + return 0; +} + +uintptr_t StackFrame::arg0() { + return (uintptr_t)REG(RDI, rdi); +} + +uintptr_t StackFrame::arg1() { + return (uintptr_t)REG(RSI, rsi); +} + +uintptr_t StackFrame::arg2() { + return (uintptr_t)REG(RDX, rdx); +} + +uintptr_t StackFrame::arg3() { + return (uintptr_t)REG(RCX, rcx); +} + +uintptr_t StackFrame::jarg0() { + return arg1(); +} + +uintptr_t StackFrame::method() { + return (uintptr_t)REG(RBX, rbx); +} + +uintptr_t StackFrame::senderSP() { + return (uintptr_t)REG(R13, r13); +} + +void StackFrame::ret() { + pc() = stackAt(0); + sp() += 8; +} + + +__attribute__((no_sanitize("address"))) bool StackFrame::unwindStub(instruction_t* entry, const char* name, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + instruction_t* ip = (instruction_t*)pc; + if (ip == entry || *ip == 0xc3 + || strncmp(name, "itable", 6) == 0 + || strncmp(name, "vtable", 6) == 0 + || strcmp(name, "InlineCacheBuffer") == 0) + { + pc = ((uintptr_t*)sp)[0] - 1; + sp += 8; + return true; + } else if (entry != NULL && ([&] { unsigned int val; memcpy(&val, entry, sizeof(val)); return val; }()) == 0xec8b4855) { + // The stub begins with + // push rbp + // mov rbp, rsp + if (ip == entry + 1) { + pc = ((uintptr_t*)sp)[1] - 1; + sp += 16; + return true; + } else if (withinCurrentStack(fp)) { + sp = fp + 16; + fp = ((uintptr_t*)sp)[-2]; + pc = ((uintptr_t*)sp)[-1] - 1; + return true; + } + } + return false; +} + +bool StackFrame::unwindCompiled(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + instruction_t* ip = (instruction_t*)pc; + instruction_t* entry = (instruction_t*)nm->entry(); + if (ip <= entry + || *ip == 0xc3 // ret + || *ip == 0x55 // push rbp + || ip[-1] 
== 0x5d // after pop rbp + || (ip[0] == 0x41 && ip[1] == 0x85 && ip[2] == 0x02 && ip[3] == 0xc3)) // poll return + { + // Subtract 1 for PC to point to the call instruction, + // otherwise it may be attributed to a wrong bytecode + pc = ((uintptr_t*)sp)[0] - 1; + sp += 8; + return true; + } else if (*ip == 0x5d) { + // pop rbp + fp = ((uintptr_t*)sp)[0]; + pc = ((uintptr_t*)sp)[1] - 1; + sp += 16; + return true; + } else if (ip <= entry + 15 && ((uintptr_t)ip & 0xfff) && ip[-1] == 0x55) { + // push rbp + pc = ((uintptr_t*)sp)[1] - 1; + sp += 16; + return true; + } else if (ip <= entry + 7 && ip[0] == 0x48 && ip[1] == 0x89 && ip[2] == 0x6c && ip[3] == 0x24) { + // mov [rsp + #off], rbp + sp += ip[4] + 16; + pc = ((uintptr_t*)sp)[-1] - 1; + return true; + } else if ((ip[0] == 0x41 && ip[1] == 0x81 && ip[2] == 0x7f && *(u32*)(ip + 4) == 1) || + (ip >= entry + 8 && ip[-8] == 0x41 && ip[-7] == 0x81 && ip[-6] == 0x7f && *(u32*)(ip - 4) == 1)) { + // cmp [r15 + #off], 1 + // nmethod_entry_barrier: frame is fully constructed here + sp += nm->frameSize() * sizeof(void*); + fp = ((uintptr_t*)sp)[-2]; + pc = ((uintptr_t*)sp)[-1]; + return true; + } + return false; +} + +static inline bool isFrameComplete(instruction_t* entry, instruction_t* ip) { + // Frame is fully constructed after rsp is decremented by the frame size. + // Check if there is such an instruction anywhere between + // the method entry and the current instruction pointer. + for (ip -= 4; ip >= entry; ip--) { + if (ip[0] == 0x48 && ip[2] == 0xec && (ip[1] & 0xfd) == 0x81) { // sub rsp, frame_size + return true; + } + } + return false; +} + +bool StackFrame::unwindPrologue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + // 0: mov %eax,-0x14000(%rsp) + // 7: push %rbp + // 8: mov %rsp,%rbp ; for native methods only + // 11: sub $0x50,%rsp + instruction_t* ip = (instruction_t*)pc; + instruction_t* entry = (instruction_t*)nm->entry(); + if (ip <= entry || *ip == 0x55 || nm->frameSize() == 0) { // push rbp + pc = ((uintptr_t*)sp)[0] - 1; + sp += 8; + return true; + } else if (ip <= entry + 15 && ip[-1] == 0x55) { // right after push rbp + pc = ((uintptr_t*)sp)[1] - 1; + sp += 16; + return true; + } else if (ip <= entry + 31 && isFrameComplete(entry, ip)) { + sp += nm->frameSize() * sizeof(void*); + fp = ((uintptr_t*)sp)[-2]; + pc = ((uintptr_t*)sp)[-1]; + return true; + } + return false; +} + +static inline bool isPollReturn(instruction_t* ip) { + // JDK 17+ + // pop %rbp + // cmp 0x348(%r15),%rsp + // ja offset_32 + // ret + if (ip[0] == 0x49 && ip[1] == 0x3b && (ip[2] == 0x67 || ip[2] == 0xa7) && ip[-1] == 0x5d) { + // cmp, preceded by pop rbp + return true; + } else if (ip[0] == 0x0f && ip[1] == 0x87 && ip[6] == 0xc3) { + // ja, followed by ret + return true; + } + + // JDK 11 + // pop %rbp + // mov 0x108(%r15),%r10 + // test %eax,(%r10) + // ret + if (ip[0] == 0x4d && ip[1] == 0x8b && ip[2] == 0x97 && ip[-1] == 0x5d) { + // mov, preceded by pop rbp + return true; + } else if (ip[0] == 0x41 && ip[1] == 0x85 && ip[2] == 0x02 && ip[3] == 0xc3) { + // test, followed by ret + return true; + } + + // JDK 8 + // pop %rbp + // test %eax,offset(%rip) + // ret + if (ip[0] == 0x85 && ip[1] == 0x05 && ip[6] == 0xc3) { + // test, followed by ret + return true; + } + + return false; +} + +bool StackFrame::unwindEpilogue(NMethod* nm, uintptr_t& pc, uintptr_t& sp, uintptr_t& fp) { + // add $0x40,%rsp + // pop %rbp + // {poll_return} + // ret + instruction_t* ip = (instruction_t*)pc; + if (*ip == 0xc3 || isPollReturn(ip)) { // ret + pc = 
((uintptr_t*)sp)[0] - 1; + sp += 8; + return true; + } else if (*ip == 0x5d) { // pop rbp + fp = ((uintptr_t*)sp)[0]; + pc = ((uintptr_t*)sp)[1] - 1; + sp += 16; + return true; + } + return false; +} + +bool StackFrame::unwindAtomicStub(const void*& pc) { + // Not needed + return false; +} + +void StackFrame::adjustSP(const void* entry, const void* pc, uintptr_t& sp) { + // Not needed +} + +// Skip failed MOV instruction by writing 0 to destination register +bool StackFrame::skipFaultInstruction() { + unsigned int insn = *(unsigned int*)pc(); + if ((insn & 0x80fff8) == 0x008b48) { + // mov r64, [r64 + offs] + unsigned int reg = ((insn << 1) & 8) | ((insn >> 19) & 7); + switch (reg) { + case 0x0: REG(RAX, rax) = 0; break; + case 0x1: REG(RCX, rcx) = 0; break; + case 0x2: REG(RDX, rdx) = 0; break; + case 0x3: REG(RBX, rbx) = 0; break; + case 0x4: return false; // Do not modify RSP + case 0x5: REG(RBP, rbp) = 0; break; + case 0x6: REG(RSI, rsi) = 0; break; + case 0x7: REG(RDI, rdi) = 0; break; + case 0x8: REG(R8 , r8 ) = 0; break; + case 0x9: REG(R9 , r9 ) = 0; break; + case 0xa: REG(R10, r10) = 0; break; + case 0xb: REG(R11, r11) = 0; break; + case 0xc: REG(R12, r12) = 0; break; + case 0xd: REG(R13, r13) = 0; break; + case 0xe: REG(R14, r14) = 0; break; + case 0xf: REG(R15, r15) = 0; break; + } + + unsigned int insn_size = 3; + if ((insn & 0x070000) == 0x040000) insn_size++; + if ((insn & 0x400000) == 0x400000) insn_size++; + pc() += insn_size; + return true; + } + return false; +} + +__attribute__((no_sanitize("address"))) bool StackFrame::checkInterruptedSyscall() { +#ifdef __APPLE__ + // We are not interested in syscalls that do not check error code, e.g. semaphore_wait_trap + if (*(instruction_t*)pc() == 0xc3) { + return true; + } + // If CF is set, the error code is in low byte of eax, + // some other syscalls (ulock_wait) do not set CF when interrupted + if (REG(EFL, rflags) & 1) { + return (retval() & 0xff) == EINTR || (retval() & 0xff) == ETIMEDOUT; + } else { + return retval() == (uintptr_t)-EINTR; + } +#else + if (retval() == (uintptr_t)-EINTR) { + // Workaround for JDK-8237858: restart the interrupted poll() manually. 
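+        // Rewinding pc by 7 bytes re-executes both the 5-byte 'mov eax, imm32'
+        // and the 2-byte 'syscall' instruction once the signal handler returns.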
+ // Check if the previous instruction is mov eax, SYS_poll with infinite timeout or + // mov eax, SYS_ppoll with any timeout (ppoll adjusts timeout automatically) + uintptr_t pc = this->pc(); + if ((pc & 0xfff) >= 7 && *(instruction_t*)(pc - 7) == 0xb8) { + int nr = ([&] { int val; memcpy(&val, (const void*)(pc - 6), sizeof(val)); return val; }()); + if (nr == SYS_ppoll + || (nr == SYS_poll && (int)REG(RDX, rdx) == -1) + || (nr == SYS_epoll_wait && (int)REG(R10, r10) == -1) + || (nr == SYS_epoll_pwait && (int)REG(R10, r10) == -1)) { + this->pc() = pc - 7; + } + } + return true; + } + return false; +#endif +} + +bool StackFrame::isSyscall(instruction_t* pc) { + return pc[0] == 0x0f && pc[1] == 0x05; +} + +#endif // __x86_64__ diff --git a/ddprof-lib/src/main/cpp/stackWalker.cpp b/ddprof-lib/src/main/cpp/stackWalker.cpp new file mode 100644 index 00000000..0f1f038c --- /dev/null +++ b/ddprof-lib/src/main/cpp/stackWalker.cpp @@ -0,0 +1,590 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include "stackWalker.h" +#include "dwarf.h" +#include "profiler.h" +#include "safeAccess.h" +#include "stackFrame.h" +#include "symbols.h" +#include "vmStructs.h" + + +const uintptr_t SAME_STACK_DISTANCE = 8192; +const uintptr_t MAX_WALK_SIZE = 0x100000; +const intptr_t MAX_INTERPRETER_FRAME_SIZE = 0x1000; + +static ucontext_t empty_ucontext{}; + +// Use validation helpers from header (shared with tests) +using StackWalkValidation::inDeadZone; +using StackWalkValidation::aligned; +using StackWalkValidation::MAX_FRAME_SIZE; + +static inline bool sameStack(void* hi, void* lo) { + return (uintptr_t)hi - (uintptr_t)lo < SAME_STACK_DISTANCE; +} + +// AArch64: on Linux, frame link is stored at the top of the frame, +// while on macOS, frame link is at the bottom. 
+static inline uintptr_t defaultSenderSP(uintptr_t sp, uintptr_t fp) { +#ifdef __APPLE__ + return sp + 2 * sizeof(void*); +#else + return fp; +#endif +} + +static inline void fillFrame(ASGCT_CallFrame& frame, ASGCT_CallFrameType type, const char* name) { + frame.bci = type; + frame.method_id = (jmethodID)name; +} + +static inline void fillFrame(ASGCT_CallFrame& frame, ASGCT_CallFrameType type, u32 class_id) { + frame.bci = type; + frame.method_id = (jmethodID)(uintptr_t)class_id; +} + +static inline void fillFrame(ASGCT_CallFrame& frame, FrameTypeId type, int bci, jmethodID method) { + frame.bci = FrameType::encode(type, bci); + frame.method_id = method; +} + +static jmethodID getMethodId(VMMethod* method) { + if (!inDeadZone(method) && aligned((uintptr_t)method)) { + return method->validatedId(); + } + return NULL; +} + + +int StackWalker::walkFP(void* ucontext, const void** callchain, int max_depth, StackContext* java_ctx) { + const void* pc; + uintptr_t fp; + uintptr_t sp; + uintptr_t bottom = (uintptr_t)&sp + MAX_WALK_SIZE; + + StackFrame frame(ucontext); + if (ucontext == NULL) { + pc = callerPC(); + fp = (uintptr_t)callerFP(); + sp = (uintptr_t)callerSP(); + } else { + pc = (const void*)frame.pc(); + fp = frame.fp(); + sp = frame.sp(); + } + + int depth = 0; + + // Walk until the bottom of the stack or until the first Java frame + while (depth < max_depth) { + if (CodeHeap::contains(pc) && !(depth == 0 && frame.unwindAtomicStub(pc)) && + VMThread::current() != nullptr) { // If it is not a JVM thread, it cannot have Java frame + java_ctx->set(pc, sp, fp); + break; + } + + callchain[depth++] = pc; + + // Check if the next frame is below on the current stack + if (fp < sp || fp >= sp + MAX_FRAME_SIZE || fp >= bottom) { + break; + } + + // Frame pointer must be word aligned + if (!aligned(fp)) { + break; + } + + pc = stripPointer(SafeAccess::load((void**)fp + FRAME_PC_SLOT)); + if (inDeadZone(pc)) { + break; + } + + sp = fp + (FRAME_PC_SLOT + 1) * sizeof(void*); + fp = *(uintptr_t*)fp; + } + + return depth; +} + +int StackWalker::walkDwarf(void* ucontext, const void** callchain, int max_depth, StackContext* java_ctx) { + const void* pc; + uintptr_t fp; + uintptr_t sp; + uintptr_t bottom = (uintptr_t)&sp + MAX_WALK_SIZE; + + StackFrame frame(ucontext); + if (ucontext == NULL) { + pc = callerPC(); + fp = (uintptr_t)callerFP(); + sp = (uintptr_t)callerSP(); + } else { + pc = (const void*)frame.pc(); + fp = frame.fp(); + sp = frame.sp(); + } + + int depth = 0; + Profiler* profiler = Profiler::instance(); + + // Walk until the bottom of the stack or until the first Java frame + while (depth < max_depth) { + if (CodeHeap::contains(pc) && !(depth == 0 && frame.unwindAtomicStub(pc)) && + VMThread::current() != nullptr) { // If it is not a JVM thread, it cannot have Java frame + // Don't dereference pc as it may point to unreadable memory + // frame.adjustSP(page_start, pc, sp); + java_ctx->set(pc, sp, fp); + break; + } + + callchain[depth++] = pc; + + uintptr_t prev_sp = sp; + CodeCache* cc = profiler->findLibraryByAddress(pc); + FrameDesc f = cc != NULL ? cc->findFrameDesc(pc) : FrameDesc::default_frame; + + u8 cfa_reg = (u8)f.cfa; + int cfa_off = f.cfa >> 8; + if (cfa_reg == DW_REG_SP) { + sp = sp + cfa_off; + } else if (cfa_reg == DW_REG_FP) { + sp = fp + cfa_off; + } else if (cfa_reg == DW_REG_PLT) { + sp += ((uintptr_t)pc & 15) >= 11 ? 
cfa_off * 2 : cfa_off; + } else { + break; + } + + // Check if the next frame is below on the current stack + if (sp < prev_sp || sp >= prev_sp + MAX_FRAME_SIZE || sp >= bottom) { + break; + } + + // Stack pointer must be word aligned + if (!aligned(sp)) { + break; + } + + const void* prev_pc = pc; + if (f.fp_off & DW_PC_OFFSET) { + pc = (const char*)pc + (f.fp_off >> 1); + } else { + if (f.fp_off != DW_SAME_FP && f.fp_off < MAX_FRAME_SIZE && f.fp_off > -MAX_FRAME_SIZE) { + fp = (uintptr_t)SafeAccess::load((void**)(sp + f.fp_off)); + } + + if (EMPTY_FRAME_SIZE > 0 || f.pc_off != DW_LINK_REGISTER) { + pc = stripPointer(SafeAccess::load((void**)(sp + f.pc_off))); + } else if (depth == 1) { + pc = (const void*)frame.link(); + } else { + break; + } + + if (EMPTY_FRAME_SIZE == 0 && cfa_off == 0 && f.fp_off != DW_SAME_FP) { + // AArch64 default_frame + sp = defaultSenderSP(sp, fp); + if (sp < prev_sp || sp >= bottom || !aligned(sp)) { + break; + } + } + } + + if (inDeadZone(pc) || (pc == prev_pc && sp == prev_sp)) { + break; + } + } + + return depth; +} + +__attribute__((no_sanitize("address"))) int StackWalker::walkVM(void* ucontext, ASGCT_CallFrame* frames, int max_depth, + StackWalkFeatures features, EventType event_type) { + if (ucontext == NULL) { + return walkVM(&empty_ucontext, frames, max_depth, features, event_type, + callerPC(), (uintptr_t)callerSP(), (uintptr_t)callerFP()); + } else { + StackFrame frame(ucontext); + return walkVM(ucontext, frames, max_depth, features, event_type, + (const void*)frame.pc(), frame.sp(), frame.fp()); + } +} + +__attribute__((no_sanitize("address"))) int StackWalker::walkVM(void* ucontext, ASGCT_CallFrame* frames, int max_depth, JavaFrameAnchor* anchor, EventType event_type) { + uintptr_t sp = anchor->lastJavaSP(); + if (sp == 0) { + return 0; + } + + uintptr_t fp = anchor->lastJavaFP(); + if (fp == 0) { + fp = sp; + } + + const void* pc = anchor->lastJavaPC(); + if (pc == NULL) { + pc = ((const void**)sp)[-1]; + } + + StackWalkFeatures no_features{}; + return walkVM(ucontext, frames, max_depth, no_features, event_type, pc, sp, fp); +} + +__attribute__((no_sanitize("address"))) int StackWalker::walkVM(void* ucontext, ASGCT_CallFrame* frames, int max_depth, + StackWalkFeatures features, EventType event_type, + const void* pc, uintptr_t sp, uintptr_t fp) { + StackFrame frame(ucontext); + uintptr_t bottom = (uintptr_t)&frame + MAX_WALK_SIZE; + + Profiler* profiler = Profiler::instance(); + int bcp_offset = InterpreterFrame::bcp_offset(); + + jmp_buf crash_protection_ctx; + VMThread* vm_thread = VMThread::current(); + void* saved_exception = vm_thread != NULL ? 
vm_thread->exception() : NULL; + + // Should be preserved across setjmp/longjmp + volatile int depth = 0; + + JavaFrameAnchor* anchor = NULL; + if (vm_thread != NULL) { + anchor = vm_thread->anchor(); + vm_thread->exception() = &crash_protection_ctx; + if (setjmp(crash_protection_ctx) != 0) { + vm_thread->exception() = saved_exception; + if (depth < max_depth) { + fillFrame(frames[depth++], BCI_ERROR, "break_not_walkable"); + } + return depth; + } + } + + const void* prev_native_pc = NULL; + // Show extended frame types and stub frames for execution-type events + bool details = event_type <= MALLOC_SAMPLE || features.mixed; + + if (details && vm_thread != NULL && vm_thread->isJavaThread()) { + anchor = vm_thread->anchor(); + } + + unwind_loop: + + // Walk until the bottom of the stack or until the first Java frame + while (depth < max_depth) { + if (CodeHeap::contains(pc)) { + // If we're in JVM-generated code but don't have a VMThread, we cannot safely + // walk the Java stack because crash protection is not set up. + // + // This can occur during JNI attach/detach transitions: when a thread detaches, + // pthread_setspecific() clears the VMThread TLS, but if a profiling signal arrives + // while PC is still in JVM stubs (JavaCalls, method entry/exit), we see CodeHeap + // code without VMThread context. + // + // Without vm_thread, crash protection via setjmp/longjmp cannot work + // (checkFault() needs vm_thread->exception() to longjmp). Any memory dereference in interpreter + // frame handling or NMethod validation would crash the process with unrecoverable SEGV. + // + // The missing VMThread is a timing issue during thread lifecycle. + if (vm_thread == NULL) { + fillFrame(frames[depth++], BCI_ERROR, "break_no_vmthread"); + break; + } + prev_native_pc = NULL; // we are in JVM code, no previous 'native' PC + NMethod* nm = CodeHeap::findNMethod(pc); + if (nm == NULL) { + if (anchor == NULL) { + // Add an error frame only if we cannot recover + fillFrame(frames[depth++], BCI_ERROR, "unknown_nmethod"); + } + break; + } + + // Always prefer JavaFrameAnchor when it is available, + // since it provides reliable SP and FP. + // Do not treat the topmost stub as Java frame. + if (anchor != NULL && (depth > 0 || !nm->isStub())) { + if (anchor->getFrame(pc, sp, fp) && !nm->contains(pc)) { + anchor = NULL; + continue; // NMethod has changed as a result of correction + } + anchor = NULL; + } + + if (nm->isNMethod()) { + int level = nm->level(); + FrameTypeId type = details && level >= 1 && level <= 3 ? FRAME_C1_COMPILED : FRAME_JIT_COMPILED; + fillFrame(frames[depth++], type, 0, nm->method()->id()); + + if (nm->isFrameCompleteAt(pc)) { + if (depth == 1 && frame.unwindEpilogue(nm, (uintptr_t&)pc, sp, fp)) { + continue; + } + + int scope_offset = nm->findScopeOffset(pc); + if (scope_offset > 0) { + depth--; + ScopeDesc scope(nm); + do { + scope_offset = scope.decode(scope_offset); + if (details) { + type = scope_offset > 0 ? FRAME_INLINED : + level >= 1 && level <= 3 ? 
FRAME_C1_COMPILED : FRAME_JIT_COMPILED; + } + fillFrame(frames[depth++], type, scope.bci(), scope.method()->id()); + } while (scope_offset > 0 && depth < max_depth); + } + + // Handle situations when sp is temporarily changed in the compiled code + frame.adjustSP(nm->entry(), pc, sp); + + sp += nm->frameSize() * sizeof(void*); + fp = ((uintptr_t*)sp)[-FRAME_PC_SLOT - 1]; + pc = ((const void**)sp)[-FRAME_PC_SLOT]; + continue; + } else if (frame.unwindPrologue(nm, (uintptr_t&)pc, sp, fp)) { + continue; + } + + fillFrame(frames[depth++], BCI_ERROR, "break_compiled"); + break; + } else if (nm->isInterpreter()) { + if (vm_thread != NULL && vm_thread->inDeopt()) { + fillFrame(frames[depth++], BCI_ERROR, "break_deopt"); + break; + } + + bool is_plausible_interpreter_frame = !inDeadZone((const void*)fp) && aligned(fp) + && sp > fp - MAX_INTERPRETER_FRAME_SIZE + && sp < fp + bcp_offset * sizeof(void*); + + if (is_plausible_interpreter_frame) { + VMMethod* method = ((VMMethod**)fp)[InterpreterFrame::method_offset]; + jmethodID method_id = getMethodId(method); + if (method_id != NULL) { + const char* bytecode_start = method->bytecode(); + const char* bcp = ((const char**)fp)[bcp_offset]; + int bci = bytecode_start == NULL || bcp < bytecode_start ? 0 : bcp - bytecode_start; + fillFrame(frames[depth++], FRAME_INTERPRETED, bci, method_id); + + sp = ((uintptr_t*)fp)[InterpreterFrame::sender_sp_offset]; + pc = stripPointer(((void**)fp)[FRAME_PC_SLOT]); + fp = *(uintptr_t*)fp; + continue; + } + } + + if (depth == 0) { + VMMethod* method = (VMMethod*)frame.method(); + jmethodID method_id = getMethodId(method); + if (method_id != NULL) { + fillFrame(frames[depth++], FRAME_INTERPRETED, 0, method_id); + + if (is_plausible_interpreter_frame) { + pc = stripPointer(((void**)fp)[FRAME_PC_SLOT]); + sp = frame.senderSP(); + fp = *(uintptr_t*)fp; + } else { + pc = stripPointer(*(void**)sp); + sp = frame.senderSP(); + } + continue; + } + } + + fillFrame(frames[depth++], BCI_ERROR, "break_interpreted"); + break; + } else if (nm->isEntryFrame(pc) && !features.mixed) { + JavaFrameAnchor* next_anchor = JavaFrameAnchor::fromEntryFrame(fp); + if (next_anchor == NULL) { + fillFrame(frames[depth++], BCI_ERROR, "break_entry_frame"); + break; + } + uintptr_t prev_sp = sp; + if (!next_anchor->getFrame(pc, sp, fp)) { + // End of Java stack + break; + } + if (sp < prev_sp || sp >= bottom || !aligned(sp)) { + fillFrame(frames[depth++], BCI_ERROR, "break_entry_frame"); + break; + } + continue; + } else { + if (features.vtable_target && nm->isVTableStub() && depth == 0) { + uintptr_t receiver = frame.jarg0(); + if (receiver != 0) { + VMSymbol* symbol = VMKlass::fromOop(receiver)->name(); + u32 class_id = profiler->classMap()->lookup(symbol->body(), symbol->length()); + fillFrame(frames[depth++], BCI_ALLOC, class_id); + } + } + + CodeBlob* stub = profiler->findRuntimeStub(pc); + const void* start = stub != NULL ? stub->_start : nm->code(); + const char* name = stub != NULL ? 
stub->_name : nm->name(); + + if (details) { + fillFrame(frames[depth++], BCI_NATIVE_FRAME, name); + } + + if (frame.unwindStub((instruction_t*)start, name, (uintptr_t&)pc, sp, fp)) { + continue; + } + + if (depth > 1 && nm->frameSize() > 0) { + sp += nm->frameSize() * sizeof(void*); + fp = ((uintptr_t*)sp)[-FRAME_PC_SLOT - 1]; + pc = ((const void**)sp)[-FRAME_PC_SLOT]; + continue; + } + } + } else { + const char* method_name = profiler->findNativeMethod(pc); + char mark; + if (method_name != NULL && (mark = NativeFunc::mark(method_name)) != 0) { + if (mark == MARK_ASYNC_PROFILER && event_type == MALLOC_SAMPLE) { + // Skip all internal frames above malloc_hook functions, leave the hook itself + depth = 0; + } else if (mark == MARK_COMPILER_ENTRY && features.comp_task && vm_thread != NULL) { + // Insert current compile task as a pseudo Java frame + VMMethod* method = vm_thread->compiledMethod(); + jmethodID method_id = method != NULL ? method->id() : NULL; + if (method_id != NULL) { + fillFrame(frames[depth++], FRAME_JIT_COMPILED, 0, method_id); + } + } + } else if (method_name == NULL && details) { + // These workarounds will minimize the number of unknown frames for 'vm' + // We want to keep the 'raw' data in 'vmx', though + if (anchor) { + uintptr_t prev_sp = sp; + sp = anchor->lastJavaSP(); + fp = anchor->lastJavaFP(); + pc = anchor->lastJavaPC(); + if (sp != 0 && pc != NULL) { + // already used the anchor; disable it + anchor = NULL; + if (sp < prev_sp || sp >= bottom || !aligned(sp)) { + fillFrame(frames[depth++], BCI_ERROR, "break_no_anchor"); + break; + } + // we restored from Java frame; clean the prev_native_pc + prev_native_pc = NULL; + if (depth > 0) { + fillFrame(frames[depth++], BCI_ERROR, "[skipped frames]"); + } + continue; + } + } + const char* prev_symbol = prev_native_pc != NULL ? profiler->findNativeMethod(prev_native_pc) : NULL; + if (prev_symbol != NULL && strstr(prev_symbol, "thread_start")) { + // Unwinding from Rust 'thread_start' but not having enough info to do it correctly + // Rather, just assume that this is the root frame + break; + } + if (Symbols::isLibcOrPthreadAddress((uintptr_t)pc)) { + // We might not have the libc symbols available + // The unwinding is also not super reliable; best to jump out if this is not the leaf + fillFrame(frames[depth++], BCI_NATIVE_FRAME, "[libc/pthread]"); + break; + } + fillFrame(frames[depth++], BCI_ERROR, "break_no_anchor"); + break; + } + fillFrame(frames[depth++], BCI_NATIVE_FRAME, method_name); + } + + uintptr_t prev_sp = sp; + CodeCache* cc = profiler->findLibraryByAddress(pc); + FrameDesc f = cc != NULL ? cc->findFrameDesc(pc) : FrameDesc::default_frame; + + u8 cfa_reg = (u8)f.cfa; + int cfa_off = f.cfa >> 8; + if (cfa_reg == DW_REG_SP) { + sp = sp + cfa_off; + } else if (cfa_reg == DW_REG_FP) { + sp = fp + cfa_off; + } else if (cfa_reg == DW_REG_PLT) { + sp += ((uintptr_t)pc & 15) >= 11 ? 
cfa_off * 2 : cfa_off;
+        } else {
+            break;
+        }
+
+        // Check if the next frame is below on the current stack
+        if (sp < prev_sp || sp >= prev_sp + MAX_FRAME_SIZE || sp >= bottom) {
+            break;
+        }
+
+        // Stack pointer must be word aligned
+        if (!aligned(sp)) {
+            break;
+        }
+
+        // Store the previous pc before unwinding
+        prev_native_pc = pc;
+        if (f.fp_off & DW_PC_OFFSET) {
+            pc = (const char*)pc + (f.fp_off >> 1);
+        } else {
+            if (f.fp_off != DW_SAME_FP && f.fp_off < MAX_FRAME_SIZE && f.fp_off > -MAX_FRAME_SIZE) {
+                fp = (uintptr_t)SafeAccess::load((void**)(sp + f.fp_off));
+            }
+
+            if (EMPTY_FRAME_SIZE > 0 || f.pc_off != DW_LINK_REGISTER) {
+                pc = stripPointer(SafeAccess::load((void**)(sp + f.pc_off)));
+            } else if (depth == 1) {
+                pc = (const void*)frame.link();
+            } else {
+                break;
+            }
+
+            if (EMPTY_FRAME_SIZE == 0 && cfa_off == 0 && f.fp_off != DW_SAME_FP) {
+                // AArch64 default_frame
+                sp = defaultSenderSP(sp, fp);
+                if (sp < prev_sp || sp >= bottom || !aligned(sp)) {
+                    break;
+                }
+            }
+        }
+
+        if (inDeadZone(pc) || (pc == prev_native_pc && sp == prev_sp)) {
+            break;
+        }
+    }
+
+    // If we did not meet a Java frame but the current thread has JavaFrameAnchor set,
+    // retry stack walking from the anchor
+    if (anchor != NULL && anchor->getFrame(pc, sp, fp)) {
+        anchor = NULL;
+        while (depth > 0 && frames[depth - 1].method_id == NULL) depth--; // pop unknown frames
+        goto unwind_loop;
+    }
+
+    if (vm_thread != NULL) vm_thread->exception() = saved_exception;
+
+    return depth;
+}
+
+void StackWalker::checkFault() {
+    if (VMThread::key() < 0) {
+        // JVM has not been loaded or VMStructs have not been initialized yet
+        return;
+    }
+
+    VMThread* vm_thread = VMThread::current();
+    if (vm_thread != NULL && sameStack(vm_thread->exception(), &vm_thread)) {
+        longjmp(*(jmp_buf*)vm_thread->exception(), 1);
+    }
+}
diff --git a/ddprof-lib/src/main/cpp/stackWalker.h b/ddprof-lib/src/main/cpp/stackWalker.h
new file mode 100644
index 00000000..84025157
--- /dev/null
+++ b/ddprof-lib/src/main/cpp/stackWalker.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright The async-profiler authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef _STACKWALKER_H
+#define _STACKWALKER_H
+
+#include <stdint.h>
+#include "arguments.h"
+#include "event.h"
+#include "vmEntry.h"
+
+
+class JavaFrameAnchor;
+
+struct StackContext {
+    const void* pc;
+    uintptr_t sp;
+    uintptr_t fp;
+    u64 cpu;
+
+    void set(const void* pc, uintptr_t sp, uintptr_t fp) {
+        this->pc = pc;
+        this->sp = sp;
+        this->fp = fp;
+    }
+};
+
+// Stack walking validation helpers (used by implementation and tests)
+namespace StackWalkValidation {
+    const uintptr_t DEAD_ZONE = 0x1000;
+    const intptr_t MAX_FRAME_SIZE = 0x40000;
+
+    // Check if pointer is in dead zone (very low or very high address)
+    static inline bool inDeadZone(const void* ptr) {
+        return ptr < (const void*)DEAD_ZONE || ptr > (const void*)-DEAD_ZONE;
+    }
+
+    // Check if pointer is properly aligned
+    static inline bool aligned(uintptr_t ptr) {
+        return (ptr & (sizeof(uintptr_t) - 1)) == 0;
+    }
+}
+
+class StackWalker {
+  private:
+    static int walkVM(void* ucontext, ASGCT_CallFrame* frames, int max_depth,
+                      StackWalkFeatures features, EventType event_type,
+                      const void* pc, uintptr_t sp, uintptr_t fp);
+
+  public:
+    static int walkFP(void* ucontext, const void** callchain, int max_depth, StackContext* java_ctx);
+    static int
walkDwarf(void* ucontext, const void** callchain, int max_depth, StackContext* java_ctx); + static int walkVM(void* ucontext, ASGCT_CallFrame* frames, int max_depth, StackWalkFeatures features, EventType event_type); + static int walkVM(void* ucontext, ASGCT_CallFrame* frames, int max_depth, JavaFrameAnchor* anchor, EventType event_type); + + static void checkFault(); +}; + +#endif // _STACKWALKER_H diff --git a/ddprof-lib/src/main/cpp/symbols.h b/ddprof-lib/src/main/cpp/symbols.h index 749a2123..b315d51e 100644 --- a/ddprof-lib/src/main/cpp/symbols.h +++ b/ddprof-lib/src/main/cpp/symbols.h @@ -1,17 +1,6 @@ /* - * Copyright 2017 Andrei Pangin - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 */ #ifndef _SYMBOLS_H @@ -20,22 +9,27 @@ #include "codeCache.h" #include "mutex.h" +#include + + class Symbols { -private: - static Mutex _parse_lock; - static bool _have_kernel_symbols; - static bool _libs_limit_reported; - -public: - static void parseKernelSymbols(CodeCache *cc); - static void parseLibraries(CodeCacheArray *array, bool kernel_symbols); - // The clear function is mainly for test purposes - // There are internal caches that are not associated to the array - static void clearParsingCaches(); - static bool haveKernelSymbols() { return _have_kernel_symbols; } - - // Some symbols are always roots - eg. no unwinding should be attempted once they are encountered - static bool isRootSymbol(const void* address); + private: + static Mutex _parse_lock; + static bool _have_kernel_symbols; + static bool _libs_limit_reported; + + public: + static void initLibraryRanges(); + static void parseKernelSymbols(CodeCache* cc); + static void parseLibraries(CodeCacheArray* array, bool kernel_symbols); + + static bool haveKernelSymbols() { + return _have_kernel_symbols; + } + // Clear internal caches - mainly for test purposes + static void clearParsingCaches(); + // Fast range check: does this PC lie in libc or libpthread? 
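+    // Used by the stack walker to stop unwinding early when a sample lands in
+    // libc/pthread code, where symbols and unwind info may be unavailable.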
+ static bool isLibcOrPthreadAddress(uintptr_t pc); }; class UnloadProtection { diff --git a/ddprof-lib/src/main/cpp/symbols_linux.cpp b/ddprof-lib/src/main/cpp/symbols_linux.cpp new file mode 100644 index 00000000..8d13202e --- /dev/null +++ b/ddprof-lib/src/main/cpp/symbols_linux.cpp @@ -0,0 +1,1066 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifdef __linux__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "symbols.h" +#include "dwarf.h" +#include "fdtransferClient.h" +#include "log.h" +#include "os.h" + +// Simple address range +struct Range { + uintptr_t start; + uintptr_t end; +}; + +static bool range_valid(const Range* r) { + return r->start && r->end && r->end > r->start; +} + +static Range g_libc = {0, 0}; +static Range g_libpthread = {0, 0}; +static bool g_lib_ranges_inited = false; + +// Unified dl_iterate_phdr callback context +struct UnifiedCtx { + void* fbase; // For range_for_fbase functionality + Range* out; // For range_for_fbase functionality + const void** main_phdr; // For getMainPhdr functionality + void* libc_fbase; // For init_lib_ranges_once functionality + void* pthread_fbase; // For init_lib_ranges_once functionality + Range* libc_range; // For init_lib_ranges_once functionality + Range* pthread_range; // For init_lib_ranges_once functionality +}; + +// Unified callback for both range computation and main phdr collection +static int unified_phdr_cb(dl_phdr_info* info, size_t /*unused*/, void* data) { + UnifiedCtx* ctx = (UnifiedCtx*)data; + + // Main executable's program header (first entry) + if (ctx->main_phdr != NULL && *ctx->main_phdr == NULL) { + *ctx->main_phdr = info->dlpi_phdr; + } + + // Range computation for specific fbase (range_for_fbase functionality) + if (ctx->fbase != NULL && (void*)info->dlpi_addr == ctx->fbase) { + uintptr_t minv = (uintptr_t)-1; + uintptr_t maxv = 0; + for (int i = 0; i < info->dlpi_phnum; i++) { + const ElfW(Phdr)* ph = &info->dlpi_phdr[i]; + if (ph->p_type != PT_LOAD) continue; + uintptr_t vaddr = (uintptr_t)info->dlpi_addr + ph->p_vaddr; + uintptr_t vend = vaddr + ph->p_memsz; + if (vaddr < minv) minv = vaddr; + if (vend > maxv) maxv = vend; + } + if (minv != (uintptr_t)-1 && maxv > minv) { + ctx->out->start = minv; + ctx->out->end = maxv; + } + } + + // Library range computation (init_lib_ranges_once functionality) + if (ctx->libc_fbase != NULL && (void*)info->dlpi_addr == ctx->libc_fbase) { + uintptr_t minv = (uintptr_t)-1; + uintptr_t maxv = 0; + for (int i = 0; i < info->dlpi_phnum; i++) { + const ElfW(Phdr)* ph = &info->dlpi_phdr[i]; + if (ph->p_type != PT_LOAD) continue; + uintptr_t vaddr = (uintptr_t)info->dlpi_addr + ph->p_vaddr; + uintptr_t vend = vaddr + ph->p_memsz; + if (vaddr < minv) minv = vaddr; + if (vend > maxv) maxv = vend; + } + if (minv != (uintptr_t)-1 && maxv > minv) { + ctx->libc_range->start = minv; + ctx->libc_range->end = maxv; + } + } + + if (ctx->pthread_fbase != NULL && (void*)info->dlpi_addr == ctx->pthread_fbase) { + uintptr_t minv = (uintptr_t)-1; + uintptr_t maxv = 0; + for (int i = 0; i < info->dlpi_phnum; i++) { + const ElfW(Phdr)* ph = &info->dlpi_phdr[i]; + if (ph->p_type != PT_LOAD) continue; + uintptr_t vaddr = (uintptr_t)info->dlpi_addr + ph->p_vaddr; + uintptr_t vend = vaddr + ph->p_memsz; + if (vaddr < minv) minv = vaddr; + if (vend > maxv) maxv = vend; + } + if (minv != (uintptr_t)-1 && maxv > minv) { 
+ ctx->pthread_range->start = minv; + ctx->pthread_range->end = maxv; + } + } + + return 0; // continue iteration +} + +// Main program header - initialized lazily +static const void* _main_phdr = NULL; +static pthread_once_t _main_phdr_once = PTHREAD_ONCE_INIT; +static const char* _ld_base = (const char*)getauxval(AT_BASE); + +// Initialize main phdr once +static void init_main_phdr_once() { + UnifiedCtx ctx = {NULL, NULL, &_main_phdr, NULL, NULL, NULL, NULL}; + dl_iterate_phdr(&unified_phdr_cb, &ctx); +} + +// Ensure main phdr is initialized +static void ensure_main_phdr_initialized() { + pthread_once(&_main_phdr_once, init_main_phdr_once); +} + +static Range range_for_fbase(void* fbase) { + Range r = {0, 0}; + if (!fbase) return r; + UnifiedCtx ctx = {fbase, &r, NULL, NULL, NULL, NULL, NULL}; + dl_iterate_phdr(&unified_phdr_cb, &ctx); + return r; +} + +static void init_lib_ranges_once() { + if (g_lib_ranges_inited) return; + g_lib_ranges_inited = true; + + // libc anchor: prefer gnu_get_libc_version if present; fallback to strlen + void* libc_sym = dlsym(RTLD_DEFAULT, "gnu_get_libc_version"); + if (!libc_sym) libc_sym = (void*)&strlen; + + Dl_info di = {0}; + void* libc_fbase = NULL; + if (dladdr(libc_sym, &di) && di.dli_fbase) { + libc_fbase = di.dli_fbase; + } + + // pthread anchor: pthread_create (on glibc >= 2.34 this lives in libc) + Dl_info di2 = {0}; + void* pthread_fbase = NULL; + if (dladdr((void*)&pthread_create, &di2) && di2.dli_fbase) { + pthread_fbase = di2.dli_fbase; + } + + // Use unified dl_iterate_phdr call to get all information at once + UnifiedCtx ctx = {NULL, NULL, &_main_phdr, libc_fbase, pthread_fbase, &g_libc, &g_libpthread}; + dl_iterate_phdr(&unified_phdr_cb, &ctx); + + // If pthread couldn't be resolved separately, treat it as libc + if (!range_valid(&g_libpthread)) g_libpthread = g_libc; +} + +static bool pc_in_range(uintptr_t pc, const Range* r) { + return range_valid(r) && pc >= r->start && pc < r->end; +} + +#ifdef __x86_64__ + +#include +#include "vmEntry.h" + +// Workaround for JDK-8312065 on JDK 8: +// replace poll() implementation with ppoll() which is restartable +static int poll_hook(struct pollfd* fds, nfds_t nfds, int timeout) { + if (timeout >= 0) { + struct timespec ts; + ts.tv_sec = timeout / 1000; + ts.tv_nsec = (timeout % 1000) * 1000000; + return ppoll(fds, nfds, &ts, NULL); + } else { + return ppoll(fds, nfds, NULL, NULL); + } +} + +static void applyPatch(CodeCache* cc) { + static bool patch_libnet = VM::hotspot_version() == 8; + + if (patch_libnet) { + size_t len = strlen(cc->name()); + if (len >= 10 && strcmp(cc->name() + len - 10, "/libnet.so") == 0) { + UnloadProtection handle(cc); + if (handle.isValid()) { + cc->patchImport(im_poll, (void*)poll_hook); + patch_libnet = false; + } + } + } +} + +#else + +static void applyPatch(CodeCache* cc) {} + +#endif + + +static bool isMainExecutable(const char* image_base, const void* map_end) { + ensure_main_phdr_initialized(); + return _main_phdr != NULL && _main_phdr >= image_base && _main_phdr < map_end; +} + +static bool isLoader(const char* image_base) { + return _ld_base == image_base; +} + +class SymbolDesc { + private: + const char* _addr; + const char* _desc; + + public: + SymbolDesc(const char* s) { + _addr = s; + _desc = strchr(_addr, ' '); + } + + const char* addr() { return (const char*)strtoul(_addr, NULL, 16); } + char type() { return _desc != NULL ? 
_desc[1] : 0; } + const char* name() { return _desc + 3; } +}; + +class MemoryMapDesc { + private: + const char* _addr; + const char* _end; + const char* _perm; + const char* _offs; + const char* _dev; + const char* _inode; + const char* _file; + + public: + MemoryMapDesc(const char* s) { + _addr = s; + _end = strchr(_addr, '-') + 1; + _perm = strchr(_end, ' ') + 1; + _offs = strchr(_perm, ' ') + 1; + _dev = strchr(_offs, ' ') + 1; + _inode = strchr(_dev, ' ') + 1; + _file = strchr(_inode, ' '); + + if (_file != NULL) { + while (*_file == ' ') _file++; + } + } + + const char* file() { return _file; } + bool isReadable() { return _perm[0] == 'r'; } + bool isExecutable() { return _perm[2] == 'x'; } + const char* addr() { return (const char*)strtoul(_addr, NULL, 16); } + const char* end() { return (const char*)strtoul(_end, NULL, 16); } + unsigned long offs() { return strtoul(_offs, NULL, 16); } + unsigned long inode() { return strtoul(_inode, NULL, 10); } + + unsigned long dev() { + char* colon; + unsigned long major = strtoul(_dev, &colon, 16); + unsigned long minor = strtoul(colon + 1, NULL, 16); + return major << 8 | minor; + } +}; + +struct SharedLibrary { + char* file; + const char* map_start; + const char* map_end; + const char* image_base; +}; + + +#ifdef __LP64__ +const unsigned char ELFCLASS_SUPPORTED = ELFCLASS64; +typedef Elf64_Ehdr ElfHeader; +typedef Elf64_Shdr ElfSection; +typedef Elf64_Phdr ElfProgramHeader; +typedef Elf64_Nhdr ElfNote; +typedef Elf64_Sym ElfSymbol; +typedef Elf64_Rel ElfRelocation; +typedef Elf64_Dyn ElfDyn; +#define ELF_R_TYPE ELF64_R_TYPE +#define ELF_R_SYM ELF64_R_SYM +#else +const unsigned char ELFCLASS_SUPPORTED = ELFCLASS32; +typedef Elf32_Ehdr ElfHeader; +typedef Elf32_Shdr ElfSection; +typedef Elf32_Phdr ElfProgramHeader; +typedef Elf32_Nhdr ElfNote; +typedef Elf32_Sym ElfSymbol; +typedef Elf32_Rel ElfRelocation; +typedef Elf32_Dyn ElfDyn; +#define ELF_R_TYPE ELF32_R_TYPE +#define ELF_R_SYM ELF32_R_SYM +#endif // __LP64__ + +#if defined(__x86_64__) +# define R_GLOB_DAT R_X86_64_GLOB_DAT +# define R_ABS64 R_X86_64_64 +#elif defined(__i386__) +# define R_GLOB_DAT R_386_GLOB_DAT +# define R_ABS64 -1 +#elif defined(__arm__) || defined(__thumb__) +# define R_GLOB_DAT R_ARM_GLOB_DAT +# define R_ABS64 -1 +#elif defined(__aarch64__) +# define R_GLOB_DAT R_AARCH64_GLOB_DAT +# define R_ABS64 R_AARCH64_ABS64 +#elif defined(__PPC64__) +# define R_GLOB_DAT R_PPC64_GLOB_DAT +# define R_ABS64 -1 +#elif defined(__riscv) && (__riscv_xlen == 64) +// RISC-V does not have GLOB_DAT relocation, use something neutral, +// like the impossible relocation number. +# define R_GLOB_DAT -1 +# define R_ABS64 -1 +#elif defined(__loongarch_lp64) +// LOONGARCH does not have GLOB_DAT relocation, use something neutral, +// like the impossible relocation number. 
+# define R_GLOB_DAT -1 +# define R_ABS64 -1 +#else +# error "Compiling on unsupported arch" +#endif + + +static char _debuginfod_cache_buf[PATH_MAX] = {0}; + +class ElfParser { + private: + CodeCache* _cc; + const char* _base; + const char* _file_name; + bool _relocate_dyn; + ElfHeader* _header; + const char* _sections; + const char* _vaddr_diff; + + ElfParser(CodeCache* cc, const char* base, const void* addr, const char* file_name, bool relocate_dyn) { + _cc = cc; + _base = base; + _file_name = file_name; + _relocate_dyn = relocate_dyn; + _header = (ElfHeader*)addr; + _sections = (const char*)addr + _header->e_shoff; + } + + bool validHeader() { + unsigned char* ident = _header->e_ident; + return ident[0] == 0x7f && ident[1] == 'E' && ident[2] == 'L' && ident[3] == 'F' + && ident[4] == ELFCLASS_SUPPORTED && ident[5] == ELFDATA2LSB && ident[6] == EV_CURRENT + && _header->e_shstrndx != SHN_UNDEF; + } + + ElfSection* section(int index) { + return (ElfSection*)(_sections + index * _header->e_shentsize); + } + + const char* at(ElfSection* section) { + return (const char*)_header + section->sh_offset; + } + + const char* at(ElfProgramHeader* pheader) { + if (_header->e_type == ET_EXEC) { + return (const char*)pheader->p_vaddr; + } + return _vaddr_diff == NULL ? (const char*)pheader->p_vaddr : _vaddr_diff + pheader->p_vaddr; + } + + const char* base() { + return _header->e_type == ET_EXEC ? NULL : _vaddr_diff; + } + + char* dyn_ptr(ElfDyn* dyn) { + // GNU dynamic linker relocates pointers in the dynamic section, while musl doesn't. + // Also, [vdso] is not relocated, and its vaddr may differ from the load address. + if (_relocate_dyn || (_base != NULL && (char*)dyn->d_un.d_ptr < _base)) { + return _vaddr_diff == NULL ? (char*)dyn->d_un.d_ptr : (char*)_vaddr_diff + dyn->d_un.d_ptr; + } else { + return (char*)dyn->d_un.d_ptr; + } + } + + ElfSection* findSection(uint32_t type, const char* name); + ElfProgramHeader* findProgramHeader(uint32_t type); + + void calcVirtualLoadAddress(); + void parseDynamicSection(); + void parseDwarfInfo(); + uint32_t getSymbolCount(uint32_t* gnu_hash); + void loadSymbols(bool use_debug); + bool loadSymbolsFromDebug(const char* build_id, const int build_id_len); + bool loadSymbolsFromDebuginfodCache(const char* build_id, const int build_id_len); + bool loadSymbolsUsingBuildId(); + bool loadSymbolsUsingDebugLink(); + void loadSymbolTable(const char* symbols, size_t total_size, size_t ent_size, const char* strings); + void addRelocationSymbols(ElfSection* reltab, const char* plt); + const char* getDebuginfodCache(); + + public: + static void parseProgramHeaders(CodeCache* cc, const char* base, const char* end, bool relocate_dyn); + static bool parseFile(CodeCache* cc, const char* base, const char* file_name, bool use_debug); +}; + + +ElfSection* ElfParser::findSection(uint32_t type, const char* name) { + const char* strtab = at(section(_header->e_shstrndx)); + + for (int i = 0; i < _header->e_shnum; i++) { + ElfSection* section = this->section(i); + if (section->sh_type == type && section->sh_name != 0) { + if (strcmp(strtab + section->sh_name, name) == 0) { + return section; + } + } + } + + return NULL; +} + +ElfProgramHeader* ElfParser::findProgramHeader(uint32_t type) { + const char* pheaders = (const char*)_header + _header->e_phoff; + + for (int i = 0; i < _header->e_phnum; i++) { + ElfProgramHeader* pheader = (ElfProgramHeader*)(pheaders + i * _header->e_phentsize); + if (pheader->p_type == type) { + return pheader; + } + } + + return NULL; +} + +bool 
ElfParser::parseFile(CodeCache* cc, const char* base, const char* file_name, bool use_debug) { + int fd = open(file_name, O_RDONLY); + if (fd == -1) { + return false; + } + + size_t length = (size_t)lseek(fd, 0, SEEK_END); + void* addr = mmap(NULL, length, PROT_READ, MAP_PRIVATE, fd, 0); + close(fd); + + if (addr == MAP_FAILED) { + Log::warn("Could not parse symbols from %s: %s", file_name, strerror(errno)); + } else { + ElfParser elf(cc, base, addr, file_name, false); + if (elf.validHeader()) { + elf.calcVirtualLoadAddress(); + elf.loadSymbols(use_debug); + } + munmap(addr, length); + } + return true; +} + +void ElfParser::parseProgramHeaders(CodeCache* cc, const char* base, const char* end, bool relocate_dyn) { + ElfParser elf(cc, base, base, NULL, relocate_dyn); + if (elf.validHeader() && base + elf._header->e_phoff < end) { + cc->setTextBase(base); + elf.calcVirtualLoadAddress(); + elf.parseDynamicSection(); + elf.parseDwarfInfo(); + } +} + +void ElfParser::calcVirtualLoadAddress() { + // Find a difference between the virtual load address (often zero) and the actual DSO base + if (_base == NULL) { + _vaddr_diff = NULL; + return; + } + const char* pheaders = (const char*)_header + _header->e_phoff; + for (int i = 0; i < _header->e_phnum; i++) { + ElfProgramHeader* pheader = (ElfProgramHeader*)(pheaders + i * _header->e_phentsize); + if (pheader->p_type == PT_LOAD) { + _vaddr_diff = _base - pheader->p_vaddr; + return; + } + } + _vaddr_diff = _base; +} + +void ElfParser::parseDynamicSection() { + ElfProgramHeader* dynamic = findProgramHeader(PT_DYNAMIC); + if (dynamic != NULL) { + const char* symtab = NULL; + const char* strtab = NULL; + char* jmprel = NULL; + char* rel = NULL; + size_t pltrelsz = 0; + size_t relsz = 0; + size_t relent = 0; + size_t relcount = 0; + size_t syment = 0; + uint32_t nsyms = 0; + + const char* dyn_start = at(dynamic); + const char* dyn_end = dyn_start + dynamic->p_memsz; + for (ElfDyn* dyn = (ElfDyn*)dyn_start; dyn < (ElfDyn*)dyn_end; dyn++) { + switch (dyn->d_tag) { + case DT_SYMTAB: + symtab = dyn_ptr(dyn); + break; + case DT_STRTAB: + strtab = dyn_ptr(dyn); + break; + case DT_SYMENT: + syment = dyn->d_un.d_val; + break; + case DT_HASH: + nsyms = ((uint32_t*)dyn_ptr(dyn))[1]; + break; + case DT_GNU_HASH: + if (nsyms == 0) { + nsyms = getSymbolCount((uint32_t*)dyn_ptr(dyn)); + } + break; + case DT_JMPREL: + jmprel = dyn_ptr(dyn); + break; + case DT_PLTRELSZ: + pltrelsz = dyn->d_un.d_val; + break; + case DT_RELA: + case DT_REL: + rel = dyn_ptr(dyn); + break; + case DT_RELASZ: + case DT_RELSZ: + relsz = dyn->d_un.d_val; + break; + case DT_RELAENT: + case DT_RELENT: + relent = dyn->d_un.d_val; + break; + case DT_RELACOUNT: + case DT_RELCOUNT: + relcount = dyn->d_un.d_val; + break; + } + } + + if (symtab == NULL || strtab == NULL || syment == 0 || relent == 0) { + return; + } + + if (!_cc->hasDebugSymbols() && nsyms > 0) { + loadSymbolTable(symtab, syment * nsyms, syment, strtab); + } + + const char* base = this->base(); + if (jmprel != NULL && pltrelsz != 0) { + // Parse .rela.plt table + for (size_t offs = 0; offs < pltrelsz; offs += relent) { + ElfRelocation* r = (ElfRelocation*)(jmprel + offs); + ElfSymbol* sym = (ElfSymbol*)(symtab + ELF_R_SYM(r->r_info) * syment); + if (sym->st_name != 0) { + _cc->addImport((void**)(base + r->r_offset), strtab + sym->st_name); + } + } + } + + if (rel != NULL && relsz != 0) { + // Relocation entries for imports can be found in .rela.dyn, for example + // if a shared library is built without PLT (-fno-plt). 
However, if both + // entries exist, addImport saves them both. + for (size_t offs = relcount * relent; offs < relsz; offs += relent) { + ElfRelocation* r = (ElfRelocation*)(rel + offs); + if (ELF_R_TYPE(r->r_info) == R_GLOB_DAT || ELF_R_TYPE(r->r_info) == R_ABS64) { + ElfSymbol* sym = (ElfSymbol*)(symtab + ELF_R_SYM(r->r_info) * syment); + if (sym->st_name != 0) { + _cc->addImport((void**)(base + r->r_offset), strtab + sym->st_name); + } + } + } + } + } +} + +void ElfParser::parseDwarfInfo() { + if (!DWARF_SUPPORTED) return; + + ElfProgramHeader* eh_frame_hdr = findProgramHeader(PT_GNU_EH_FRAME); + if (eh_frame_hdr != NULL) { + if (eh_frame_hdr->p_vaddr != 0) { + DwarfParser dwarf(_cc->name(), _base, at(eh_frame_hdr)); + _cc->setDwarfTable(dwarf.table(), dwarf.count()); + } else if (strcmp(_cc->name(), "[vdso]") == 0) { + FrameDesc* table = (FrameDesc*)malloc(sizeof(FrameDesc)); + *table = FrameDesc::empty_frame; + _cc->setDwarfTable(table, 1); + } + } +} + +uint32_t ElfParser::getSymbolCount(uint32_t* gnu_hash) { + uint32_t nbuckets = gnu_hash[0]; + uint32_t* buckets = &gnu_hash[4] + gnu_hash[2] * (sizeof(size_t) / 4); + + uint32_t nsyms = 0; + for (uint32_t i = 0; i < nbuckets; i++) { + if (buckets[i] > nsyms) nsyms = buckets[i]; + } + + if (nsyms > 0) { + uint32_t* chain = &buckets[nbuckets] - gnu_hash[1]; + while (!(chain[nsyms++] & 1)); + } + return nsyms; +} + +void ElfParser::loadSymbols(bool use_debug) { + ElfSection* symtab = findSection(SHT_SYMTAB, ".symtab"); + if (symtab != NULL) { + // Parse debug symbols from the original .so + ElfSection* strtab = section(symtab->sh_link); + loadSymbolTable(at(symtab), symtab->sh_size, symtab->sh_entsize, at(strtab)); + _cc->setDebugSymbols(true); + } else if (use_debug) { + // Try to load symbols from an external debuginfo library + loadSymbolsUsingBuildId() || loadSymbolsUsingDebugLink(); + } + + if (use_debug) { + // Synthesize names for PLT stubs + ElfSection* plt = findSection(SHT_PROGBITS, ".plt"); + if (plt != NULL) { + _cc->setPlt(plt->sh_addr, plt->sh_size); + ElfSection* reltab = findSection(SHT_RELA, ".rela.plt"); + if (reltab != NULL || (reltab = findSection(SHT_REL, ".rel.plt")) != NULL) { + addRelocationSymbols(reltab, base() + plt->sh_addr + PLT_HEADER_SIZE); + } + } + } +} + +const char* ElfParser::getDebuginfodCache() { + if (_debuginfod_cache_buf[0]) { + return _debuginfod_cache_buf; + } + + const char* env_vars[] = {"DEBUGINFOD_CACHE_PATH", "XDG_CACHE_HOME", "HOME"}; + const char* suffixes[] = {"/", "debuginfod_client/", ".cache/debuginfod_client/"}; + + for (int i = 0; i < sizeof(env_vars) / sizeof(env_vars[0]); i++) { + const char* env_val = getenv(env_vars[i]); + if (!env_val || !env_val[0]) { + continue; + } + + if (snprintf(_debuginfod_cache_buf, sizeof(_debuginfod_cache_buf), "%s/%s", env_val, suffixes[i]) < sizeof(_debuginfod_cache_buf)) { + return _debuginfod_cache_buf; + } + } + + _debuginfod_cache_buf[0] = '\0'; + return _debuginfod_cache_buf; +} + +bool ElfParser::loadSymbolsFromDebug(const char* build_id, const int build_id_len) { + char path[PATH_MAX]; + char* p = path + snprintf(path, sizeof(path), "/usr/lib/debug/.build-id/%02hhx/", build_id[0]); + for (int i = 1; i < build_id_len; i++) { + p += snprintf(p, 3, "%02hhx", build_id[i]); + } + strcpy(p, ".debug"); + + return parseFile(_cc, _base, path, false); +} + +bool ElfParser::loadSymbolsFromDebuginfodCache(const char* build_id, const int build_id_len) { + const char* debuginfod_cache = getDebuginfodCache(); + if (debuginfod_cache == NULL || 
!debuginfod_cache[0]) { + return false; + } + + char path[PATH_MAX]; + const int debuginfod_cache_len = strlen(debuginfod_cache); + if (debuginfod_cache_len + build_id_len + strlen("/debuginfo") >= sizeof(path)) { + Log::warn("Path too long, skipping loading symbols: %s", debuginfod_cache); + return false; + } + + char* p = strcpy(path, debuginfod_cache); + p += debuginfod_cache_len; + for (int i = 0; i < build_id_len; i++) { + p += snprintf(p, 3, "%02hhx", build_id[i]); + } + strcpy(p, "/debuginfo"); + + return parseFile(_cc, _base, path, false); +} + +// Load symbols from the first file that exists in the following locations, in order, where abcdef1234 is Build ID. +// /usr/lib/debug/.build-id/ab/cdef1234.debug +// $DEBUGINFOD_CACHE_PATH/abcdef1234/debuginfo +// $XDG_CACHE_HOME/debuginfod_client/abcdef1234/debuginfo +// $HOME/.cache/debuginfod_client/abcdef1234/debuginfo +bool ElfParser::loadSymbolsUsingBuildId() { + ElfSection* section = findSection(SHT_NOTE, ".note.gnu.build-id"); + if (section == NULL || section->sh_size <= 16) { + return false; + } + + ElfNote* note = (ElfNote*)at(section); + if (note->n_namesz != 4 || note->n_descsz < 2 || note->n_descsz > 64) { + return false; + } + + const char* build_id = (const char*)note + sizeof(*note) + 4; + int build_id_len = note->n_descsz; + + return loadSymbolsFromDebug(build_id, build_id_len) + || loadSymbolsFromDebuginfodCache(build_id, build_id_len); +} + +// Look for debuginfo file specified in .gnu_debuglink section +bool ElfParser::loadSymbolsUsingDebugLink() { + ElfSection* section = findSection(SHT_PROGBITS, ".gnu_debuglink"); + if (section == NULL || section->sh_size <= 4) { + return false; + } + + const char* basename = strrchr(_file_name, '/'); + if (basename == NULL) { + return false; + } + + char* dirname = strndup(_file_name, basename - _file_name); + if (dirname == NULL) { + return false; + } + + const char* debuglink = at(section); + char path[PATH_MAX]; + bool result = false; + + // 1. /path/to/libjvm.so.debug + if (strcmp(debuglink, basename + 1) != 0 && + snprintf(path, PATH_MAX, "%s/%s", dirname, debuglink) < PATH_MAX) { + result = parseFile(_cc, _base, path, false); + } + + // 2. /path/to/.debug/libjvm.so.debug + if (!result && snprintf(path, PATH_MAX, "%s/.debug/%s", dirname, debuglink) < PATH_MAX) { + result = parseFile(_cc, _base, path, false); + } + + // 3. 
/usr/lib/debug/path/to/libjvm.so.debug + if (!result && snprintf(path, PATH_MAX, "/usr/lib/debug%s/%s", dirname, debuglink) < PATH_MAX) { + result = parseFile(_cc, _base, path, false); + } + + free(dirname); + return result; +} + +void ElfParser::loadSymbolTable(const char* symbols, size_t total_size, size_t ent_size, const char* strings) { + const char* base = this->base(); + for (const char* symbols_end = symbols + total_size; symbols < symbols_end; symbols += ent_size) { + ElfSymbol* sym = (ElfSymbol*)symbols; + if (sym->st_name != 0 && sym->st_value != 0) { + // Skip special AArch64 mapping symbols: $x and $d + if (sym->st_size != 0 || sym->st_info != 0 || strings[sym->st_name] != '$') { + const char* addr; + if (base != NULL) { + // Check for overflow when adding sym->st_value to base + uintptr_t base_addr = (uintptr_t)base; + uint64_t symbol_value = sym->st_value; + + // Skip this symbol if addition would overflow + // First check if symbol_value exceeds the address space + if (symbol_value > UINTPTR_MAX) { + continue; + } + // Then check if addition would overflow + if (base_addr > UINTPTR_MAX - (uintptr_t)symbol_value) { + continue; + } + + // Perform addition using integer arithmetic to avoid pointer overflow + addr = (const char*)(base_addr + (uintptr_t)symbol_value); + } else { + addr = (const char*)sym->st_value; + } + _cc->add(addr, (int)sym->st_size, strings + sym->st_name); + } + } + } +} + +void ElfParser::addRelocationSymbols(ElfSection* reltab, const char* plt) { + ElfSection* symtab = section(reltab->sh_link); + const char* symbols = at(symtab); + + ElfSection* strtab = section(symtab->sh_link); + const char* strings = at(strtab); + + const char* relocations = at(reltab); + const char* relocations_end = relocations + reltab->sh_size; + for (; relocations < relocations_end; relocations += reltab->sh_entsize) { + ElfRelocation* r = (ElfRelocation*)relocations; + ElfSymbol* sym = (ElfSymbol*)(symbols + ELF_R_SYM(r->r_info) * symtab->sh_entsize); + + char name[256]; + if (sym->st_name == 0) { + strcpy(name, "@plt"); + } else { + const char* sym_name = strings + sym->st_name; + snprintf(name, sizeof(name), "%s%cplt", sym_name, sym_name[0] == '_' && sym_name[1] == 'Z' ? '.' 
: '@'); + name[sizeof(name) - 1] = 0; + } + + _cc->add(plt, PLT_ENTRY_SIZE, name); + plt += PLT_ENTRY_SIZE; + } +} + + +Mutex Symbols::_parse_lock; +bool Symbols::_have_kernel_symbols = false; +bool Symbols::_libs_limit_reported = false; +static std::unordered_set _parsed_inodes; +static bool _in_parse_libraries = false; + +void Symbols::parseKernelSymbols(CodeCache* cc) { + int fd; + if (FdTransferClient::hasPeer()) { + fd = FdTransferClient::requestKallsymsFd(); + } else { + fd = open("/proc/kallsyms", O_RDONLY); + } + + if (fd == -1) { + Log::warn("open(\"/proc/kallsyms\"): %s", strerror(errno)); + return; + } + + FILE* f = fdopen(fd, "r"); + if (f == NULL) { + Log::warn("fdopen(): %s", strerror(errno)); + close(fd); + return; + } + + char str[256]; + while (fgets(str, sizeof(str) - 8, f) != NULL) { + size_t len = strlen(str) - 1; // trim the '\n' + strcpy(str + len, "_[k]"); + + SymbolDesc symbol(str); + char type = symbol.type(); + if (type == 'T' || type == 't' || type == 'W' || type == 'w') { + const char* addr = symbol.addr(); + if (addr != NULL) { + if (!_have_kernel_symbols) { + if (strncmp(symbol.name(), "__LOAD_PHYSICAL_ADDR", 20) == 0 || + strncmp(symbol.name(), "phys_startup", 12) == 0) { + continue; + } + _have_kernel_symbols = true; + } + cc->add(addr, 0, symbol.name()); + } + } + } + + fclose(f); +} + +static void collectSharedLibraries(std::unordered_map& libs, int max_count) { + FILE* f = fopen("/proc/self/maps", "r"); + if (f == NULL) { + return; + } + + const char* image_base = NULL; + u64 last_inode = 0; + char* str = NULL; + size_t str_size = 0; + ssize_t len; + + while (max_count > 0 && (len = getline(&str, &str_size, f)) > 0) { + str[len - 1] = 0; + + MemoryMapDesc map(str); + if (!map.isReadable() || map.file() == NULL || map.file()[0] == 0) { + continue; + } + + u64 inode = u64(map.dev()) << 32 | map.inode(); + if (_parsed_inodes.find(inode) != _parsed_inodes.end()) { + continue; // shared object is already parsed + } + if (inode == 0 && strcmp(map.file(), "[vdso]") != 0) { + continue; // all shared libraries have inode, except vDSO + } + + const char* map_start = map.addr(); + const char* map_end = map.end(); + if (inode != last_inode && map.offs() == 0) { + image_base = map_start; + last_inode = inode; + } + + if (map.isExecutable()) { + SharedLibrary& lib = libs[inode]; + if (lib.file == nullptr) { + lib.file = strdup(map.file()); + lib.map_start = map_start; + lib.map_end = map_end; + lib.image_base = inode == last_inode ? 
image_base : NULL; + max_count--; + } else { + // The same library may have multiple executable segments mapped + lib.map_end = map_end; + } + } + } + + free(str); + fclose(f); +} + +void Symbols::parseLibraries(CodeCacheArray* array, bool kernel_symbols) { + MutexLocker ml(_parse_lock); + + if (_in_parse_libraries || array->count() >= MAX_NATIVE_LIBS) { + return; + } + _in_parse_libraries = true; + + if (kernel_symbols && !haveKernelSymbols()) { + CodeCache* cc = new CodeCache("[kernel]"); + parseKernelSymbols(cc); + + if (haveKernelSymbols()) { + cc->sort(); + array->add(cc); + } else { + delete cc; + } + } + + std::unordered_map libs; + collectSharedLibraries(libs, MAX_NATIVE_LIBS - array->count()); + + for (auto& it : libs) { + u64 inode = it.first; + _parsed_inodes.insert(inode); + + SharedLibrary& lib = it.second; + CodeCache* cc = new CodeCache(lib.file, array->count(), lib.map_start, lib.map_end, lib.image_base); + + if (strchr(lib.file, ':') != NULL) { + // Do not try to parse pseudofiles like anon_inode:name, /memfd:name + } else if (strcmp(lib.file, "[vdso]") == 0) { + ElfParser::parseProgramHeaders(cc, lib.map_start, lib.map_end, true); + } else if (lib.image_base == NULL) { + // Unlikely case when image base has not been found: not safe to access program headers. + // Be careful: executable file is not always ELF, e.g. classes.jsa + ElfParser::parseFile(cc, lib.map_start, lib.file, true); + } else { + // Parse debug symbols first + ElfParser::parseFile(cc, lib.image_base, lib.file, true); + + UnloadProtection handle(cc); + if (handle.isValid()) { + ElfParser::parseProgramHeaders(cc, lib.image_base, lib.map_end, OS::isMusl()); + } + } + + free(lib.file); + + cc->sort(); + applyPatch(cc); + array->add(cc); + } + + if (array->count() >= MAX_NATIVE_LIBS && !_libs_limit_reported) { + Log::warn("Number of parsed libraries reached the limit of %d", MAX_NATIVE_LIBS); + _libs_limit_reported = true; + } + + _in_parse_libraries = false; +} + +// Check that the base address of the shared object has not changed +static bool verifyBaseAddress(const CodeCache* cc, void* lib_handle) { + Dl_info dl_info; + struct link_map* map; + + if (dlinfo(lib_handle, RTLD_DI_LINKMAP, &map) != 0 || dladdr(map->l_ld, &dl_info) == 0) { + return false; + } + + return cc->imageBase() == (const char*)dl_info.dli_fbase; +} + +UnloadProtection::UnloadProtection(const CodeCache *cc) { + if (OS::isMusl() || isMainExecutable(cc->imageBase(), cc->maxAddress()) || isLoader(cc->imageBase())) { + _lib_handle = NULL; + _valid = true; + return; + } + + // dlopen() can reopen previously loaded libraries even if the underlying file has been deleted + const char* stripped_name = cc->name(); + size_t name_len = strlen(stripped_name); + if (name_len > 10 && strcmp(stripped_name + name_len - 10, " (deleted)") == 0) { + char* buf = (char*) alloca(name_len - 9); + *stpncpy(buf, stripped_name, name_len - 10) = 0; + stripped_name = buf; + } + + // Protect library from unloading while parsing in-memory ELF program headers. + // Also, dlopen() ensures the library is fully loaded. + _lib_handle = dlopen(stripped_name, RTLD_LAZY | RTLD_NOLOAD); + _valid = _lib_handle != NULL && verifyBaseAddress(cc, _lib_handle); +} + +UnloadProtection::~UnloadProtection() { + if (_lib_handle != NULL) { + dlclose(_lib_handle); + } +} + +void Symbols::initLibraryRanges() { + init_lib_ranges_once(); +} + +bool Symbols::isLibcOrPthreadAddress(uintptr_t pc) { + // Fast, allocation-free integer checks — no strings involved. 
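+    // This can run inside a signal handler, so it must not take locks,
+    // allocate, or call into the dynamic linker.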
+ // initLibraryRanges() must have been called during profiler startup. + if (pc_in_range(pc, &g_libc)) return true; + if (pc_in_range(pc, &g_libpthread)) return true; + return false; +} + + +// Implementation of clearParsingCaches for test compatibility +void Symbols::clearParsingCaches() { + _parsed_inodes.clear(); +} + +#endif // __linux__ diff --git a/ddprof-lib/src/main/cpp/symbols_linux.h b/ddprof-lib/src/main/cpp/symbols_linux.h index 67a19183..19404483 100644 --- a/ddprof-lib/src/main/cpp/symbols_linux.h +++ b/ddprof-lib/src/main/cpp/symbols_linux.h @@ -3,7 +3,7 @@ #include "symbols.h" -// Forward declaration for ElfParser functionality from cpp-external/symbols_linux.cpp +// Forward declaration for ElfParser functionality from symbols_linux.cpp // The actual implementation will be available through the patched upstream file class ElfParser { public: diff --git a/ddprof-lib/src/main/cpp/symbols_macos.cpp b/ddprof-lib/src/main/cpp/symbols_macos.cpp new file mode 100644 index 00000000..268d0cad --- /dev/null +++ b/ddprof-lib/src/main/cpp/symbols_macos.cpp @@ -0,0 +1,231 @@ +/* + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifdef __APPLE__ + +#include +#include +#include +#include +#include +#include +#include "symbols.h" +#include "log.h" + +UnloadProtection::UnloadProtection(const CodeCache *cc) { + // Protect library from unloading while parsing in-memory ELF program headers. + // Also, dlopen() ensures the library is fully loaded. + _lib_handle = dlopen(cc->name(), RTLD_LAZY | RTLD_NOLOAD); + _valid = _lib_handle != NULL; +} + +UnloadProtection::~UnloadProtection() { + if (_lib_handle != NULL) { + dlclose(_lib_handle); + } +} + +class MachOParser { + private: + CodeCache* _cc; + const mach_header* _image_base; + const char* _vmaddr_slide; + + static const char* add(const void* base, uint64_t offset) { + return (const char*)base + offset; + } + + void findSymbolPtrSection(const segment_command_64* sc, const section_64** section_ptr) { + const section_64* section = (const section_64*)add(sc, sizeof(segment_command_64)); + for (uint32_t i = 0; i < sc->nsects; i++) { + uint32_t section_type = section->flags & SECTION_TYPE; + if (section_type == S_NON_LAZY_SYMBOL_POINTERS) { + section_ptr[0] = section; + } else if (section_type == S_LAZY_SYMBOL_POINTERS) { + section_ptr[1] = section; + } + section++; + } + } + + const section_64* findSection(const segment_command_64* sc, const char* section_name) { + const section_64* section = (const section_64*)add(sc, sizeof(segment_command_64)); + for (uint32_t i = 0; i < sc->nsects; i++) { + if (strcmp(section->sectname, section_name) == 0) { + return section; + } + section++; + } + return NULL; + } + + void loadSymbols(const symtab_command* symtab, const char* link_base) { + const nlist_64* sym = (const nlist_64*)add(link_base, symtab->symoff); + const char* str_table = add(link_base, symtab->stroff); + bool debug_symbols = false; + + for (uint32_t i = 0; i < symtab->nsyms; i++) { + if ((sym->n_type & 0xee) == 0x0e && sym->n_value != 0) { + const char* addr = _vmaddr_slide + sym->n_value; + const char* name = str_table + sym->n_un.n_strx; + if (name[0] == '_') name++; + _cc->add(addr, 0, name); + debug_symbols = true; + } + sym++; + } + + _cc->setDebugSymbols(debug_symbols); + } + + void loadStubSymbols(const symtab_command* symtab, const dysymtab_command* dysymtab, + const section_64* stubs_section, const char* link_base) { + const nlist_64* sym = (const nlist_64*)add(link_base, symtab->symoff); + const 
+    void loadStubSymbols(const symtab_command* symtab, const dysymtab_command* dysymtab,
+                         const section_64* stubs_section, const char* link_base) {
+        const nlist_64* sym = (const nlist_64*)add(link_base, symtab->symoff);
+        const char* str_table = add(link_base, symtab->stroff);
+
+        const uint32_t* isym = (const uint32_t*)add(link_base, dysymtab->indirectsymoff) + stubs_section->reserved1;
+        uint32_t isym_count = stubs_section->size / stubs_section->reserved2;
+        const char* stubs_start = _vmaddr_slide + stubs_section->addr;
+
+        for (uint32_t i = 0; i < isym_count; i++) {
+            if ((isym[i] & (INDIRECT_SYMBOL_LOCAL | INDIRECT_SYMBOL_ABS)) == 0) {
+                const char* name = str_table + sym[isym[i]].n_un.n_strx;
+                if (name[0] == '_') name++;
+
+                char stub_name[256];
+                snprintf(stub_name, sizeof(stub_name), "stub:%s", name);
+                _cc->add(stubs_start + i * stubs_section->reserved2, stubs_section->reserved2, stub_name);
+            }
+        }
+
+        _cc->setPlt(stubs_section->addr, isym_count * stubs_section->reserved2);
+    }
+
+    void loadImports(const symtab_command* symtab, const dysymtab_command* dysymtab,
+                     const section_64* symbol_ptr_section, const char* link_base) {
+        const nlist_64* sym = (const nlist_64*)add(link_base, symtab->symoff);
+        const char* str_table = add(link_base, symtab->stroff);
+
+        const uint32_t* isym = (const uint32_t*)add(link_base, dysymtab->indirectsymoff) + symbol_ptr_section->reserved1;
+        uint32_t isym_count = symbol_ptr_section->size / sizeof(void*);
+        void** slot = (void**)(_vmaddr_slide + symbol_ptr_section->addr);
+
+        for (uint32_t i = 0; i < isym_count; i++) {
+            if ((isym[i] & (INDIRECT_SYMBOL_LOCAL | INDIRECT_SYMBOL_ABS)) == 0) {
+                const char* name = str_table + sym[isym[i]].n_un.n_strx;
+                if (name[0] == '_') name++;
+                _cc->addImport(&slot[i], name);
+            }
+        }
+    }
+
+  public:
+    MachOParser(CodeCache* cc, const mach_header* image_base, const char* vmaddr_slide) :
+        _cc(cc), _image_base(image_base), _vmaddr_slide(vmaddr_slide) {}
+
+    bool parse() {
+        if (_image_base->magic != MH_MAGIC_64) {
+            return false;
+        }
+
+        const mach_header_64* header = (const mach_header_64*)_image_base;
+        const load_command* lc = (const load_command*)(header + 1);
+
+        const char* link_base = NULL;
+        const section_64* symbol_ptr[2] = {NULL, NULL};
+        const symtab_command* symtab = NULL;
+        const dysymtab_command* dysymtab = NULL;
+        const section_64* stubs_section = NULL;
+
+        for (uint32_t i = 0; i < header->ncmds; i++) {
+            if (lc->cmd == LC_SEGMENT_64) {
+                const segment_command_64* sc = (const segment_command_64*)lc;
+                if (strcmp(sc->segname, "__TEXT") == 0) {
+                    _cc->updateBounds(_image_base, add(_image_base, sc->vmsize));
+                    stubs_section = findSection(sc, "__stubs");
+                } else if (strcmp(sc->segname, "__LINKEDIT") == 0) {
+                    link_base = _vmaddr_slide + sc->vmaddr - sc->fileoff;
+                } else if (strcmp(sc->segname, "__DATA") == 0 || strcmp(sc->segname, "__DATA_CONST") == 0) {
+                    findSymbolPtrSection(sc, symbol_ptr);
+                }
+            } else if (lc->cmd == LC_SYMTAB) {
+                symtab = (const symtab_command*)lc;
+            } else if (lc->cmd == LC_DYSYMTAB) {
+                dysymtab = (const dysymtab_command*)lc;
+            }
+            lc = (const load_command*)add(lc, lc->cmdsize);
+        }
+
+        if (symtab != NULL && link_base != NULL) {
+            loadSymbols(symtab, link_base);
+
+            if (dysymtab != NULL) {
+                if (symbol_ptr[0] != NULL) loadImports(symtab, dysymtab, symbol_ptr[0], link_base);
+                if (symbol_ptr[1] != NULL) loadImports(symtab, dysymtab, symbol_ptr[1], link_base);
+                if (stubs_section != NULL) loadStubSymbols(symtab, dysymtab, stubs_section, link_base);
+            }
+        }
+
+        return true;
+    }
+};
+
+
+Mutex Symbols::_parse_lock;
+bool Symbols::_have_kernel_symbols = false;
+bool Symbols::_libs_limit_reported = false;
+static std::unordered_set<const mach_header*> _parsed_libraries;
+
+void Symbols::parseKernelSymbols(CodeCache* cc) {
+}
+
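parseLibraries() below drives everything through dyld's image-enumeration API. A minimal standalone sketch of that walk (real functions from `<mach-o/dyld.h>`; the printf is just illustration):

```cpp
#include <mach-o/dyld.h>
#include <stdint.h>
#include <stdio.h>

int main() {
    uint32_t n = _dyld_image_count();
    for (uint32_t i = 0; i < n; i++) {
        // Header address, ASLR slide, and on-disk path of each loaded image.
        printf("%p slide=%#lx %s\n",
               (const void*)_dyld_get_image_header(i),
               (unsigned long)_dyld_get_image_vmaddr_slide(i),
               _dyld_get_image_name(i));
    }
    return 0;
}
```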
+void Symbols::parseLibraries(CodeCacheArray* array, bool kernel_symbols) {
+    MutexLocker ml(_parse_lock);
+    uint32_t images = _dyld_image_count();
+
+    for (uint32_t i = 0; i < images; i++) {
+        const mach_header* image_base = _dyld_get_image_header(i);
+        if (image_base == NULL || !_parsed_libraries.insert(image_base).second) {
+            continue;  // the library was already parsed
+        }
+
+        int count = array->count();
+        if (count >= MAX_NATIVE_LIBS) {
+            if (!_libs_limit_reported) {
+                Log::warn("Number of parsed libraries reached the limit of %d", MAX_NATIVE_LIBS);
+                _libs_limit_reported = true;
+            }
+            break;
+        }
+
+        const char* path = _dyld_get_image_name(i);
+        const char* vmaddr_slide = (const char*)_dyld_get_image_vmaddr_slide(i);
+
+        CodeCache* cc = new CodeCache(path, count);
+        cc->setTextBase(vmaddr_slide);
+
+        UnloadProtection handle(cc);
+        if (handle.isValid()) {
+            MachOParser parser(cc, image_base, vmaddr_slide);
+            if (!parser.parse()) {
+                Log::warn("Could not parse symbols from %s", path);
+            }
+            cc->sort();
+            array->add(cc);
+        } else {
+            delete cc;
+        }
+    }
+}
+
+void Symbols::initLibraryRanges() {
+    // No initialization needed on macOS
+}
+
+bool Symbols::isLibcOrPthreadAddress(uintptr_t pc) {
+    return false;
+}
+
+#endif // __APPLE__
diff --git a/ddprof-lib/src/main/cpp/trap.cpp b/ddprof-lib/src/main/cpp/trap.cpp
new file mode 100644
index 00000000..38e47fc4
--- /dev/null
+++ b/ddprof-lib/src/main/cpp/trap.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright The async-profiler authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <sys/mman.h>
+#include "trap.h"
+#include "os.h"
+
+
+uintptr_t Trap::_page_start[TRAP_COUNT] = {0};
+
+
+bool Trap::isFaultInstruction(uintptr_t pc) {
+    for (int i = 0; i < TRAP_COUNT; i++) {
+        if (pc - _page_start[i] < OS::page_size) {
+            return true;
+        }
+    }
+    return false;
+}
+
+void Trap::assign(const void* address, uintptr_t offset) {
+    _entry = (uintptr_t)address;
+    if (_entry == 0) {
+        return;
+    }
+    _entry += offset;
+
+#if defined(__arm__) || defined(__thumb__)
+    _breakpoint_insn = (_entry & 1) ? BREAKPOINT_THUMB : BREAKPOINT;
+    _entry &= ~(uintptr_t)1;
+#endif
+
+    _saved_insn = *(instruction_t*)_entry;
+    _page_start[_id] = _entry & -OS::page_size;
+}
+
+// Two allocation traps are always enabled/disabled together.
+// If both traps belong to the same page, protect/unprotect it just once.
+void Trap::pair(Trap& second) {
+    if (_page_start[_id] == _page_start[second._id]) {
+        _protect = false;
+        second._unprotect = false;
+    }
+}
+
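For orientation, a hedged usage sketch of this class; the trap id and symbol name are hypothetical, and real call sites live elsewhere in the profiler. assign() records the target address and its original instruction, install() writes the breakpoint, uninstall() restores the saved word:

```cpp
#include <dlfcn.h>
#include "trap.h"

void example() {
    Trap trap(0);                                        // hypothetical trap id
    trap.assign(dlsym(RTLD_DEFAULT, "some_function"));   // hypothetical target symbol
    if (trap.install()) {
        // ... the SIGTRAP handler recognizes the fault via trap.covers(pc) ...
        trap.uninstall();
    }
}
```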
+// Patch instruction at the entry point
+bool Trap::patch(instruction_t insn) {
+    if (_unprotect) {
+        int prot = WX_MEMORY ? (PROT_READ | PROT_WRITE) : (PROT_READ | PROT_WRITE | PROT_EXEC);
+        if (OS::mprotect((void*)(_entry & -OS::page_size), OS::page_size, prot) != 0) {
+            return false;
+        }
+    }
+
+    *(instruction_t*)_entry = insn;
+    flushCache(_entry);
+
+    if (_protect) {
+        OS::mprotect((void*)(_entry & -OS::page_size), OS::page_size, PROT_READ | PROT_EXEC);
+    }
+    return true;
+}
diff --git a/ddprof-lib/src/main/cpp/trap.h b/ddprof-lib/src/main/cpp/trap.h
new file mode 100644
index 00000000..97620e44
--- /dev/null
+++ b/ddprof-lib/src/main/cpp/trap.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright The async-profiler authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef _TRAP_H
+#define _TRAP_H
+
+#include <stdint.h>
+#include "arch.h"
+
+
+const int TRAP_COUNT = 4;
+
+
+class Trap {
+  private:
+    int _id;
+    bool _unprotect;
+    bool _protect;
+    uintptr_t _entry;
+    instruction_t _breakpoint_insn;
+    instruction_t _saved_insn;
+
+    bool patch(instruction_t insn);
+
+    static uintptr_t _page_start[TRAP_COUNT];
+
+  public:
+    Trap(int id) : _id(id), _unprotect(true), _protect(WX_MEMORY), _entry(0), _breakpoint_insn(BREAKPOINT) {
+    }
+
+    uintptr_t entry() {
+        return _entry;
+    }
+
+    bool covers(uintptr_t pc) {
+        // PC points either to BREAKPOINT instruction or to the next one
+        return pc - _entry <= sizeof(instruction_t);
+    }
+
+    void assign(const void* address, uintptr_t offset = BREAKPOINT_OFFSET);
+    void pair(Trap& second);
+
+    bool install() {
+        return _entry == 0 || patch(_breakpoint_insn);
+    }
+
+    bool uninstall() {
+        return _entry == 0 || patch(_saved_insn);
+    }
+
+    static bool isFaultInstruction(uintptr_t pc);
+};
+
+#endif // _TRAP_H
diff --git a/ddprof-lib/src/main/cpp/tsc.cpp b/ddprof-lib/src/main/cpp/tsc.cpp
new file mode 100644
index 00000000..8811d443
--- /dev/null
+++ b/ddprof-lib/src/main/cpp/tsc.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright The async-profiler authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdint.h>
+#include "tsc.h"
+#include "vmEntry.h"
+
+
+bool TSC::_initialized = false;
+bool TSC::_available = false;
+bool TSC::_enabled = false;
+u64 TSC::_offset = 0;
+u64 TSC::_frequency = NANOTIME_FREQ;
+
+void TSC::enable(Clock clock) {
+    if (!TSC_SUPPORTED || clock == CLK_MONOTONIC) {
+        _enabled = false;
+        return;
+    }
+
+    if (!_initialized) {
+        if (VM::loaded()) {
+            JNIEnv* env = VM::jni();
+
+            jfieldID jvm;
+            jmethodID getTicksFrequency, counterTime;
+            jclass cls = env->FindClass("jdk/jfr/internal/JVM");
+            if (cls != NULL
+                    && ((jvm = env->GetStaticFieldID(cls, "jvm", "Ljdk/jfr/internal/JVM;")) != NULL)
+                    && ((getTicksFrequency = env->GetMethodID(cls, "getTicksFrequency", "()J")) != NULL)
+                    && ((counterTime = env->GetStaticMethodID(cls, "counterTime", "()J")) != NULL)) {
+                u64 frequency = env->CallLongMethod(env->GetStaticObjectField(cls, jvm), getTicksFrequency);
+                if (frequency > NANOTIME_FREQ) {
+                    // Default 1GHz frequency might mean that rdtsc is not available
+                    u64 jvm_ticks = env->CallStaticLongMethod(cls, counterTime);
+                    _offset = rdtsc() - jvm_ticks;
+                    _frequency = frequency;
+                    _available = true;
+                }
+            }
+
+            env->ExceptionClear();
+        } else if (cpuHasGoodTimestampCounter()) {
+            _offset = 0;
+            _available = true;
+        }
+
+        _initialized = true;
+    }
+
+    _enabled = _available;
+}
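The calibration above anchors raw rdtsc values to JFR's ticks timeline (`_offset`) and records JFR's tick frequency. A hedged sketch of how a caller converts a sampled interval to nanoseconds with the API declared in tsc.h below:

```cpp
#include "tsc.h"

u64 elapsedNanos() {
    u64 start = TSC::ticks();
    // ... sampled work ...
    u64 end = TSC::ticks();
    // frequency() is ticks per second, so scale the delta to nanoseconds.
    // Works in both modes: when TSC is disabled, ticks() is already
    // OS::nanotime() and frequency() is NANOTIME_FREQ.
    return (end - start) * NANOTIME_FREQ / TSC::frequency();
}
```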
diff --git a/ddprof-lib/src/main/cpp/tsc.h b/ddprof-lib/src/main/cpp/tsc.h
new file mode 100644
index 00000000..1874b9ed
--- /dev/null
+++ b/ddprof-lib/src/main/cpp/tsc.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright The async-profiler authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef _TSC_H
+#define _TSC_H
+
+#include "arguments.h"
+#include "os.h"
+
+
+const u64 NANOTIME_FREQ = 1000000000;
+
+
+#if defined(__x86_64__) || defined(__i386__)
+
+#include <cpuid.h>
+
+#define TSC_SUPPORTED true
+
+static inline u64 rdtsc() {
+#if defined(__x86_64__)
+    u32 lo, hi;
+    asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
+    return ((u64)hi << 32) | lo;
+#else
+    u64 result;
+    asm volatile("rdtsc" : "=A" (result));
+    return result;
+#endif
+}
+
+// Returns true if this CPU has a good ("invariant") timestamp counter
+static bool cpuHasGoodTimestampCounter() {
+    unsigned int eax, ebx, ecx, edx;
+
+    // Check if CPUID supports misc feature flags
+    __cpuid(0x80000000, eax, ebx, ecx, edx);
+    if (eax < 0x80000007) {
+        return false;
+    }
+
+    // Get misc feature flags
+    __cpuid(0x80000007, eax, ebx, ecx, edx);
+
+    // Bit 8 of EDX indicates invariant TSC
+    return (edx & (1 << 8)) != 0;
+}
+
+#elif defined(__aarch64__)
+
+#define TSC_SUPPORTED true
+
+static inline u64 rdtsc() {
+    u64 value;
+    asm volatile("mrs %0, cntvct_el0" : "=r"(value));
+    return value;
+}
+
+static bool cpuHasGoodTimestampCounter() {
+    // AARCH64 always has a good timestamp counter.
+    return true;
+}
+
+#else
+
+#define TSC_SUPPORTED false
+#define rdtsc() 0
+
+static bool cpuHasGoodTimestampCounter() {
+    return false;
+}
+
+#endif
+
+
+class TSC {
+  private:
+    static bool _initialized;
+    static bool _available;
+    static bool _enabled;
+    static u64 _offset;
+    static u64 _frequency;
+
+  public:
+    static void enable(Clock clock);
+
+    static bool enabled() {
+        return TSC_SUPPORTED && _enabled;
+    }
+
+    static u64 ticks() {
+        return enabled() ? rdtsc() - _offset : OS::nanotime();
+    }
+
+    // Ticks per second.
+    // When the TSC is used without a JVM, no calibration happens,
+    // so this function may return an inaccurate value.
+    static u64 frequency() {
+        return enabled() ? _frequency : NANOTIME_FREQ;
+    }
+};
+
+#endif // _TSC_H
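cpuHasGoodTimestampCounter() probes CPUID leaf 0x80000007 directly. An equivalent, hedged standalone check using GCC/Clang's `__get_cpuid` helper from the same `<cpuid.h>`, which performs the max-leaf test itself:

```cpp
#include <cpuid.h>

static bool invariantTsc() {
    unsigned int eax, ebx, ecx, edx;
    // __get_cpuid returns 0 if leaf 0x80000007 is not supported.
    if (!__get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx)) {
        return false;
    }
    return (edx & (1u << 8)) != 0;  // CPUID.80000007H:EDX[8] = Invariant TSC
}
```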
diff --git a/ddprof-lib/src/main/cpp/vmStructs.cpp b/ddprof-lib/src/main/cpp/vmStructs.cpp
new file mode 100644
index 00000000..e7f3b66d
--- /dev/null
+++ b/ddprof-lib/src/main/cpp/vmStructs.cpp
@@ -0,0 +1,762 @@
+/*
+ * Copyright The async-profiler authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <pthread.h>
+#include <string.h>
+#include "vmStructs.h"
+#include "vmEntry.h"
+#include "j9Ext.h"
+#include "safeAccess.h"
+
+
+CodeCache* VMStructs::_libjvm = NULL;
+
+bool VMStructs::_has_class_names = false;
+bool VMStructs::_has_method_structs = false;
+bool VMStructs::_has_compiler_structs = false;
+bool VMStructs::_has_stack_structs = false;
+bool VMStructs::_has_class_loader_data = false;
+bool VMStructs::_has_native_thread_id = false;
+bool VMStructs::_has_perm_gen = false;
+bool VMStructs::_can_dereference_jmethod_id = false;
+bool VMStructs::_compact_object_headers = false;
+
+int VMStructs::_klass_name_offset = -1;
+int VMStructs::_symbol_length_offset = -1;
+int VMStructs::_symbol_length_and_refcount_offset = -1;
+int VMStructs::_symbol_body_offset = -1;
+int VMStructs::_oop_klass_offset = -1;
+int VMStructs::_class_loader_data_offset = -1;
+int VMStructs::_class_loader_data_next_offset = -1;
+int VMStructs::_methods_offset = -1;
+int VMStructs::_jmethod_ids_offset = -1;
+int VMStructs::_thread_osthread_offset = -1;
+int VMStructs::_thread_anchor_offset = -1;
+int VMStructs::_thread_state_offset = -1;
+int VMStructs::_thread_vframe_offset = -1;
+int VMStructs::_thread_exception_offset = -1;
+int VMStructs::_osthread_id_offset = -1;
+int VMStructs::_call_wrapper_anchor_offset = -1;
+int VMStructs::_comp_env_offset = -1;
+int VMStructs::_comp_task_offset = -1;
+int VMStructs::_comp_method_offset = -1;
+int VMStructs::_anchor_sp_offset = -1;
+int VMStructs::_anchor_pc_offset = -1;
+int VMStructs::_anchor_fp_offset = -1;
+int VMStructs::_blob_size_offset = -1;
+int VMStructs::_frame_size_offset = -1;
+int VMStructs::_frame_complete_offset = -1;
+int VMStructs::_code_offset = -1;
+int VMStructs::_data_offset = -1;
+int VMStructs::_mutable_data_offset = -1;
+int VMStructs::_relocation_size_offset = -1;
+int VMStructs::_scopes_pcs_offset = -1;
+int VMStructs::_scopes_data_offset = -1;
+int VMStructs::_nmethod_name_offset = -1;
+int VMStructs::_nmethod_method_offset = -1;
+int VMStructs::_nmethod_entry_offset = -1;
+int VMStructs::_nmethod_state_offset = -1;
+int VMStructs::_nmethod_level_offset = -1;
+int VMStructs::_nmethod_metadata_offset = -1;
+int VMStructs::_nmethod_immutable_offset = -1;
+int VMStructs::_method_constmethod_offset = -1;
+int VMStructs::_method_code_offset = -1;
+int VMStructs::_constmethod_constants_offset = -1;
+int VMStructs::_constmethod_idnum_offset = -1;
+int VMStructs::_constmethod_size = -1;
+int VMStructs::_pool_holder_offset = -1;
+int VMStructs::_array_len_offset = 0;
+int VMStructs::_array_data_offset = -1;
+int VMStructs::_code_heap_memory_offset = -1;
+int VMStructs::_code_heap_segmap_offset = -1;
+int VMStructs::_code_heap_segment_shift = -1;
+int VMStructs::_heap_block_used_offset = -1;
+int VMStructs::_vs_low_bound_offset = -1;
+int VMStructs::_vs_high_bound_offset = -1;
+int VMStructs::_vs_low_offset = -1;
+int VMStructs::_vs_high_offset = -1;
+int VMStructs::_flag_name_offset = -1;
+int VMStructs::_flag_addr_offset = -1;
+int VMStructs::_flag_origin_offset = -1;
+const char* VMStructs::_flags_addr = NULL;
+int VMStructs::_flag_count = 0;
+int VMStructs::_flag_size = 0;
+char* VMStructs::_code_heap[3] = 
{}; +const void* VMStructs::_code_heap_low = NO_MIN_ADDRESS; +const void* VMStructs::_code_heap_high = NO_MAX_ADDRESS; +char** VMStructs::_code_heap_addr = NULL; +const void** VMStructs::_code_heap_low_addr = NULL; +const void** VMStructs::_code_heap_high_addr = NULL; +int* VMStructs::_klass_offset_addr = NULL; +char** VMStructs::_narrow_klass_base_addr = NULL; +char* VMStructs::_narrow_klass_base = NULL; +int* VMStructs::_narrow_klass_shift_addr = NULL; +int VMStructs::_narrow_klass_shift = -1; +char** VMStructs::_collected_heap_addr = NULL; +char* VMStructs::_collected_heap = NULL; +int VMStructs::_collected_heap_reserved_offset = -1; +int VMStructs::_region_start_offset = -1; +int VMStructs::_region_size_offset = -1; +int VMStructs::_markword_klass_shift = -1; +int VMStructs::_markword_monitor_value = -1; +int VMStructs::_entry_frame_call_wrapper_offset = -1; +int VMStructs::_interpreter_frame_bcp_offset = 0; +unsigned char VMStructs::_unsigned5_base = 0; +const void** VMStructs::_call_stub_return_addr = NULL; +const void* VMStructs::_call_stub_return = NULL; +const void* VMStructs::_interpreted_frame_valid_start = NULL; +const void* VMStructs::_interpreted_frame_valid_end = NULL; + +jfieldID VMStructs::_eetop; +jfieldID VMStructs::_tid; +jfieldID VMStructs::_klass = NULL; +int VMStructs::_tls_index = -1; +intptr_t VMStructs::_env_offset = -1; +void* VMStructs::_java_thread_vtbl[6]; + +VMStructs::LockFunc VMStructs::_lock_func; +VMStructs::LockFunc VMStructs::_unlock_func; + + +uintptr_t VMStructs::readSymbol(const char* symbol_name) { + const void* symbol = _libjvm->findSymbol(symbol_name); + if (symbol == NULL) { + // Avoid JVM crash in case of missing symbols + return 0; + } + return *(uintptr_t*)symbol; +} + +// Run at agent load time +void VMStructs::init(CodeCache* libjvm) { + if (libjvm != NULL) { + _libjvm = libjvm; + initOffsets(); + initJvmFunctions(); + } +} + +// Run when VM is initialized and JNI is available +void VMStructs::ready() { + resolveOffsets(); + patchSafeFetch(); + initThreadBridge(); +} + +void VMStructs::initOffsets() { + uintptr_t entry = readSymbol("gHotSpotVMStructs"); + uintptr_t stride = readSymbol("gHotSpotVMStructEntryArrayStride"); + uintptr_t type_offset = readSymbol("gHotSpotVMStructEntryTypeNameOffset"); + uintptr_t field_offset = readSymbol("gHotSpotVMStructEntryFieldNameOffset"); + uintptr_t offset_offset = readSymbol("gHotSpotVMStructEntryOffsetOffset"); + uintptr_t address_offset = readSymbol("gHotSpotVMStructEntryAddressOffset"); + + if (entry != 0 && stride != 0) { + for (;; entry += stride) { + const char* type = *(const char**)(entry + type_offset); + const char* field = *(const char**)(entry + field_offset); + if (type == NULL || field == NULL) { + break; + } + + if (strcmp(type, "Klass") == 0) { + if (strcmp(field, "_name") == 0) { + _klass_name_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "Symbol") == 0) { + if (strcmp(field, "_length") == 0) { + _symbol_length_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_length_and_refcount") == 0) { + _symbol_length_and_refcount_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_body") == 0) { + _symbol_body_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "oopDesc") == 0) { + if (strcmp(field, "_metadata._klass") == 0) { + _oop_klass_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "Universe") == 0 || strcmp(type, "CompressedKlassPointers") == 0) { + if (strcmp(field, "_narrow_klass._base") 
== 0 || strcmp(field, "_base") == 0) { + _narrow_klass_base_addr = *(char***)(entry + address_offset); + } else if (strcmp(field, "_narrow_klass._shift") == 0 || strcmp(field, "_shift") == 0) { + _narrow_klass_shift_addr = *(int**)(entry + address_offset); + } else if (strcmp(field, "_collectedHeap") == 0) { + _collected_heap_addr = *(char***)(entry + address_offset); + } + } else if (strcmp(type, "CollectedHeap") == 0) { + if (strcmp(field, "_reserved") == 0) { + _collected_heap_reserved_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "MemRegion") == 0) { + if (strcmp(field, "_start") == 0) { + _region_start_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_word_size") == 0) { + _region_size_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "CompiledMethod") == 0 || strcmp(type, "nmethod") == 0) { + if (strcmp(field, "_method") == 0) { + _nmethod_method_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_verified_entry_offset") == 0) { + _nmethod_entry_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_verified_entry_point") == 0) { + _nmethod_entry_offset = - *(int*)(entry + offset_offset); + } else if (strcmp(field, "_state") == 0) { + _nmethod_state_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_comp_level") == 0) { + _nmethod_level_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_metadata_offset") == 0) { + _nmethod_metadata_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_immutable_data") == 0) { + _nmethod_immutable_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_scopes_pcs_offset") == 0) { + _scopes_pcs_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_scopes_data_offset") == 0) { + _scopes_data_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_scopes_data_begin") == 0) { + _scopes_data_offset = - *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "Method") == 0) { + if (strcmp(field, "_constMethod") == 0) { + _method_constmethod_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_code") == 0) { + _method_code_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "ConstMethod") == 0) { + if (strcmp(field, "_constants") == 0) { + _constmethod_constants_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_method_idnum") == 0) { + _constmethod_idnum_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "ConstantPool") == 0) { + if (strcmp(field, "_pool_holder") == 0) { + _pool_holder_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "InstanceKlass") == 0) { + if (strcmp(field, "_class_loader_data") == 0) { + _class_loader_data_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_methods") == 0) { + _methods_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_methods_jmethod_ids") == 0) { + _jmethod_ids_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "ClassLoaderData") == 0) { + if (strcmp(field, "_next") == 0) { + _class_loader_data_next_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "java_lang_Class") == 0) { + if (strcmp(field, "_klass_offset") == 0) { + _klass_offset_addr = *(int**)(entry + address_offset); + } + } else if (strcmp(type, "Thread") == 0) { + // Since JDK 25, _osthread field belongs to Thread rather than JavaThread + if (strcmp(field, "_osthread") == 0) { + 
_thread_osthread_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "JavaThread") == 0) { + if (strcmp(field, "_osthread") == 0) { + _thread_osthread_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_anchor") == 0) { + _thread_anchor_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_thread_state") == 0) { + _thread_state_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_vframe_array_head") == 0) { + _thread_vframe_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "ThreadShadow") == 0) { + if (strcmp(field, "_exception_file") == 0) { + _thread_exception_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "OSThread") == 0) { + if (strcmp(field, "_thread_id") == 0) { + _osthread_id_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "CompilerThread") == 0) { + if (strcmp(field, "_env") == 0) { + _comp_env_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "ciEnv") == 0) { + if (strcmp(field, "_task") == 0) { + _comp_task_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "CompileTask") == 0) { + if (strcmp(field, "_method") == 0) { + _comp_method_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "JavaCallWrapper") == 0) { + if (strcmp(field, "_anchor") == 0) { + _call_wrapper_anchor_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "JavaFrameAnchor") == 0) { + if (strcmp(field, "_last_Java_sp") == 0) { + _anchor_sp_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_last_Java_pc") == 0) { + _anchor_pc_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_last_Java_fp") == 0) { + _anchor_fp_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "CodeBlob") == 0) { + if (strcmp(field, "_size") == 0) { + _blob_size_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_frame_size") == 0) { + _frame_size_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_frame_complete_offset") == 0) { + _frame_complete_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_code_offset") == 0) { + _code_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_code_begin") == 0) { + _code_offset = - *(int*)(entry + offset_offset); + } else if (strcmp(field, "_data_offset") == 0) { + _data_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_mutable_data") == 0) { + _mutable_data_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_relocation_size") == 0) { + _relocation_size_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_name") == 0) { + _nmethod_name_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "CodeCache") == 0) { + if (strcmp(field, "_heap") == 0) { + _code_heap_addr = *(char***)(entry + address_offset); + } else if (strcmp(field, "_heaps") == 0) { + _code_heap_addr = *(char***)(entry + address_offset); + } else if (strcmp(field, "_low_bound") == 0) { + _code_heap_low_addr = *(const void***)(entry + address_offset); + } else if (strcmp(field, "_high_bound") == 0) { + _code_heap_high_addr = *(const void***)(entry + address_offset); + } + } else if (strcmp(type, "CodeHeap") == 0) { + if (strcmp(field, "_memory") == 0) { + _code_heap_memory_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_segmap") == 0) { + _code_heap_segmap_offset = *(int*)(entry + offset_offset); + } else if 
(strcmp(field, "_log2_segment_size") == 0) { + _code_heap_segment_shift = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "HeapBlock::Header") == 0) { + if (strcmp(field, "_used") == 0) { + _heap_block_used_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "VirtualSpace") == 0) { + if (strcmp(field, "_low_boundary") == 0) { + _vs_low_bound_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_high_boundary") == 0) { + _vs_high_bound_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_low") == 0) { + _vs_low_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_high") == 0) { + _vs_high_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "StubRoutines") == 0) { + if (strcmp(field, "_call_stub_return_address") == 0) { + _call_stub_return_addr = *(const void***)(entry + address_offset); + } + } else if (strcmp(type, "GrowableArrayBase") == 0 || strcmp(type, "GenericGrowableArray") == 0) { + if (strcmp(field, "_len") == 0) { + _array_len_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "GrowableArray") == 0) { + if (strcmp(field, "_data") == 0) { + _array_data_offset = *(int*)(entry + offset_offset); + } + } else if (strcmp(type, "JVMFlag") == 0 || strcmp(type, "Flag") == 0) { + if (strcmp(field, "_name") == 0 || strcmp(field, "name") == 0) { + _flag_name_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_addr") == 0 || strcmp(field, "addr") == 0) { + _flag_addr_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "_flags") == 0 || strcmp(field, "origin") == 0) { + _flag_origin_offset = *(int*)(entry + offset_offset); + } else if (strcmp(field, "flags") == 0) { + _flags_addr = **(char***)(entry + address_offset); + } else if (strcmp(field, "numFlags") == 0) { + _flag_count = **(int**)(entry + address_offset); + } + } else if (strcmp(type, "PcDesc") == 0) { + // TODO + } else if (strcmp(type, "PermGen") == 0) { + _has_perm_gen = true; + } + } + } + + entry = readSymbol("gHotSpotVMTypes"); + stride = readSymbol("gHotSpotVMTypeEntryArrayStride"); + type_offset = readSymbol("gHotSpotVMTypeEntryTypeNameOffset"); + uintptr_t size_offset = readSymbol("gHotSpotVMTypeEntrySizeOffset"); + + if (entry != 0 && stride != 0) { + for (;; entry += stride) { + const char* type = *(const char**)(entry + type_offset); + if (type == NULL) { + break; + } + + if (strcmp(type, "JVMFlag") == 0 || strcmp(type, "Flag") == 0) { + _flag_size = *(int*)(entry + size_offset); + } else if (strcmp(type, "ConstMethod") == 0) { + _constmethod_size = *(int*)(entry + size_offset); + } + } + } + + entry = readSymbol("gHotSpotVMLongConstants"); + stride = readSymbol("gHotSpotVMLongConstantEntryArrayStride"); + uintptr_t name_offset = readSymbol("gHotSpotVMLongConstantEntryNameOffset"); + uintptr_t value_offset = readSymbol("gHotSpotVMLongConstantEntryValueOffset"); + + if (entry != 0 && stride != 0) { + for (;; entry += stride) { + const char* name = *(const char**)(entry + name_offset); + if (name == NULL) { + break; + } + + if (strncmp(name, "markWord::", 10) == 0) { + if (strcmp(name + 10, "klass_shift") == 0) { + _markword_klass_shift = *(long*)(entry + value_offset); + } else if (strcmp(name + 10, "monitor_value") == 0) { + _markword_monitor_value = *(long*)(entry + value_offset); + } + } + } + } + + entry = readSymbol("gHotSpotVMIntConstants"); + stride = readSymbol("gHotSpotVMIntConstantEntryArrayStride"); + name_offset = 
readSymbol("gHotSpotVMIntConstantEntryNameOffset"); + value_offset = readSymbol("gHotSpotVMIntConstantEntryValueOffset"); + + if (entry != 0 && stride != 0) { + for (;; entry += stride) { + const char* name = *(const char**)(entry + name_offset); + if (name == NULL) { + break; + } + + if (strcmp(name, "frame::entry_frame_call_wrapper_offset") == 0) { + _entry_frame_call_wrapper_offset = *(int*)(entry + value_offset) * sizeof(uintptr_t); + break; // remove it for reading more constants + } + } + } +} + +void VMStructs::resolveOffsets() { + if (VM::isOpenJ9() || VM::isZing()) { + return; + } + + if (_klass_offset_addr != NULL) { + _klass = (jfieldID)(uintptr_t)(*_klass_offset_addr << 2 | 2); + } + + JVMFlag* ccp = JVMFlag::find("UseCompressedClassPointers"); + if (ccp != NULL && ccp->get() && _narrow_klass_base_addr != NULL && _narrow_klass_shift_addr != NULL) { + _narrow_klass_base = *_narrow_klass_base_addr; + _narrow_klass_shift = *_narrow_klass_shift_addr; + } + + JVMFlag* coh = JVMFlag::find("UseCompactObjectHeaders"); + if (coh != NULL && coh->get()) { + _compact_object_headers = true; + } + + _has_class_names = _klass_name_offset >= 0 + && (_compact_object_headers ? (_markword_klass_shift >= 0 && _markword_monitor_value == MONITOR_BIT) + : _oop_klass_offset >= 0) + && (_symbol_length_offset >= 0 || _symbol_length_and_refcount_offset >= 0) + && _symbol_body_offset >= 0 + && _klass != NULL; + + _has_method_structs = _jmethod_ids_offset >= 0 + && _nmethod_method_offset >= 0 + && _nmethod_entry_offset != -1 + && _nmethod_state_offset >= 0 + && _method_constmethod_offset >= 0 + && _method_code_offset >= 0 + && _constmethod_constants_offset >= 0 + && _constmethod_idnum_offset >= 0 + && _constmethod_size >= 0 + && _pool_holder_offset >= 0; + + _has_compiler_structs = _comp_env_offset >= 0 + && _comp_task_offset >= 0 + && _comp_method_offset >= 0; + + _has_class_loader_data = _class_loader_data_offset >= 0 + && _class_loader_data_next_offset == sizeof(uintptr_t) * 8 + 8 + && _methods_offset >= 0 + && _klass != NULL + && _lock_func != NULL && _unlock_func != NULL; + +#if defined(__x86_64__) || defined(__i386__) + _interpreter_frame_bcp_offset = VM::hotspot_version() >= 11 ? -8 : VM::hotspot_version() == 8 ? -7 : 0; +#elif defined(__aarch64__) + _interpreter_frame_bcp_offset = VM::hotspot_version() >= 11 ? -9 : VM::hotspot_version() == 8 ? -7 : 0; + // The constant is missing on ARM, but fortunately, it has been stable for years across all JDK versions + _entry_frame_call_wrapper_offset = -64; +#elif defined(__arm__) || defined(__thumb__) + _interpreter_frame_bcp_offset = VM::hotspot_version() >= 11 ? -8 : 0; + _entry_frame_call_wrapper_offset = 0; +#endif + + // JDK-8292758 has slightly changed ScopeDesc encoding + if (VM::hotspot_version() >= 20) { + _unsigned5_base = 1; + } + + if (_call_stub_return_addr != NULL) { + _call_stub_return = *_call_stub_return_addr; + } + + // Since JDK 23, _metadata_offset is relative to _data_offset. 
See metadata() + if (_nmethod_immutable_offset < 0) { + _data_offset = 0; + } + + _has_stack_structs = _has_method_structs + && _call_wrapper_anchor_offset >= 0 + && _entry_frame_call_wrapper_offset != -1 + && _interpreter_frame_bcp_offset != 0 + && _code_offset != -1 + && _data_offset >= 0 + && _scopes_data_offset != -1 + && _scopes_pcs_offset >= 0 + && ((_mutable_data_offset >= 0 && _relocation_size_offset >= 0) || _nmethod_metadata_offset >= 0) + && _thread_vframe_offset >= 0 + && _thread_exception_offset >= 0 + && _constmethod_size >= 0; + + // Since JDK-8268406, it is no longer possible to get VMMethod* by dereferencing jmethodID + _can_dereference_jmethod_id = _has_method_structs && VM::hotspot_version() <= 25; + + if (_code_heap_addr != NULL && _code_heap_low_addr != NULL && _code_heap_high_addr != NULL) { + char* code_heaps = *_code_heap_addr; + unsigned int code_heap_count = *(unsigned int*)(code_heaps + _array_len_offset); + if (code_heap_count <= 3 && _array_data_offset >= 0) { + char* code_heap_array = *(char**)(code_heaps + _array_data_offset); + memcpy(_code_heap, code_heap_array, code_heap_count * sizeof(_code_heap[0])); + } + _code_heap_low = *_code_heap_low_addr; + _code_heap_high = *_code_heap_high_addr; + } else if (_code_heap_addr != NULL && _code_heap_memory_offset >= 0) { + _code_heap[0] = *_code_heap_addr; + _code_heap_low = *(const void**)(_code_heap[0] + _code_heap_memory_offset + _vs_low_bound_offset); + _code_heap_high = *(const void**)(_code_heap[0] + _code_heap_memory_offset + _vs_high_bound_offset); + } + + // Invariant: _code_heap[i] != NULL iff all CodeHeap structures are available + if (_code_heap[0] != NULL && _code_heap_segment_shift >= 0) { + _code_heap_segment_shift = *(int*)(_code_heap[0] + _code_heap_segment_shift); + } + if (_code_heap_memory_offset < 0 || _code_heap_segmap_offset < 0 || + _code_heap_segment_shift < 0 || _code_heap_segment_shift > 16 || + _heap_block_used_offset < 0) { + memset(_code_heap, 0, sizeof(_code_heap)); + } + + if (_collected_heap_addr != NULL && _collected_heap_reserved_offset >= 0 && + _region_start_offset >= 0 && _region_size_offset >= 0) { + _collected_heap = *_collected_heap_addr + _collected_heap_reserved_offset; + } +} + +void VMStructs::initJvmFunctions() { + if (VM::hotspot_version() == 8) { + _lock_func = (LockFunc)_libjvm->findSymbol("_ZN7Monitor28lock_without_safepoint_checkEv"); + _unlock_func = (LockFunc)_libjvm->findSymbol("_ZN7Monitor6unlockEv"); + } + + if (VM::hotspot_version() > 0) { + CodeBlob* blob = _libjvm->findBlob("_ZNK5frame26is_interpreted_frame_validEP10JavaThread"); + if (blob != NULL) { + _interpreted_frame_valid_start = blob->_start; + _interpreted_frame_valid_end = blob->_end; + } + } +} + +void VMStructs::patchSafeFetch() { + // Workarounds for JDK-8307549 and JDK-8321116 + if (WX_MEMORY && VM::hotspot_version() == 17) { + void** entry = (void**)_libjvm->findSymbol("_ZN12StubRoutines18_safefetch32_entryE"); + if (entry != NULL) { + *entry = (void*)SafeAccess::load32; + } + } else if (WX_MEMORY && VM::hotspot_version() == 11) { + void** entry = (void**)_libjvm->findSymbol("_ZN12StubRoutines17_safefetchN_entryE"); + if (entry != NULL) { + *entry = (void*)SafeAccess::load; + } + } +} + +void VMStructs::initTLS(void* vm_thread) { + for (int i = 0; i < 1024; i++) { + if (pthread_getspecific((pthread_key_t)i) == vm_thread) { + _tls_index = i; + break; + } + } +} + +void VMStructs::initThreadBridge() { + jthread thread; + if (VM::jvmti()->GetCurrentThread(&thread) != 0) { + return; + } + + JNIEnv* 
env = VM::jni(); + jclass thread_class = env->FindClass("java/lang/Thread"); + if (thread_class == NULL || (_tid = env->GetFieldID(thread_class, "tid", "J")) == NULL) { + env->ExceptionClear(); + return; + } + + if (VM::isOpenJ9()) { + void* j9thread = J9Ext::j9thread_self(); + if (j9thread != NULL) { + initTLS(j9thread); + } + } else { + // Get eetop field - a bridge from Java Thread to VMThread + if ((_eetop = env->GetFieldID(thread_class, "eetop", "J")) == NULL) { + // No such field - probably not a HotSpot JVM + env->ExceptionClear(); + return; + } + + VMThread* vm_thread = VMThread::fromJavaThread(env, thread); + if (vm_thread != NULL) { + _has_native_thread_id = _thread_osthread_offset >= 0 && _osthread_id_offset >= 0; + initTLS(vm_thread); + _env_offset = (intptr_t)env - (intptr_t)vm_thread; + memcpy(_java_thread_vtbl, vm_thread->vtable(), sizeof(_java_thread_vtbl)); + } + } +} + +VMThread* VMThread::current() { + return _tls_index >= 0 ? (VMThread*)pthread_getspecific((pthread_key_t)_tls_index) : NULL; +} + +int VMThread::nativeThreadId(JNIEnv* jni, jthread thread) { + if (_has_native_thread_id) { + VMThread* vm_thread = fromJavaThread(jni, thread); + return vm_thread != NULL ? vm_thread->osThreadId() : -1; + } + return VM::isOpenJ9() ? J9Ext::GetOSThreadID(thread) : -1; +} + +int VMThread::osThreadId() { + const char* osthread = *(const char**) at(_thread_osthread_offset); + if (osthread != NULL) { + // Java thread may be in the middle of termination, and its osthread structure just released + return SafeAccess::load32((int32_t*)(osthread + _osthread_id_offset), -1); + } + return -1; +} + +JNIEnv* VMThread::jni() { + if (_env_offset < 0) { + return VM::jni(); // fallback for non-HotSpot JVM + } + return isJavaThread() ? (JNIEnv*) at(_env_offset) : NULL; +} + +jmethodID VMMethod::id() { + // We may find a bogus NMethod during stack walking, it does not always point to a valid VMMethod + const char* const_method = (const char*) SafeAccess::load((void**) at(_method_constmethod_offset)); + if (!goodPtr(const_method)) { + return NULL; + } + + const char* cpool = (const char*) SafeAccess::load((void**)(const_method + _constmethod_constants_offset)); + unsigned short num = (unsigned short) SafeAccess::load32((int32_t*)(const_method + _constmethod_idnum_offset), 0); + if (goodPtr(cpool)) { + VMKlass* holder = *(VMKlass**)(cpool + _pool_holder_offset); + if (goodPtr(holder)) { + jmethodID* ids = holder->jmethodIDs(); + if (ids != NULL && num < (size_t)ids[0]) { + return ids[num + 1]; + } + } + } + return NULL; +} + +jmethodID VMMethod::validatedId() { + jmethodID method_id = id(); + if (!_can_dereference_jmethod_id || (goodPtr(method_id) && *(VMMethod**)method_id == this)) { + return method_id; + } + return NULL; +} + +NMethod* CodeHeap::findNMethod(char* heap, const void* pc) { + unsigned char* heap_start = *(unsigned char**)(heap + _code_heap_memory_offset + _vs_low_offset); + unsigned char* segmap = *(unsigned char**)(heap + _code_heap_segmap_offset + _vs_low_offset); + size_t idx = ((unsigned char*)pc - heap_start) >> _code_heap_segment_shift; + + if (segmap[idx] == 0xff) { + return NULL; + } + while (segmap[idx] > 0) { + idx -= segmap[idx]; + } + + unsigned char* block = heap_start + (idx << _code_heap_segment_shift) + _heap_block_used_offset; + return *block ? 
align<NMethod*>(block + sizeof(uintptr_t)) : NULL;
+}
+
+JVMFlag* JVMFlag::find(const char* name) {
+    if (_flags_addr != NULL && _flag_size > 0) {
+        for (int i = 0; i < _flag_count; i++) {
+            JVMFlag* f = (JVMFlag*)(_flags_addr + i * _flag_size);
+            if (f->name() != NULL && strcmp(f->name(), name) == 0 && f->addr() != NULL) {
+                return f;
+            }
+        }
+    }
+    return NULL;
+}
+
+int NMethod::findScopeOffset(const void* pc) {
+    intptr_t pc_offset = (const char*)pc - code();
+    if (pc_offset < 0 || pc_offset > 0x7fffffff) {
+        return -1;
+    }
+
+    const int* scopes_pcs = (const int*) at(_scopes_pcs_offset);
+    PcDesc* pcd = (PcDesc*) immutableDataAt(scopes_pcs[0]);
+    PcDesc* pcd_end = (PcDesc*) immutableDataAt(scopes_pcs[1]);
+    int low = 0;
+    int high = (pcd_end - pcd) - 1;
+
+    while (low <= high) {
+        int mid = (unsigned int)(low + high) >> 1;
+        if (pcd[mid]._pc < pc_offset) {
+            low = mid + 1;
+        } else if (pcd[mid]._pc > pc_offset) {
+            high = mid - 1;
+        } else {
+            return pcd[mid]._scope_offset;
+        }
+    }
+
+    return pcd + low < pcd_end ? pcd[low]._scope_offset : -1;
+}
+
+int ScopeDesc::readInt() {
+    unsigned char c = *_stream++;
+    unsigned int n = c - _unsigned5_base;
+    if (c >= 192) {
+        for (int shift = 6; ; shift += 6) {
+            c = *_stream++;
+            n += (c - _unsigned5_base) << shift;
+            if (c < 192 || shift >= 24) break;
+        }
+    }
+    return n;
+}
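readInt() above decodes HotSpot's compressed scope stream, and decode() (declared in vmStructs.h below) returns the sender scope's offset, so inlined frames can be recovered by chaining decodes. A hedged sketch of that walk; the helper name is hypothetical:

```cpp
#include "vmStructs.h"

// Walk the (possibly inlined) Java frames at `pc` inside compiled method `nm`.
void walkInlineChain(NMethod* nm, const void* pc) {
    int offset = nm->findScopeOffset(pc);   // -1 if pc is outside the scope table
    if (offset >= 0) {
        ScopeDesc scope(nm);
        do {
            offset = scope.decode(offset);  // returns the sender scope's offset
            // scope.method() and scope.bci() now describe one frame, innermost first
        } while (offset > 0);
    }
}
```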
diff --git a/ddprof-lib/src/main/cpp/vmStructs.h b/ddprof-lib/src/main/cpp/vmStructs.h
new file mode 100644
index 00000000..1afd9e52
--- /dev/null
+++ b/ddprof-lib/src/main/cpp/vmStructs.h
@@ -0,0 +1,705 @@
+/*
+ * Copyright The async-profiler authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef _VMSTRUCTS_H
+#define _VMSTRUCTS_H
+
+#include <jni.h>
+#include <stdint.h>
+#include <string.h>
+#include <type_traits>
+#include "codeCache.h"
+
+
+class VMStructs {
+  protected:
+    enum { MONITOR_BIT = 2 };
+
+    static CodeCache* _libjvm;
+
+    static bool _has_class_names;
+    static bool _has_method_structs;
+    static bool _has_compiler_structs;
+    static bool _has_stack_structs;
+    static bool _has_class_loader_data;
+    static bool _has_native_thread_id;
+    static bool _has_perm_gen;
+    static bool _can_dereference_jmethod_id;
+    static bool _compact_object_headers;
+
+    static int _klass_name_offset;
+    static int _symbol_length_offset;
+    static int _symbol_length_and_refcount_offset;
+    static int _symbol_body_offset;
+    static int _oop_klass_offset;
+    static int _class_loader_data_offset;
+    static int _class_loader_data_next_offset;
+    static int _methods_offset;
+    static int _jmethod_ids_offset;
+    static int _thread_osthread_offset;
+    static int _thread_anchor_offset;
+    static int _thread_state_offset;
+    static int _thread_vframe_offset;
+    static int _thread_exception_offset;
+    static int _osthread_id_offset;
+    static int _call_wrapper_anchor_offset;
+    static int _comp_env_offset;
+    static int _comp_task_offset;
+    static int _comp_method_offset;
+    static int _anchor_sp_offset;
+    static int _anchor_pc_offset;
+    static int _anchor_fp_offset;
+    static int _blob_size_offset;
+    static int _frame_size_offset;
+    static int _frame_complete_offset;
+    static int _code_offset;
+    static int _data_offset;
+    static int _mutable_data_offset;
+    static int _relocation_size_offset;
+    static int _scopes_pcs_offset;
+    static int _scopes_data_offset;
+    static int _nmethod_name_offset;
+    static int _nmethod_method_offset;
+    static int _nmethod_entry_offset;
+    static int _nmethod_state_offset;
+    static int _nmethod_level_offset;
+    static int _nmethod_metadata_offset;
+    static int _nmethod_immutable_offset;
+    static int _method_constmethod_offset;
+    static int _method_code_offset;
+    static int _constmethod_constants_offset;
+    static int _constmethod_idnum_offset;
+    static int _constmethod_size;
+    static int _pool_holder_offset;
+    static int _array_len_offset;
+    static int _array_data_offset;
+    static int _code_heap_memory_offset;
+    static int _code_heap_segmap_offset;
+    static int _code_heap_segment_shift;
+    static int _heap_block_used_offset;
+    static int _vs_low_bound_offset;
+    static int _vs_high_bound_offset;
+    static int _vs_low_offset;
+    static int _vs_high_offset;
+    static int _flag_name_offset;
+    static int _flag_addr_offset;
+    static int _flag_origin_offset;
+    static const char* _flags_addr;
+    static int _flag_count;
+    static int _flag_size;
+    static char* _code_heap[3];
+    static const void* _code_heap_low;
+    static const void* _code_heap_high;
+    static char** _code_heap_addr;
+    static const void** _code_heap_low_addr;
+    static const void** _code_heap_high_addr;
+    static int* _klass_offset_addr;
+    static char** _narrow_klass_base_addr;
+    static char* _narrow_klass_base;
+    static int* _narrow_klass_shift_addr;
+    static int _narrow_klass_shift;
+    static char** _collected_heap_addr;
+    static char* _collected_heap;
+    static int _collected_heap_reserved_offset;
+    static int _region_start_offset;
+    static int _region_size_offset;
+    static int _markword_klass_shift;
+    static int _markword_monitor_value;
+    static int _entry_frame_call_wrapper_offset;
+    static int _interpreter_frame_bcp_offset;
+    static unsigned char _unsigned5_base;
+    static const void** _call_stub_return_addr;
+    static const void* _call_stub_return;
+    static const void* _interpreted_frame_valid_start;
+    static const void* _interpreted_frame_valid_end;
+
+    static jfieldID _eetop;
+    static jfieldID _tid;
+    static jfieldID _klass;
+    static int _tls_index;
+    static intptr_t _env_offset;
+    static void* _java_thread_vtbl[6];
+
+    typedef void (*LockFunc)(void*);
+    static LockFunc _lock_func;
+    static LockFunc _unlock_func;
+
+    static uintptr_t readSymbol(const char* symbol_name);
+    static void initOffsets();
+    static void resolveOffsets();
+    static void patchSafeFetch();
+    static void initJvmFunctions();
+    static void initTLS(void* vm_thread);
+    static void initThreadBridge();
+
+    const char* at(int offset) {
+        return (const char*)this + offset;
+    }
+
+    static bool goodPtr(const void* ptr) {
+        return (uintptr_t)ptr >= 0x1000 && ((uintptr_t)ptr & (sizeof(uintptr_t) - 1)) == 0;
+    }
+
+    template <typename T>
+    static T align(const void* ptr) {
+        static_assert(std::is_pointer<T>::value, "T must be a pointer type");
+        return (T)((uintptr_t)ptr & ~(sizeof(T) - 1));
+    }
+
+  public:
+    static void init(CodeCache* libjvm);
+    static void ready();
+
+    static CodeCache* libjvm() {
+        return _libjvm;
+    }
+
+    static bool hasClassNames() {
+        return _has_class_names;
+    }
+
+    static bool hasMethodStructs() {
+        return _has_method_structs;
+    }
+
+    static bool hasCompilerStructs() {
+        return _has_compiler_structs;
+    }
+
+    static bool hasStackStructs() {
+        return _has_stack_structs;
+    }
+
+    static bool hasClassLoaderData() {
+        return _has_class_loader_data;
+    }
+
+    static bool hasNativeThreadId() {
+        return _has_native_thread_id;
+    }
+
+    static bool hasJavaThreadId() {
+        return _tid != NULL;
+    }
+
+    static bool isInterpretedFrameValidFunc(const void* pc) {
+        return pc >= _interpreted_frame_valid_start && pc < _interpreted_frame_valid_end;
+    }
+};
+
+
+class MethodList {
+  public:
+    enum { SIZE = 8 };
+
+  private:
+    intptr_t _method[SIZE];
+    int _ptr;
+    MethodList* _next;
+    int _padding;
+
+  public:
+    
MethodList(MethodList* next) : _ptr(0), _next(next), _padding(0) { + for (int i = 0; i < SIZE; i++) { + _method[i] = 0x37; + } + } +}; + + +class NMethod; +class VMMethod; + +class VMSymbol : VMStructs { + public: + unsigned short length() { + if (_symbol_length_offset >= 0) { + return *(unsigned short*) at(_symbol_length_offset); + } else { + return *(unsigned int*) at(_symbol_length_and_refcount_offset) >> 16; + } + } + + const char* body() { + return at(_symbol_body_offset); + } +}; + +class ClassLoaderData : VMStructs { + private: + void* mutex() { + return *(void**) at(sizeof(uintptr_t) * 3); + } + + public: + void lock() { + _lock_func(mutex()); + } + + void unlock() { + _unlock_func(mutex()); + } + + MethodList** methodList() { + return (MethodList**) at(sizeof(uintptr_t) * 6 + 8); + } +}; + +class VMKlass : VMStructs { + public: + static VMKlass* fromJavaClass(JNIEnv* env, jclass cls) { + if (_has_perm_gen) { + jobject klassOop = env->GetObjectField(cls, _klass); + return (VMKlass*)(*(uintptr_t**)klassOop + 2); + } else if (sizeof(VMKlass*) == 8) { + return (VMKlass*)(uintptr_t)env->GetLongField(cls, _klass); + } else { + return (VMKlass*)(uintptr_t)env->GetIntField(cls, _klass); + } + } + + static VMKlass* fromHandle(uintptr_t handle) { + if (_has_perm_gen) { + // On JDK 7 KlassHandle is a pointer to klassOop, hence one more indirection + return (VMKlass*)(*(uintptr_t**)handle + 2); + } else { + return (VMKlass*)handle; + } + } + + static VMKlass* fromOop(uintptr_t oop) { + if (_narrow_klass_shift >= 0) { + uintptr_t narrow_klass; + if (_compact_object_headers) { + uintptr_t mark = *(uintptr_t*)oop; + if (mark & MONITOR_BIT) { + mark = *(uintptr_t*)(mark ^ MONITOR_BIT); + } + narrow_klass = mark >> _markword_klass_shift; + } else { + narrow_klass = *(unsigned int*)(oop + _oop_klass_offset); + } + return (VMKlass*)(_narrow_klass_base + (narrow_klass << _narrow_klass_shift)); + } else { + return *(VMKlass**)(oop + _oop_klass_offset); + } + } + + VMSymbol* name() { + return *(VMSymbol**) at(_klass_name_offset); + } + + ClassLoaderData* classLoaderData() { + return *(ClassLoaderData**) at(_class_loader_data_offset); + } + + int methodCount() { + int* methods = *(int**) at(_methods_offset); + return methods == NULL ? 
0 : *methods & 0xffff;
+    }
+
+    jmethodID* jmethodIDs() {
+        return __atomic_load_n((jmethodID**) at(_jmethod_ids_offset), __ATOMIC_ACQUIRE);
+    }
+};
+
+class JavaFrameAnchor : VMStructs {
+  private:
+    enum { MAX_CALL_WRAPPER_DISTANCE = 512 };
+
+  public:
+    static JavaFrameAnchor* fromEntryFrame(uintptr_t fp) {
+        const char* call_wrapper = *(const char**)(fp + _entry_frame_call_wrapper_offset);
+        if (!goodPtr(call_wrapper) || (uintptr_t)call_wrapper - fp > MAX_CALL_WRAPPER_DISTANCE) {
+            return NULL;
+        }
+        return (JavaFrameAnchor*)(call_wrapper + _call_wrapper_anchor_offset);
+    }
+
+    uintptr_t lastJavaSP() {
+        return *(uintptr_t*) at(_anchor_sp_offset);
+    }
+
+    uintptr_t lastJavaFP() {
+        return *(uintptr_t*) at(_anchor_fp_offset);
+    }
+
+    const void* lastJavaPC() {
+        return *(const void**) at(_anchor_pc_offset);
+    }
+
+    void setLastJavaPC(const void* pc) {
+        *(const void**) at(_anchor_pc_offset) = pc;
+    }
+
+    bool getFrame(const void*& pc, uintptr_t& sp, uintptr_t& fp) {
+        if (lastJavaPC() != NULL && lastJavaSP() != 0) {
+            pc = lastJavaPC();
+            sp = lastJavaSP();
+            fp = lastJavaFP();
+            return true;
+        }
+        return false;
+    }
+};
+
+class VMThread : VMStructs {
+  public:
+    static VMThread* current();
+
+    static int key() {
+        return _tls_index;
+    }
+
+    static VMThread* fromJavaThread(JNIEnv* env, jthread thread) {
+        return (VMThread*)(uintptr_t)env->GetLongField(thread, _eetop);
+    }
+
+    static jlong javaThreadId(JNIEnv* env, jthread thread) {
+        return env->GetLongField(thread, _tid);
+    }
+
+    static int nativeThreadId(JNIEnv* jni, jthread thread);
+
+    int osThreadId();
+
+    JNIEnv* jni();
+
+    const void** vtable() {
+        return *(const void***)this;
+    }
+
+    // This thread is considered a JavaThread if at least 2 of the selected 3 vtable entries
+    // match those of a known JavaThread (which is either an application thread or AttachListener).
+    // Indexes were carefully chosen to work on OpenJDK 8 to 25, both product and debug builds.
+    bool isJavaThread() {
+        const void** vtbl = vtable();
+        return (vtbl[1] == _java_thread_vtbl[1])
+             + (vtbl[3] == _java_thread_vtbl[3])
+             + (vtbl[5] == _java_thread_vtbl[5]) >= 2;
+    }
+
+    int state() {
+        return _thread_state_offset >= 0 ? 
*(int*) at(_thread_state_offset) : 0; + } + + bool inJava() { + return state() == 8; + } + + bool inDeopt() { + return *(void**) at(_thread_vframe_offset) != NULL; + } + + void*& exception() { + return *(void**) at(_thread_exception_offset); + } + + JavaFrameAnchor* anchor() { + return (JavaFrameAnchor*) at(_thread_anchor_offset); + } + + VMMethod* compiledMethod() { + const char* env = *(const char**) at(_comp_env_offset); + if (env != NULL) { + const char* task = *(const char**) (env + _comp_task_offset); + if (task != NULL) { + return *(VMMethod**) (task + _comp_method_offset); + } + } + return NULL; + } +}; + +class VMMethod : VMStructs { + public: + jmethodID id(); + + // Performs extra validation when VMMethod comes from incomplete frame + jmethodID validatedId(); + + // Workaround for JDK-8313816 + static bool isStaleMethodId(jmethodID id) { + if (!_can_dereference_jmethod_id) return false; + VMMethod* vm_method = *(VMMethod**)id; + return vm_method == NULL || vm_method->id() == NULL; + } + + const char* bytecode() { + return *(const char**) at(_method_constmethod_offset) + _constmethod_size; + } + + NMethod* code() { + return *(NMethod**) at(_method_code_offset); + } +}; + +class NMethod : VMStructs { + public: + int size() { + return *(int*) at(_blob_size_offset); + } + + int frameSize() { + return *(int*) at(_frame_size_offset); + } + + short frameCompleteOffset() { + return *(short*) at(_frame_complete_offset); + } + + void setFrameCompleteOffset(int offset) { + if (_nmethod_immutable_offset > 0) { + // _frame_complete_offset is short on JDK 23+ + *(short*) at(_frame_complete_offset) = offset; + } else { + *(int*) at(_frame_complete_offset) = offset; + } + } + + const char* immutableDataAt(int offset) { + if (_nmethod_immutable_offset > 0) { + return *(const char**) at(_nmethod_immutable_offset) + offset; + } + return at(offset); + } + + const char* code() { + if (_code_offset > 0) { + return at(*(int*) at(_code_offset)); + } else { + return *(const char**) at(-_code_offset); + } + } + + const char* scopes() { + if (_scopes_data_offset > 0) { + return immutableDataAt(*(int*) at(_scopes_data_offset)); + } else { + return *(const char**) at(-_scopes_data_offset); + } + } + + const void* entry() { + if (_nmethod_entry_offset > 0) { + return at(*(int*) at(_code_offset) + *(unsigned short*) at(_nmethod_entry_offset)); + } else { + return *(void**) at(-_nmethod_entry_offset); + } + } + + bool contains(const void* pc) { + return pc >= this && pc < at(size()); + } + + bool isFrameCompleteAt(const void* pc) { + return pc >= code() + frameCompleteOffset(); + } + + bool isEntryFrame(const void* pc) { + return pc == _call_stub_return; + } + + const char* name() { + return *(const char**) at(_nmethod_name_offset); + } + + bool isNMethod() { + const char* n = name(); + return n != NULL && (strcmp(n, "nmethod") == 0 || strcmp(n, "native nmethod") == 0); + } + + bool isInterpreter() { + const char* n = name(); + return n != NULL && strcmp(n, "Interpreter") == 0; + } + + bool isStub() { + const char* n = name(); + return n != NULL && strncmp(n, "StubRoutines", 12) == 0; + } + + bool isVTableStub() { + const char* n = name(); + return n != NULL && strcmp(n, "vtable chunks") == 0; + } + + VMMethod* method() { + return *(VMMethod**) at(_nmethod_method_offset); + } + + char state() { + return *at(_nmethod_state_offset); + } + + bool isAlive() { + return state() >= 0 && state() <= 1; + } + + int level() { + return _nmethod_level_offset >= 0 ? 
*(signed char*) at(_nmethod_level_offset) : 0; + } + + VMMethod** metadata() { + if (_mutable_data_offset >= 0) { + // Since JDK 25 + return (VMMethod**) (*(char**) at(_mutable_data_offset) + *(int*) at(_relocation_size_offset)); + } else if (_data_offset > 0) { + // since JDK 23 + return (VMMethod**) at(*(int*) at(_data_offset) + *(unsigned short*) at(_nmethod_metadata_offset)); + } + return (VMMethod**) at(*(int*) at(_nmethod_metadata_offset)); + } + + int findScopeOffset(const void* pc); +}; + +class CodeHeap : VMStructs { + private: + static bool contains(char* heap, const void* pc) { + return heap != NULL && + pc >= *(const void**)(heap + _code_heap_memory_offset + _vs_low_offset) && + pc < *(const void**)(heap + _code_heap_memory_offset + _vs_high_offset); + } + + static NMethod* findNMethod(char* heap, const void* pc); + + public: + static bool available() { + return _code_heap_addr != NULL; + } + + static bool contains(const void* pc) { + return _code_heap_low <= pc && pc < _code_heap_high; + } + + static void updateBounds(const void* start, const void* end) { + for (const void* low = _code_heap_low; + start < low && !__sync_bool_compare_and_swap(&_code_heap_low, low, start); + low = _code_heap_low); + for (const void* high = _code_heap_high; + end > high && !__sync_bool_compare_and_swap(&_code_heap_high, high, end); + high = _code_heap_high); + } + + static NMethod* findNMethod(const void* pc) { + if (contains(_code_heap[0], pc)) return findNMethod(_code_heap[0], pc); + if (contains(_code_heap[1], pc)) return findNMethod(_code_heap[1], pc); + if (contains(_code_heap[2], pc)) return findNMethod(_code_heap[2], pc); + return NULL; + } +}; + +class CollectedHeap : VMStructs { + public: + static bool created() { + return _collected_heap_addr != NULL && *_collected_heap_addr != NULL; + } + + static CollectedHeap* heap() { + return (CollectedHeap*)_collected_heap; + } + + uintptr_t start() { + return *(uintptr_t*) at(_region_start_offset); + } + + uintptr_t size() { + return (*(uintptr_t*) at(_region_size_offset)) * sizeof(uintptr_t); + } +}; + +class JVMFlag : VMStructs { + private: + enum { + ORIGIN_DEFAULT = 0, + ORIGIN_MASK = 15, + SET_ON_CMDLINE = 1 << 17 + }; + + public: + static JVMFlag* find(const char* name); + + const char* name() { + return *(const char**) at(_flag_name_offset); + } + + char* addr() { + return *(char**) at(_flag_addr_offset); + } + + bool isDefault() { + return _flag_origin_offset < 0 || (*(int*) at(_flag_origin_offset) & ORIGIN_MASK) == ORIGIN_DEFAULT; + } + + void setCmdline() { + if (_flag_origin_offset >= 0) { + *(int*) at(_flag_origin_offset) |= SET_ON_CMDLINE; + } + } + + char get() { + return *addr(); + } + + void set(char value) { + *addr() = value; + } +}; + +class PcDesc { + public: + int _pc; + int _scope_offset; + int _obj_offset; + int _flags; +}; + +class ScopeDesc : VMStructs { + private: + const unsigned char* _scopes; + VMMethod** _metadata; + const unsigned char* _stream; + int _method_offset; + int _bci; + + int readInt(); + + public: + ScopeDesc(NMethod* nm) { + _scopes = (const unsigned char*)nm->scopes(); + _metadata = nm->metadata(); + } + + int decode(int offset) { + _stream = _scopes + offset; + int sender_offset = readInt(); + _method_offset = readInt(); + _bci = readInt() - 1; + return sender_offset; + } + + VMMethod* method() { + return _method_offset > 0 ? 
+class InterpreterFrame : VMStructs {
+  public:
+    enum {
+        sender_sp_offset = -1,
+        method_offset = -3
+    };
+
+    static int bcp_offset() {
+        return _interpreter_frame_bcp_offset;
+    }
+};
+
+#endif // _VMSTRUCTS_H
diff --git a/ddprof-lib/src/test/fuzz/fuzz_dwarf.cpp b/ddprof-lib/src/test/fuzz/fuzz_dwarf.cpp
index ceae8b48..ad844cb9 100644
--- a/ddprof-lib/src/test/fuzz/fuzz_dwarf.cpp
+++ b/ddprof-lib/src/test/fuzz/fuzz_dwarf.cpp
@@ -21,7 +21,7 @@
 #include 
 #include 
 
-// Include the DWARF parser - dwarf.h comes from cpp-external (async-profiler upstream)
+// Include the DWARF parser
 #include "dwarf.h"
 #include "dwarf_dd.h"
diff --git a/ddprof-lib/src/test/make/Makefile b/ddprof-lib/src/test/make/Makefile
index 09b62226..df716720 100644
--- a/ddprof-lib/src/test/make/Makefile
+++ b/ddprof-lib/src/test/make/Makefile
@@ -1,5 +1,5 @@
 CC := g++
-SRCDIR := ../../main/cpp-external ../../main/cpp
+SRCDIR := ../../main/cpp
 OBJDIR := ./../../../build/scanbuild_obj
 CFLAGS := -O0 -Wall -std=c++17 -fno-omit-frame-pointer -momit-leaf-frame-pointer -fvisibility=hidden
 SRCS := ${wildcard ${SRCDIR}/*.cpp }
@@ -29,4 +29,4 @@ $(OBJDIR)/%.o : ${SRCDIR}/%.cpp
 	${CC} ${CFLAGS} -DEBUG -DPROFILER_VERSION=\"snapshot\" ${INCLUDES} -c $< -o $@
 
 clean :
-	@rm -rf $(OBJDIR)
\ No newline at end of file
+	@rm -rf $(OBJDIR)
diff --git a/doc/event-type-system.md b/doc/event-type-system.md
index c14e7189..f93a978f 100644
--- a/doc/event-type-system.md
+++ b/doc/event-type-system.md
@@ -57,7 +57,7 @@ enum ASGCT_CallFrameType {
 
 Uses `EventType` consistently throughout:
 
-- **Function signature** (cpp-external/profiler.h:213):
+- **Function signature** (cpp/profiler.h:213):
   ```cpp
   u64 recordSample(void* ucontext, u64 counter, EventType event_type, Event* event);
   ```
diff --git a/gradle/lock.properties b/gradle/lock.properties
deleted file mode 100644
index a2d67147..00000000
--- a/gradle/lock.properties
+++ /dev/null
@@ -1,5 +0,0 @@
-ap.branch=dd/master
-ap.commit=1addbbddf55f00e176c1755156bb0ae40266eab7
-
-ctx_branch=main
-ctx_commit=b33673d801b85a6c38fa0e9f1a139cb246737ce8
diff --git a/gradle/patching.gradle b/gradle/patching.gradle
deleted file mode 100644
index b6aba7e3..00000000
--- a/gradle/patching.gradle
+++ /dev/null
@@ -1,288 +0,0 @@
-/*
- * Copyright 2025 Datadog, Inc
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Unified upstream patching configuration for DataDog Java Profiler
- *
- * This file defines all modifications applied to async-profiler upstream source files
- * to ensure compatibility with DataDog's requirements (ASan, memory safety, API extensions)
- *
- * CONFIGURATION SYNTAX AND SEMANTICS
- * ==================================
- *
- * Root Structure:
- * ---------------
- * ext.upstreamPatches = [
- *     "filename1.cpp": [patches for file1],
- *     "filename2.h": [patches for file2]
- * ]
- *
- * File Configuration Structure:
- * ----------------------------
- * Each file entry contains:
- *
- * "filename.ext": [
- *     validations: [                      // Optional: Pre-patch validation rules
- *         [contains: "required_text"],    // Ensures file contains specific text
- *         [contains: "another_check"]     // Multiple validations run in sequence
- *     ],
- *     operations: [                       // Required: List of patch operations
- *         [
- *             type: "patch_type",             // Required: Type of patch operation
- *             name: "Human readable name",    // Optional: Description of what this patch does
- *             description: "Detailed...",     // Optional: Extended description
- *             find: "regex_pattern",          // Required: Regex pattern to find in file
- *             replace: "replacement_text",    // Required: Text to replace matches with
- *             idempotent_check: "check_text"  // Optional: Text that indicates patch already applied
- *         ]
- *     ]
- * ]
- *
- * PATCH OPERATION TYPES
- * ====================
- *
- * 1. function_attribute:
- *    Purpose: Add attributes (like __attribute__) to function declarations
- *    Example: Add ASan no_sanitize attribute to prevent false positives
- *    find: "(bool\\s+StackFrame::unwindStub\\s*\\()"
- *    replace: "__attribute__((no_sanitize(\"address\"))) $1"
- *
- * 2. expression_replace:
- *    Purpose: Replace unsafe code patterns with safe equivalents
- *    Example: Replace direct pointer dereference with memcpy for ASan compatibility
- *    find: "\\*\\(unsigned int\\*\\)\\s*entry"
- *    replace: "([&] { unsigned int val; memcpy(&val, entry, sizeof(val)); return val; }())"
- *
- * 3. method_declaration:
- *    Purpose: Add new method declarations to class definitions
- *    Example: Add clearParsingCaches method to Symbols class
- *    find: "(static bool haveKernelSymbols\\(\\) \\{[^}]+\\})"
- *    replace: "$1\n static void clearParsingCaches();"
- *
- * 4. method_implementation:
- *    Purpose: Add complete method implementations to source files
- *    Example: Add clearParsingCaches implementation with cache clearing logic
- *    find: "(#endif \\/\\/ __linux__\\s*$)"
- *    replace: "void Symbols::clearParsingCaches() {\n _parsed_inodes.clear();\n}\n\n$1"
- *
- * REGEX PATTERNS AND REPLACEMENTS
- * ===============================
- *
- * Pattern Syntax:
- * - Use Java regex syntax (java.util.regex.Pattern)
- * - Escape special characters: \\( \\) \\{ \\} \\[ \\] \\* \\+ \\? \\. \\|
- * - Use \\s for whitespace, \\w for word characters, \\d for digits
- * - Use capture groups: (pattern) to capture parts for reuse
- * - Use non-capturing groups: (?:pattern) when grouping without capture
- *
- * Replacement Syntax:
- * - Use $1, $2, etc. to reference capture groups from find pattern
- * - Use \n for newlines in replacement text
- * - Use \t for tabs (though spaces are preferred for consistency)
- * - Escape dollar signs as \$ if literal $ needed
- *
- * IDEMPOTENT OPERATIONS
- * ====================
- *
- * Purpose: Prevent applying same patch multiple times
- * - Set idempotent_check to text that would exist after patch is applied
- * - System checks for this text before applying patch
- * - If found, patch is skipped with "already applied" message
- * - Critical for maintaining clean, predictable builds
- *
- * Example:
- *   find: "(bool\\s+StackFrame::unwindStub\\s*\\()"
- *   replace: "__attribute__((no_sanitize(\"address\"))) $1"
- *   idempotent_check: "__attribute__((no_sanitize(\"address\"))) bool StackFrame::unwindStub("
- *
- * VALIDATION RULES
- * ===============
- *
- * Purpose: Ensure upstream file structure hasn't changed in incompatible ways
- * Types:
- * - contains: "text" - File must contain this exact text
- * - Validates that expected functions, classes, or patterns exist
- * - Fails fast if upstream changes break patch assumptions
- * - Helps maintain compatibility across upstream updates
- *
- * Best Practices:
- * - Validate key function signatures that patches modify
- * - Validate class names and critical code structures
- * - Keep validations minimal but sufficient to catch breaking changes
- *
- * MAINTENANCE GUIDELINES
- * =====================
- *
- * Adding New Patches:
- * 1. Add file entry if not exists: "newfile.cpp": [...]
- * 2. Add validations to verify expected code structure
- * 3. Add operation with appropriate type, find, replace
- * 4. Always include idempotent_check to prevent double-application
- * 5. Test thoroughly with clean upstream files
- *
- * Modifying Existing Patches:
- * 1. Update find pattern if upstream code changed
- * 2. Update replace text if modification requirements changed
- * 3. Update idempotent_check to match new replacement
- * 4. Update validations if structural assumptions changed
- *
- * Removing Patches:
- * 1. Remove entire operation block
- * 2. Remove validations that are no longer needed
- * 3. Remove file entry if no operations remain
- * 4. Clean up any orphaned files that depended on removed patches
- */
-
-ext.upstreamPatches = [
-    // Stack frame unwinding patches for ASan compatibility and memory safety
-    "stackFrame_x64.cpp": [
-        validations: [
-            [contains: "StackFrame::"],
-            [contains: "StackFrame::unwindStub"],
-            [contains: "StackFrame::checkInterruptedSyscall"]
-        ],
-        operations: [
-            [
-                type: "function_attribute",
-                name: "Add ASan no_sanitize attribute to unwindStub",
-                description: "Adds __attribute__((no_sanitize(\"address\"))) to unwindStub function to prevent ASan false positives during stack unwinding",
-                find: "(bool\\s+StackFrame::unwindStub\\s*\\()",
-                replace: "__attribute__((no_sanitize(\"address\"))) \$1",
-                idempotent_check: "__attribute__((no_sanitize(\"address\"))) bool StackFrame::unwindStub("
-            ],
-            [
-                type: "expression_replace",
-                name: "Safe memory access for entry pointer check",
-                description: "Replaces unsafe pointer dereference with safe memcpy-based access to prevent ASan violations",
-                find: "entry\\s*!=\\s*NULL\\s*&&\\s*\\*\\(unsigned int\\*\\)\\s*entry\\s*==\\s*0xec8b4855",
-                replace: "entry != NULL && ([&] { unsigned int val; memcpy(&val, entry, sizeof(val)); return val; }()) == 0xec8b4855"
-            ],
-            [
-                type: "function_attribute",
-                name: "Add ASan no_sanitize attribute to checkInterruptedSyscall",
-                description: "Adds __attribute__((no_sanitize(\"address\"))) to checkInterruptedSyscall function",
-                find: "(bool\\s+StackFrame::checkInterruptedSyscall\\s*\\()",
-                replace: "__attribute__((no_sanitize(\"address\"))) \$1",
-                idempotent_check: "__attribute__((no_sanitize(\"address\"))) bool StackFrame::checkInterruptedSyscall("
-            ],
-            [
-                type: "expression_replace",
-                name: "Safe memory access for pc offset read",
-                description: "Replaces unsafe pointer dereference at pc-6 with safe memcpy-based access",
-                find: "\\*\\(int\\*\\)\\s*\\(pc\\s*-\\s*6\\)",
-                replace: "([&] { int val; memcpy(&val, (const void*)(pc - 6), sizeof(val)); return val; }())"
-            ]
-        ]
-    ],
-
-    // Stack walker patches for ASan compatibility
-    "stackWalker.cpp": [
-        validations: [[contains: "StackWalker::"], [contains: "StackWalker::walkVM"]],
-        operations: [
-            [
-                type: "function_attribute",
-                name: "Add ASan no_sanitize attribute to walkVM",
-                description: "Adds __attribute__((no_sanitize(\"address\"))) to walkVM function to prevent ASan false positives during VM stack walking",
-                find: "(int\\s+StackWalker::walkVM\\s*\\()",
-                replace: "__attribute__((no_sanitize(\"address\"))) \$1",
-                idempotent_check: "__attribute__((no_sanitize(\"address\"))) int StackWalker::walkVM("
-            ]
-        ]
-    ],
-
-    // Symbol management patches for DataDog-specific API extensions
-    "symbols.h": [
-        validations: [[contains: "class Symbols"], [contains: "static bool haveKernelSymbols"]],
-        operations: [
-            [
-                type: "method_declaration",
-                name: "Add clearParsingCaches method declaration",
-                description: "Adds clearParsingCaches static method declaration to Symbols class for test compatibility",
-                find: "(static bool haveKernelSymbols\\(\\) \\{[^}]+\\})",
-                replace: "\$1\n  // Clear internal caches - mainly for test purposes\n  static void clearParsingCaches();",
-                idempotent_check: "static void clearParsingCaches();"
-            ]
-        ]
-    ],
-
-    // Symbol implementation patches for DataDog-specific API extensions
-    "symbols_linux.cpp": [
-        validations: [[contains: "#ifdef __linux__"], [contains: "_parsed_inodes"], [contains: "loadSymbolTable"]],
-        operations: [
-            [
-                type: "method_implementation",
-                name: "Add clearParsingCaches method implementation",
-                description: "Adds clearParsingCaches static method implementation that clears internal parsing caches",
"Adds clearParsingCaches static method implementation that clears internal parsing caches", - find: "(#endif \\/\\/ __linux__\\s*\$)", - replace: "// Implementation of clearParsingCaches for test compatibility\nvoid Symbols::clearParsingCaches() {\n _parsed_inodes.clear();\n}\n\n\$1", - idempotent_check: "void Symbols::clearParsingCaches()" - ], - [ - type: "expression_replace", - name: "Add overflow protection to symbol address calculation", - description: "Replace unsafe pointer arithmetic with overflow-protected version to prevent ASAN errors from corrupted ELF symbol values", - find: "const char\\* addr = base != NULL \\? base \\+ sym->st_value : \\(const char\\*\\)sym->st_value;", - replace: "const char* addr;\n if (base != NULL) {\n // Check for overflow when adding sym->st_value to base\n uintptr_t base_addr = (uintptr_t)base;\n uint64_t symbol_value = sym->st_value;\n \n // Skip this symbol if addition would overflow\n // First check if symbol_value exceeds the address space\n if (symbol_value > UINTPTR_MAX) {\n continue;\n }\n // Then check if addition would overflow\n if (base_addr > UINTPTR_MAX - (uintptr_t)symbol_value) {\n continue;\n }\n \n // Perform addition using integer arithmetic to avoid pointer overflow\n addr = (const char*)(base_addr + (uintptr_t)symbol_value);\n } else {\n addr = (const char*)sym->st_value;\n }", - idempotent_check: "if (symbol_value > UINTPTR_MAX)" - ] - ] - ], - - // VM structures patches for safe memory access - "vmStructs.cpp": [ - validations: [ - [contains: "VMMethod::id()"], - [contains: "const_method + _constmethod_constants_offset"] - ], - operations: [ - [ - type: "expression_replace", - name: "Fix unsafe memory access in VMMethod::id", - description: "Replace direct pointer dereference with SafeAccess::load to prevent ASan errors during crash-protected memory access", - find: "const char\\* cpool = \\*\\(const char\\*\\*\\) \\(const_method \\+ _constmethod_constants_offset\\);\\s*unsigned short num = \\*\\(unsigned short\\*\\) \\(const_method \\+ _constmethod_idnum_offset\\);", - replace: "const char* cpool = (const char*) SafeAccess::load((void**)(const_method + _constmethod_constants_offset));\n unsigned short num = (unsigned short) SafeAccess::load32((int32_t*)(const_method + _constmethod_idnum_offset), 0);", - idempotent_check: "SafeAccess::load((void**)(const_method + _constmethod_constants_offset))" - ] - ] - ], - - // Stack frame header patches for DataDog-specific API extensions - "stackFrame.h": [ - validations: [ - [contains: "class StackFrame"], - [contains: "unwindStub"], - [contains: "adjustSP"] - ], - operations: [ - [ - type: "expression_replace", - name: "Make StackFrame constructor explicit", - description: "Add explicit keyword to prevent implicit conversions", - find: "StackFrame\\(void\\* ucontext\\)", - replace: "explicit StackFrame(void* ucontext)", - idempotent_check: "explicit StackFrame(void* ucontext)" - ], - [ - type: "method_declaration", - name: "Add DataDog SP baseline helper methods", - description: "Add sender_sp_baseline, read_caller_pc_from_sp, and read_saved_fp_from_sp methods for DataDog unwinding logic", - find: "(void adjustSP\\(const void\\* entry, const void\\* pc, uintptr_t& sp\\);)", - replace: "\$1\n\n // SP baseline helpers for compiled frame unwinding\n uintptr_t sender_sp_baseline(const NMethod* nm, uintptr_t sp, uintptr_t fp, const void* pc);\n const void* read_caller_pc_from_sp(uintptr_t sp_base);\n uintptr_t read_saved_fp_from_sp(uintptr_t sp_base);", - idempotent_check: "uintptr_t 
-            ]
-        ]
-    ]
-]
\ No newline at end of file

From 15d16f09218103c75d33506cf61eb9dd7df7e0a9 Mon Sep 17 00:00:00 2001
From: Roman Kennke
Date: Tue, 20 Jan 2026 15:18:10 +0100
Subject: [PATCH 2/2] Remove comment

---
 ddprof-lib/benchmarks/build.gradle | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ddprof-lib/benchmarks/build.gradle b/ddprof-lib/benchmarks/build.gradle
index a752d1a1..c6bd1db5 100644
--- a/ddprof-lib/benchmarks/build.gradle
+++ b/ddprof-lib/benchmarks/build.gradle
@@ -17,7 +17,6 @@ application {
 
 // Include the main library headers
 tasks.withType(CppCompile).configureEach {
-    // TODO: Do we need this, or is this included by default?
     includes file('../src/main/cpp').toString()
 }