diff --git a/.github/workflows/dep_rust.yml b/.github/workflows/dep_rust.yml index 5736880e4..09122be1d 100644 --- a/.github/workflows/dep_rust.yml +++ b/.github/workflows/dep_rust.yml @@ -135,6 +135,12 @@ jobs: RUST_LOG: debug run: just test-rust-gdb-debugging ${{ matrix.config }} ${{ matrix.hypervisor == 'mshv3' && 'mshv3' || ''}} + - name: Run Rust Crashdump tests + env: + CARGO_TERM_COLOR: always + RUST_LOG: debug + run: just test-rust-crashdump ${{ matrix.config }} ${{ matrix.hypervisor == 'mshv3' && 'mshv3' || ''}} + ### Benchmarks ### - name: Install github-cli (Linux mariner) if: runner.os == 'Linux' && matrix.hypervisor == 'mshv' diff --git a/Cargo.lock b/Cargo.lock index 6b671b9ec..13b3f9587 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -663,6 +663,19 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +[[package]] +name = "elfcore" +version = "1.1.5" +source = "git+https://github.com/hyperlight-dev/elfcore.git?rev=cef4c80e26bf4b2a5599e50d2d1730965f942c13#cef4c80e26bf4b2a5599e50d2d1730965f942c13" +dependencies = [ + "libc", + "nix", + "smallvec", + "thiserror 1.0.69", + "tracing", + "zerocopy 0.7.35", +] + [[package]] name = "endian-type" version = "0.1.2" @@ -1189,10 +1202,12 @@ dependencies = [ "built", "cfg-if", "cfg_aliases", + "chrono", "criterion", "crossbeam", "crossbeam-channel", "crossbeam-queue", + "elfcore", "env_logger", "flatbuffers", "gdbstub", @@ -1866,6 +1881,17 @@ dependencies = [ "smallvec", ] +[[package]] +name = "nix" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b" +dependencies = [ + "bitflags 1.3.2", + "cfg-if", + "libc", +] + [[package]] name = "nom" version = "7.1.3" diff --git a/Justfile b/Justfile index 78cc7ab5f..2abdcd83b 100644 --- a/Justfile +++ b/Justfile @@ -72,6 +72,9 @@ test-like-ci config=default-target hypervisor="kvm": @# without any driver (should fail to compile) just test-compilation-fail {{config}} + @# test the crashdump feature + just test-rust-crashdump {{config}} + # runs all tests test target=default-target features="": (test-unit target features) (test-isolated target features) (test-integration "rust" target features) (test-integration "c" target features) (test-seccomp target features) @@ -114,6 +117,10 @@ test-rust-gdb-debugging target=default-target features="": cargo test --profile={{ if target == "debug" { "dev" } else { target } }} --example guest-debugging {{ if features =="" {'--features gdb'} else { "--features gdb," + features } }} cargo test --profile={{ if target == "debug" { "dev" } else { target } }} {{ if features =="" {'--features gdb'} else { "--features gdb," + features } }} -- test_gdb +# rust test for crashdump +test-rust-crashdump target=default-target features="": + cargo test --profile={{ if target == "debug" { "dev" } else { target } }} {{ if features =="" {'--features crashdump'} else { "--features crashdump," + features } }} -- test_crashdump + ################ ### LINTING #### diff --git a/docs/how-to-debug-a-hyperlight-guest.md b/docs/how-to-debug-a-hyperlight-guest.md index 8e5dd7d55..cb18d53d3 100644 --- a/docs/how-to-debug-a-hyperlight-guest.md +++ b/docs/how-to-debug-a-hyperlight-guest.md @@ -201,3 +201,222 @@ involved in the gdb debugging of a Hyperlight guest running inside a **KVM** or └─┘ │ | | | │ | 
└───────────────────────────────────────────────────────────────────────────────────────────────┘
```
+
+## Dumping the guest state to an ELF core dump when an unhandled crash occurs
+
+When a guest crashes because of an unknown VmExit or unhandled exception, the vCPU state is dumped to an `ELF` core dump file.
+This can be used to inspect the state of the guest at the time of the crash.
+
+To make Hyperlight dump the state of the vCPU (general purpose registers, segment registers, and the XSAVE state) to an `ELF` core dump file, enable the `crashdump` feature.
+This will result in a dump file being created in the directory named by the `HYPERLIGHT_CORE_DUMP_DIR` environment variable; if the variable is not set, or the directory does not exist, the file is created in the system's temporary directory.
+The name and location of the dump file will be printed to the console and logged as an error message.
+
+### Inspecting the core dump
+
+After the core dump has been created, to inspect the state of the guest, load the core dump file using `gdb` or `lldb`.
+**NOTE: This feature has been tested with version `15.0` of `gdb` and version `17` of `lldb`; earlier versions may not work. It is recommended to use these versions or later.**
+
+To do this in VSCode, the following configuration can be used to add debug configurations (e.g. in `.vscode/launch.json`):
+
+```json
+{
+    "version": "0.2.0",
+    "inputs": [
+        {
+            "id": "core_dump",
+            "type": "promptString",
+            "description": "Path to the core dump file",
+        },
+        {
+            "id": "program",
+            "type": "promptString",
+            "description": "Path to the program to debug",
+        }
+    ],
+    "configurations": [
+        {
+            "name": "[GDB] Load core dump file",
+            "type": "cppdbg",
+            "request": "launch",
+            "program": "${input:program}",
+            "coreDumpPath": "${input:core_dump}",
+            "cwd": "${workspaceFolder}",
+            "MIMode": "gdb",
+            "externalConsole": false,
+            "miDebuggerPath": "/usr/bin/gdb",
+            "setupCommands": [
+                {
+                    "description": "Enable pretty-printing for gdb",
+                    "text": "-enable-pretty-printing",
+                    "ignoreFailures": true
+                },
+                {
+                    "description": "Set Disassembly Flavor to Intel",
+                    "text": "-gdb-set disassembly-flavor intel",
+                    "ignoreFailures": true
+                }
+            ]
+        },
+        {
+            "name": "[LLDB] Load core dump file",
+            "type": "lldb",
+            "request": "launch",
+            "stopOnEntry": true,
+            "processCreateCommands": [],
+            "targetCreateCommands": [
+                "target create -c ${input:core_dump} ${input:program}",
+            ],
+        },
+    ]
+}
+```
+**NOTE: The `CodeLLDB` debug session does not stop after launching. To see the code, stack frames and registers you need to
+press the `pause` button. This is a known issue with the `CodeLLDB` extension [#1245](https://github.com/vadimcn/codelldb/issues/1245).
+The `cppdbg` extension works as expected and stops at the entry point of the program.**
+
+## Compiling guests with debug information for release builds
+
+This section explains how to compile a guest with debugging information but still have optimized code, and how to separate the debug information from the binary.
+
+### Creating a release build with debug information
+
+To create a release build with debug information, you can add a custom profile to your `Cargo.toml` file:
+
+```toml
+[profile.release-with-debug]
+inherits = "release"
+debug = true
+```
+
+This creates a new profile called `release-with-debug` that inherits all settings from the release profile but adds debug information.
+
+### Splitting debug information from the binary
+
+To reduce the binary size while still having debug information available, you can split the debug information into a separate file.
+This is useful for production environments where you want smaller binaries but still want to be able to debug crashes.
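+
+Once you have completed the steps below, you can sanity-check the result with GNU `binutils` (a quick sketch; `guest-binary` is an illustrative name):
+
+```bash
+# Prints the contents of the .gnu_debuglink section added in the last step,
+# i.e. the name of the separate debug file the debugger will look for.
+readelf --string-dump=.gnu_debuglink guest-binary
+```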
+ +Here's a step-by-step guide: + +1. Build your guest with the release-with-debug profile: + ```bash + cargo build --profile release-with-debug + ``` + +2. Locate your binary in the target directory: + ```bash + TARGET_DIR="target" + PROFILE="release-with-debug" + ARCH="x86_64-unknown-none" # Your target architecture + BUILD_DIR="${TARGET_DIR}/${ARCH}/${PROFILE}" + BINARY=$(find "${BUILD_DIR}" -type f -executable -name "guest-binary" | head -1) + ``` + +3. Extract debug information into a full debug file: + ```bash + DEBUG_FILE_FULL="${BINARY}.debug.full" + objcopy --only-keep-debug "${BINARY}" "${DEBUG_FILE_FULL}" + ``` + +4. Create a symbols-only debug file (smaller, but still useful for stack traces): + ```bash + DEBUG_FILE="${BINARY}.debug" + objcopy --keep-file-symbols "${DEBUG_FILE_FULL}" "${DEBUG_FILE}" + ``` + +5. Strip debug information from the original binary but keep function names: + ```bash + objcopy --strip-debug "${BINARY}" + ``` + +6. Add a debug link to the stripped binary: + ```bash + objcopy --add-gnu-debuglink="${DEBUG_FILE}" "${BINARY}" + ``` + +After these steps, you'll have: +- An optimized binary with function names for basic stack traces +- A symbols-only debug file for stack traces +- A full debug file for complete source-level debugging + +### Analyzing core dumps with the debug files + +When you have a core dump from a crashed guest, you can analyze it with different levels of detail using either GDB or LLDB. + +#### Using GDB + +1. For basic analysis with function names (stack traces): + ```bash + gdb ${BINARY} -c /path/to/core.dump + ``` + +2. For full source-level debugging: + ```bash + gdb -s ${DEBUG_FILE_FULL} ${BINARY} -c /path/to/core.dump + ``` + +#### Using LLDB + +LLDB provides similar capabilities with slightly different commands: + +1. For basic analysis with function names (stack traces): + ```bash + lldb ${BINARY} -c /path/to/core.dump + ``` + +2. For full source-level debugging: + ```bash + lldb -o "target create -c /path/to/core.dump ${BINARY}" -o "add-dsym ${DEBUG_FILE_FULL}" + ``` + +3. 
If your debug symbols are in a separate file: + ```bash + lldb ${BINARY} -c /path/to/core.dump + (lldb) add-dsym ${DEBUG_FILE_FULL} + ``` + +### VSCode Debug Configurations + +You can configure VSCode (in `.vscode/launch.json`) to use these files by modifying the debug configurations: + +#### For GDB + +```json +{ + "name": "[GDB] Load core dump with full debug symbols", + "type": "cppdbg", + "request": "launch", + "program": "${input:program}", + "coreDumpPath": "${input:core_dump}", + "cwd": "${workspaceFolder}", + "MIMode": "gdb", + "externalConsole": false, + "miDebuggerPath": "/usr/bin/gdb", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + } + ] +} +``` + +#### For LLDB + +```json +{ + "name": "[LLDB] Load core dump with full debug symbols", + "type": "lldb", + "request": "launch", + "program": "${input:program}", + "cwd": "${workspaceFolder}", + "processCreateCommands": [], + "targetCreateCommands": [ + "target create -c ${input:core_dump} ${input:program}" + ], + "postRunCommands": [ + // if debug symbols are in a different file + "add-dsym ${input:debug_file_path}" + ] +} +``` diff --git a/src/hyperlight_host/Cargo.toml b/src/hyperlight_host/Cargo.toml index b1dddbae0..d5435ce60 100644 --- a/src/hyperlight_host/Cargo.toml +++ b/src/hyperlight_host/Cargo.toml @@ -38,10 +38,11 @@ vmm-sys-util = "0.14.0" crossbeam = "0.8.0" crossbeam-channel = "0.5.15" thiserror = "2.0.12" -tempfile = { version = "3.20", optional = true } +chrono = { version = "0.4", optional = true } anyhow = "1.0" metrics = "0.24.2" serde_json = "1.0" +elfcore = { git = "https://github.com/hyperlight-dev/elfcore.git", rev = "cef4c80e26bf4b2a5599e50d2d1730965f942c13" } [target.'cfg(windows)'.dependencies] windows = { version = "0.61", features = [ @@ -123,7 +124,8 @@ function_call_metrics = [] executable_heap = [] # This feature enables printing of debug information to stdout in debug builds print_debug = [] -crashdump = ["dep:tempfile"] # Dumps the VM state to a file on unexpected errors or crashes. The path of the file will be printed on stdout and logged. This feature can only be used in debug builds. +# Dumps the VM state to a file on unexpected errors or crashes. The path of the file will be printed on stdout and logged. +crashdump = ["dep:chrono"] kvm = ["dep:kvm-bindings", "dep:kvm-ioctls"] mshv2 = ["dep:mshv-bindings2", "dep:mshv-ioctls2"] mshv3 = ["dep:mshv-bindings3", "dep:mshv-ioctls3"] diff --git a/src/hyperlight_host/build.rs b/src/hyperlight_host/build.rs index 4484e27d5..75f9eba53 100644 --- a/src/hyperlight_host/build.rs +++ b/src/hyperlight_host/build.rs @@ -93,8 +93,7 @@ fn main() -> Result<()> { gdb: { all(feature = "gdb", debug_assertions, any(feature = "kvm", feature = "mshv2", feature = "mshv3"), target_os = "linux") }, kvm: { all(feature = "kvm", target_os = "linux") }, mshv: { all(any(feature = "mshv2", feature = "mshv3"), target_os = "linux") }, - // crashdump feature is aliased with debug_assertions to make it only available in debug-builds. - crashdump: { all(feature = "crashdump", debug_assertions) }, + crashdump: { all(feature = "crashdump") }, // print_debug feature is aliased with debug_assertions to make it only available in debug-builds. 
print_debug: { all(feature = "print_debug", debug_assertions) }, // the following features are mutually exclusive but rather than enforcing that here we are enabling mshv3 to override mshv2 when both are enabled diff --git a/src/hyperlight_host/src/hypervisor/crashdump.rs b/src/hyperlight_host/src/hypervisor/crashdump.rs index de3fe7253..65919b4fa 100644 --- a/src/hyperlight_host/src/hypervisor/crashdump.rs +++ b/src/hyperlight_host/src/hypervisor/crashdump.rs @@ -14,47 +14,473 @@ See the License for the specific language governing permissions and limitations under the License. */ +use std::cmp::min; use std::io::Write; -use tempfile::NamedTempFile; +use chrono; +use elfcore::{ + ArchComponentState, ArchState, CoreDumpBuilder, CoreError, Elf64_Auxv, ProcessInfoSource, + ReadProcessMemory, ThreadView, VaProtection, VaRegion, +}; use super::Hypervisor; +use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; use crate::{new_error, Result}; -/// Dump registers + memory regions + raw memory to a tempfile -#[cfg(crashdump)] -pub(crate) fn crashdump_to_tempfile(hv: &dyn Hypervisor) -> Result<()> { - let mut temp_file = NamedTempFile::with_prefix("mem")?; - let hv_details = format!("{:#x?}", hv); +/// This constant is used to identify the XSAVE state in the core dump +const NT_X86_XSTATE: u32 = 0x202; +/// This constant identifies the entry point of the program in an Auxiliary Vector +/// note of ELF. This tells a debugger whether the entry point of the program changed +/// so it can load the symbols correctly. +const AT_ENTRY: u64 = 9; +/// This constant is used to mark the end of the Auxiliary Vector note +const AT_NULL: u64 = 0; +/// The PID of the core dump process - this is a placeholder value +const CORE_DUMP_PID: i32 = 1; +/// The page size of the core dump +const CORE_DUMP_PAGE_SIZE: usize = 0x1000; - // write hypervisor details such as registers, info about mapped memory regions, etc. 
-    temp_file.write_all(hv_details.as_bytes())?;
-    temp_file.write_all(b"================ MEMORY DUMP =================\n")?;
+/// Structure to hold the crash dump context
+/// This structure contains the information needed to create a core dump
+#[derive(Debug)]
+pub(crate) struct CrashDumpContext<'a> {
+    regions: &'a [MemoryRegion],
+    regs: [u64; 27],
+    xsave: Vec<u8>,
+    entry: u64,
+    binary: Option<String>,
+    filename: Option<String>,
+}
+
+impl<'a> CrashDumpContext<'a> {
+    pub(crate) fn new(
+        regions: &'a [MemoryRegion],
+        regs: [u64; 27],
+        xsave: Vec<u8>,
+        entry: u64,
+        binary: Option<String>,
+        filename: Option<String>,
+    ) -> Self {
+        Self {
+            regions,
+            regs,
+            xsave,
+            entry,
+            binary,
+            filename,
+        }
+    }
+}
+
+/// Structure that contains the process information for the core dump
+/// This serves as a source of information for `elfcore`'s [`CoreDumpBuilder`]
+struct GuestView {
+    regions: Vec<VaRegion>,
+    threads: Vec<ThreadView>,
+    aux_vector: Vec<Elf64_Auxv>,
+}
+
+impl GuestView {
+    fn new(ctx: &CrashDumpContext) -> Self {
+        // Map the regions to the format `CoreDumpBuilder` expects
+        let regions = ctx
+            .regions
+            .iter()
+            .filter(|r| !r.host_region.is_empty())
+            .map(|r| VaRegion {
+                begin: r.guest_region.start as u64,
+                end: r.guest_region.end as u64,
+                offset: r.host_region.start as u64,
+                protection: VaProtection {
+                    is_private: false,
+                    read: r.flags.contains(MemoryRegionFlags::READ),
+                    write: r.flags.contains(MemoryRegionFlags::WRITE),
+                    execute: r.flags.contains(MemoryRegionFlags::EXECUTE),
+                },
+                mapped_file_name: None,
+            })
+            .collect();
+
+        let filename = ctx
+            .filename
+            .as_ref()
+            .map_or("".to_string(), |s| s.to_string());
+
+        let cmd = ctx
+            .binary
+            .as_ref()
+            .map_or("".to_string(), |s| s.to_string());
-    // write the raw memory dump for each memory region
-    for region in hv.get_memory_regions() {
-        if region.host_region.start == 0 || region.host_region.is_empty() {
-            continue;
+        // The xsave state is checked as it can be empty
+        let mut components = vec![];
+        if !ctx.xsave.is_empty() {
+            components.push(ArchComponentState {
+                name: "XSAVE",
+                note_type: NT_X86_XSTATE,
+                note_name: b"LINUX",
+                data: ctx.xsave.clone(),
+            });
+        }
-        // SAFETY: we got this memory region from the hypervisor so should never be invalid
-        let region_slice = unsafe {
-            std::slice::from_raw_parts(
-                region.host_region.start as *const u8,
-                region.host_region.len(),
-            )
+
+        // Create the thread view
+        // The thread view contains the information about the thread
+        // NOTE: Some of these fields are not used in the current implementation
+        let thread = ThreadView {
+            flags: 0,   // Kernel flags for the process
+            tid: 1,
+            uid: 0,     // User ID
+            gid: 0,     // Group ID
+            comm: filename,
+            ppid: 0,    // Parent PID
+            pgrp: 0,    // Process group ID
+            nice: 0,    // Nice value
+            state: 0,   // Process state
+            utime: 0,   // User time
+            stime: 0,   // System time
+            cutime: 0,  // Children user time
+            cstime: 0,  // Children system time
+            cursig: 0,  // Current signal
+            session: 0, // Session ID of the process
+            sighold: 0, // Blocked signal
+            sigpend: 0, // Pending signal
+            cmd_line: cmd,
+
+            arch_state: Box::new(ArchState {
+                gpr_state: ctx.regs.to_vec(),
+                components,
+            }),
+        };
-        };
-        temp_file.write_all(region_slice)?;
+
+        // Create the auxv vector
+        // The first entry is AT_ENTRY, which is the entry point of the program
+        // The entry point is the address where the program starts executing
+        // This helps the debugger to know that the entry is changed by an offset
+        // so the symbols can be loaded correctly.
+        // The second entry is AT_NULL, which marks the end of the vector
+        let auxv = vec![
+            Elf64_Auxv {
+                a_type: AT_ENTRY,
+                a_val: ctx.entry,
+            },
+            Elf64_Auxv {
+                a_type: AT_NULL,
+                a_val: 0,
+            },
+        ];
+
+        Self {
+            regions,
+            threads: vec![thread],
+            aux_vector: auxv,
+        }
+    }
+}
+
+impl ProcessInfoSource for GuestView {
+    fn pid(&self) -> i32 {
+        CORE_DUMP_PID
+    }
+    fn threads(&self) -> &[elfcore::ThreadView] {
+        &self.threads
+    }
+    fn page_size(&self) -> usize {
+        CORE_DUMP_PAGE_SIZE
+    }
+    fn aux_vector(&self) -> Option<&[elfcore::Elf64_Auxv]> {
+        Some(&self.aux_vector)
+    }
+    fn va_regions(&self) -> &[elfcore::VaRegion] {
+        &self.regions
+    }
+    fn mapped_files(&self) -> Option<&[elfcore::MappedFile]> {
+        // We don't have mapped files
+        None
+    }
+}
+
+/// Structure that reads the guest memory
+/// This structure serves as a custom memory reader for `elfcore`'s
+/// [`CoreDumpBuilder`]
+struct GuestMemReader {
+    regions: Vec<MemoryRegion>,
+}
+
+impl GuestMemReader {
+    fn new(ctx: &CrashDumpContext) -> Self {
+        Self {
+            regions: ctx.regions.to_vec(),
+        }
+    }
+}
+
+impl ReadProcessMemory for GuestMemReader {
+    fn read_process_memory(
+        &mut self,
+        base: usize,
+        buf: &mut [u8],
+    ) -> std::result::Result<usize, CoreError> {
+        for r in self.regions.iter() {
+            // Check if the base address is within the guest region
+            if base >= r.guest_region.start && base < r.guest_region.end {
+                let offset = base - r.guest_region.start;
+                let region_slice = unsafe {
+                    std::slice::from_raw_parts(
+                        r.host_region.start as *const u8,
+                        r.host_region.len(),
+                    )
+                };
+
+                // Calculate how much we can copy
+                let copy_size = min(buf.len(), region_slice.len() - offset);
+                if copy_size == 0 {
+                    return std::result::Result::Ok(0);
+                }
+
+                // Only copy the amount that fits in both buffers
+                buf[..copy_size].copy_from_slice(&region_slice[offset..offset + copy_size]);
+
+                // Return the number of bytes copied
+                return std::result::Result::Ok(copy_size);
+            }
+        }
+
+        // If we reach here, we didn't find a matching region
+        std::result::Result::Ok(0)
+    }
+}
+
+/// Create core dump file from the hypervisor information if the sandbox is configured
+/// to allow core dumps.
+///
+/// This function generates an ELF core dump file capturing the hypervisor's state,
+/// which can be used for debugging when crashes occur.
+/// The location of the core dump file is determined by the `HYPERLIGHT_CORE_DUMP_DIR`
+/// environment variable. If not set, it defaults to the system's temporary directory.
+///
+/// # Arguments
+/// * `hv`: Reference to the hypervisor implementation
+///
+/// # Returns
+/// * `Result<()>`: Success or error
+pub(crate) fn generate_crashdump(hv: &dyn Hypervisor) -> Result<()> {
+    log::info!("Creating core dump file...");
+
+    // Get crash context from hypervisor
+    let ctx = hv
+        .crashdump_context()
+        .map_err(|e| new_error!("Failed to get crashdump context: {:?}", e))?;
+
+    // Get env variable for core dump directory
+    let core_dump_dir = std::env::var("HYPERLIGHT_CORE_DUMP_DIR").ok();
+
+    // Compute file path on the filesystem
+    let file_path = core_dump_file_path(core_dump_dir);
+
+    let create_dump_file = || {
+        // Create the file
+        Ok(Box::new(
+            std::fs::File::create(&file_path)
+                .map_err(|e| new_error!("Failed to create core dump file: {:?}", e))?,
+        ) as Box<dyn Write>)
+    };
+
+    checked_core_dump(ctx, create_dump_file).map(|_| {
+        println!("Core dump created successfully: {}", file_path);
+        log::error!("Core dump file: {}", file_path);
+    })
+}
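+
+// Editor's note (illustrative, not part of the original patch): a host that
+// embeds Hyperlight could direct dumps to a custom location by setting the
+// environment variable before any guest work runs, e.g.:
+//
+//     std::env::set_var("HYPERLIGHT_CORE_DUMP_DIR", "/var/crash/hyperlight");
+//
+// `generate_crashdump` then writes `hl_core_<timestamp>.elf` into that
+// directory, falling back to `std::env::temp_dir()` if it does not exist.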
+
+/// Computes the file path for the core dump file.
+///
+/// The file path is generated based on the current timestamp and an
+/// output directory.
+/// If `dump_dir` is not set, or the directory it names does not exist,
+/// the path falls back to the system's temporary directory.
+/// The filename is formatted as `hl_core_<timestamp>.elf`.
+///
+/// Arguments:
+/// * `dump_dir`: The environment variable value to check for the output directory.
+///
+/// Returns:
+/// * `String`: The file path for the core dump file.
+fn core_dump_file_path(dump_dir: Option<String>) -> String {
+    // Generate timestamp string for the filename using chrono
+    let timestamp = chrono::Local::now()
+        .format("%Y%m%d_T%H%M%S%.3f")
+        .to_string();
+
+    // Determine the output directory based on environment variable
+    let output_dir = if let Some(dump_dir) = dump_dir {
+        // Check if the directory exists
+        // If it doesn't exist, fall back to the system temp directory
+        // This is to ensure that the core dump can be created even if the directory is not set
+        if std::path::Path::new(&dump_dir).exists() {
+            std::path::PathBuf::from(dump_dir)
+        } else {
+            log::warn!(
+                "Directory \"{}\" does not exist, falling back to temp directory",
+                dump_dir
+            );
+            std::env::temp_dir()
+        }
+    } else {
+        // Fall back to the system temp directory
+        std::env::temp_dir()
+    };
+
+    // Create the filename with timestamp
+    let filename = format!("hl_core_{}.elf", timestamp);
+    let file_path = output_dir.join(filename);
+
+    file_path.to_string_lossy().to_string()
+}
+
+/// Create core dump from Hypervisor context if the sandbox is configured to allow core dumps.
+///
+/// Arguments:
+/// * `ctx`: Optional crash dump context from the hypervisor. This contains the information
+///   needed to create the core dump. If `None`, no core dump will be created.
+/// * `get_writer`: Closure that returns a writer to the output destination.
+///
+/// Returns:
+/// * `Result<usize>`: The number of bytes written to the core dump file.
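+///
+/// Illustrative usage (an editor's sketch, not part of the original patch;
+/// `ctx` would come from `Hypervisor::crashdump_context()`):
+/// ```ignore
+/// let nbytes = checked_core_dump(ctx, || {
+///     let file = std::fs::File::create("/tmp/hl_core.elf")
+///         .map_err(|e| new_error!("Failed to create core dump file: {:?}", e))?;
+///     Ok(Box::new(file) as Box<dyn Write>)
+/// })?;
+/// ```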
+fn checked_core_dump(
+    ctx: Option<CrashDumpContext>,
+    get_writer: impl FnOnce() -> Result<Box<dyn Write>>,
+) -> Result<usize> {
+    let mut nbytes = 0;
+    // If the HV returned a context it means we can create a core dump
+    // This is the case when the sandbox has been configured at runtime to allow core dumps
+    if let Some(ctx) = ctx {
+        // Set up data sources for the core dump
+        let guest_view = GuestView::new(&ctx);
+        let memory_reader = GuestMemReader::new(&ctx);
+
+        // Create and write core dump
+        let core_builder = CoreDumpBuilder::from_source(guest_view, memory_reader);
+
+        let writer = get_writer()?;
+        // Write the core dump directly to the file
+        nbytes = core_builder
+            .write(writer)
+            .map_err(|e| new_error!("Failed to write core dump: {:?}", e))?;
+    }
+
+    Ok(nbytes)
+}
+
+/// Test module for the crash dump functionality
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    /// Test the core_dump_file_path function when the environment variable is set to an existing
+    /// directory
+    #[test]
+    fn test_crashdump_file_path_valid() {
+        // Get CWD
+        let valid_dir = std::env::current_dir()
+            .unwrap()
+            .to_string_lossy()
+            .to_string();
+
+        // Call the function
+        let path = core_dump_file_path(Some(valid_dir.clone()));
+
+        // Check if the path is correct
+        assert!(path.contains(&valid_dir));
+    }
+
+    /// Test the core_dump_file_path function when the environment variable is set to an invalid
+    /// directory
+    #[test]
+    fn test_crashdump_file_path_invalid() {
+        // Call the function
+        let path = core_dump_file_path(Some("/tmp/not_existing_dir".to_string()));
+
+        // Get the temp directory
+        let temp_dir = std::env::temp_dir().to_string_lossy().to_string();
+
+        // Check if the path is correct
+        assert!(path.contains(&temp_dir));
+    }
+
+    /// Test the core_dump_file_path function when the environment is not set
+    /// Check against the default temp directory by using the env::temp_dir() function
+    #[test]
+    fn test_crashdump_file_path_default() {
+        // Call the function
+        let path = core_dump_file_path(None);
+
+        let temp_dir = std::env::temp_dir().to_string_lossy().to_string();
+
+        // Check if the path is correct
+        assert!(path.starts_with(&temp_dir));
+    }
-    temp_file.flush()?;
-    // persist the tempfile to disk
-    let persist_path = temp_file.path().with_extension("dmp");
-    temp_file
-        .persist(&persist_path)
-        .map_err(|e| new_error!("Failed to persist crashdump file: {:?}", e))?;
+
+    /// Test core is not created when the context is None
+    #[test]
+    fn test_crashdump_not_created_when_context_is_none() {
+        // Call the function with None context
+        let result = checked_core_dump(None, || Ok(Box::new(std::io::empty())));
-    println!("Memory dumped to file: {:?}", persist_path);
-    log::error!("Memory dumped to file: {:?}", persist_path);
+
+        // Check if the result is ok and the number of bytes is 0
+        assert!(result.is_ok());
+        assert_eq!(result.unwrap(), 0);
+    }
+
+    /// Test the core dump creation with no regions fails
+    #[test]
+    fn test_crashdump_write_fails_when_no_regions() {
+        // Create a dummy context
+        let ctx = CrashDumpContext::new(
+            &[],
+            [0; 27],
+            vec![],
+            0,
+            Some("dummy_binary".to_string()),
+            Some("dummy_filename".to_string()),
+        );
+
+        let get_writer = || Ok(Box::new(std::io::empty()) as Box<dyn Write>);
+
+        // Call the function
+        let result = checked_core_dump(Some(ctx), get_writer);
+
+        // Check if the result is an error
+        // This should fail because there are no regions
+        assert!(result.is_err());
+    }
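+
+    /// Editor's sketch (illustrative, not part of the original patch): because
+    /// the writer is just a `Box<dyn Write>`, a dump can also be captured in
+    /// memory, e.g. to forward it over a custom transport instead of a file.
+    #[test]
+    fn test_crashdump_write_to_in_memory_buffer() {
+        let dummy_vec = vec![0u8; 0x1000];
+        let regions = vec![MemoryRegion {
+            guest_region: 0x1000..0x2000,
+            host_region: dummy_vec.as_ptr() as usize..dummy_vec.as_ptr() as usize + dummy_vec.len(),
+            flags: MemoryRegionFlags::READ,
+            region_type: crate::mem::memory_region::MemoryRegionType::Code,
+        }];
+        let ctx = CrashDumpContext::new(&regions, [0; 27], vec![], 0x1000, None, None);
+
+        // `Vec<u8>` implements `Write`, so the ELF bytes land in the buffer
+        let get_writer = || Ok(Box::new(Vec::<u8>::new()) as Box<dyn Write>);
+
+        let result = checked_core_dump(Some(ctx), get_writer);
+        assert!(result.is_ok());
+        assert!(result.unwrap() > 0);
+    }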
+
+    /// Check core dump with a dummy region to local vec
+    /// This test checks if the core dump is created successfully
+    #[test]
+    fn test_crashdump_dummy_core_dump() {
+        let dummy_vec = vec![0; 0x1000];
+        let regions = vec![MemoryRegion {
+            guest_region: 0x1000..0x2000,
+            host_region: dummy_vec.as_ptr() as usize..dummy_vec.as_ptr() as usize + dummy_vec.len(),
+            flags: MemoryRegionFlags::READ | MemoryRegionFlags::WRITE,
+            region_type: crate::mem::memory_region::MemoryRegionType::Code,
+        }];
+        // Create a dummy context
+        let ctx = CrashDumpContext::new(
+            &regions,
+            [0; 27],
+            vec![],
+            0x1000,
+            Some("dummy_binary".to_string()),
+            Some("dummy_filename".to_string()),
+        );
+
+        let get_writer = || Ok(Box::new(std::io::empty()) as Box<dyn Write>);
-    Ok(())
+        // Call the function
+        let result = checked_core_dump(Some(ctx), get_writer);
+
+        // Check if the result is ok
+        assert!(result.is_ok());
+        // Check the number of bytes written: the ELF headers plus the
+        // 0x1000-byte region add up to 0x2000 bytes
+        assert_eq!(result.unwrap(), 0x2000);
+    }
 }
diff --git a/src/hyperlight_host/src/hypervisor/hyperv_linux.rs b/src/hyperlight_host/src/hypervisor/hyperv_linux.rs
index 104f75cab..33957aa7e 100644
--- a/src/hyperlight_host/src/hypervisor/hyperv_linux.rs
+++ b/src/hyperlight_host/src/hypervisor/hyperv_linux.rs
@@ -50,6 +50,8 @@ use mshv_bindings::{
 };
 use mshv_ioctls::{Mshv, MshvError, VcpuFd, VmFd};
 use tracing::{instrument, Span};
+#[cfg(crashdump)]
+use {super::crashdump, std::path::Path};
 
 use super::fpu::{FP_CONTROL_WORD_DEFAULT, FP_TAG_WORD_DEFAULT, MXCSR_DEFAULT};
 #[cfg(gdb)]
@@ -65,6 +67,8 @@ use super::{
 use crate::hypervisor::HyperlightExit;
 use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags};
 use crate::mem::ptr::{GuestPtr, RawPtr};
+#[cfg(crashdump)]
+use crate::sandbox::uninitialized::SandboxRuntimeConfig;
 use crate::sandbox::SandboxConfiguration;
 #[cfg(gdb)]
 use crate::HyperlightError;
@@ -302,6 +306,8 @@ pub(crate) struct HypervLinuxDriver {
     debug: Option<MshvDebug>,
     #[cfg(gdb)]
     gdb_conn: Option<DebugCommChannel<DebugResponse, DebugMsg>>,
+    #[cfg(crashdump)]
+    rt_cfg: SandboxRuntimeConfig,
 }
 
 impl HypervLinuxDriver {
@@ -321,6 +327,7 @@ impl HypervLinuxDriver {
         pml4_ptr: GuestPtr,
         config: &SandboxConfiguration,
         #[cfg(gdb)] gdb_conn: Option<DebugCommChannel<DebugResponse, DebugMsg>>,
+        #[cfg(crashdump)] rt_cfg: SandboxRuntimeConfig,
     ) -> Result<Self> {
         let mshv = Mshv::new()?;
         let pr = Default::default();
@@ -408,6 +415,8 @@ impl HypervLinuxDriver {
             debug,
             #[cfg(gdb)]
             gdb_conn,
+            #[cfg(crashdump)]
+            rt_cfg,
         })
     }
 
@@ -747,8 +756,61 @@ impl Hypervisor for HypervLinuxDriver {
     }
 
     #[cfg(crashdump)]
-    fn get_memory_regions(&self) -> &[MemoryRegion] {
-        &self.mem_regions
+    fn crashdump_context(&self) -> Result<Option<crashdump::CrashDumpContext>> {
+        if self.rt_cfg.guest_core_dump {
+            let mut regs = [0; 27];
+
+            let vcpu_regs = self.vcpu_fd.get_regs()?;
+            let sregs = self.vcpu_fd.get_sregs()?;
+            let xsave = self.vcpu_fd.get_xsave()?;
+
+            // Set up the registers for the crash dump
+            regs[0] = vcpu_regs.r15; // r15
+            regs[1] = vcpu_regs.r14; // r14
+            regs[2] = vcpu_regs.r13; // r13
+            regs[3] = vcpu_regs.r12; // r12
+            regs[4] = vcpu_regs.rbp; // rbp
+            regs[5] = vcpu_regs.rbx; // rbx
+            regs[6] = vcpu_regs.r11; // r11
+            regs[7] = vcpu_regs.r10; // r10
+            regs[8] = vcpu_regs.r9; // r9
+            regs[9] = vcpu_regs.r8; // r8
+            regs[10] = vcpu_regs.rax; // rax
+            regs[11] = vcpu_regs.rcx; // rcx
+            regs[12] = vcpu_regs.rdx; // rdx
+            regs[13] = vcpu_regs.rsi; // rsi
+            regs[14] = vcpu_regs.rdi; // rdi
+            regs[15] = 0; // orig rax
+            regs[16] = vcpu_regs.rip; // rip
+            regs[17] = sregs.cs.selector as u64; // cs
+            regs[18] = vcpu_regs.rflags; // eflags
+            regs[19] = vcpu_regs.rsp; // rsp
+            regs[20] = sregs.ss.selector as u64; // ss
+            regs[21] = sregs.fs.base; // fs_base
+            regs[22] =
sregs.gs.base; // gs_base + regs[23] = sregs.ds.selector as u64; // ds + regs[24] = sregs.es.selector as u64; // es + regs[25] = sregs.fs.selector as u64; // fs + regs[26] = sregs.gs.selector as u64; // gs + + // Get the filename from the binary path + let filename = self.rt_cfg.binary_path.clone().and_then(|path| { + Path::new(&path) + .file_name() + .and_then(|name| name.to_os_string().into_string().ok()) + }); + + Ok(Some(crashdump::CrashDumpContext::new( + &self.mem_regions, + regs, + xsave.buffer.to_vec(), + self.entrypoint, + self.rt_cfg.binary_path.clone(), + filename, + ))) + } else { + Ok(None) + } } #[cfg(gdb)] @@ -872,6 +934,15 @@ mod tests { &config, #[cfg(gdb)] None, + #[cfg(crashdump)] + SandboxRuntimeConfig { + #[cfg(crashdump)] + binary_path: None, + #[cfg(gdb)] + debug_info: None, + #[cfg(crashdump)] + guest_core_dump: true, + }, ) .unwrap(); } diff --git a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs index f692ec179..86603f7e2 100644 --- a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs @@ -30,6 +30,8 @@ use windows::Win32::System::Hypervisor::{ WHV_REGISTER_VALUE, WHV_RUN_VP_EXIT_CONTEXT, WHV_RUN_VP_EXIT_REASON, WHV_X64_SEGMENT_REGISTER, WHV_X64_SEGMENT_REGISTER_0, }; +#[cfg(crashdump)] +use {super::crashdump, std::path::Path}; use super::fpu::{FP_TAG_WORD_DEFAULT, MXCSR_DEFAULT}; #[cfg(gdb)] @@ -48,6 +50,8 @@ use crate::hypervisor::fpu::FP_CONTROL_WORD_DEFAULT; use crate::hypervisor::wrappers::WHvGeneralRegisters; use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; use crate::mem::ptr::{GuestPtr, RawPtr}; +#[cfg(crashdump)] +use crate::sandbox::uninitialized::SandboxRuntimeConfig; use crate::{debug, new_error, Result}; /// A Hypervisor driver for HyperV-on-Windows. 
@@ -60,6 +64,8 @@ pub(crate) struct HypervWindowsDriver {
     orig_rsp: GuestPtr,
     mem_regions: Vec<MemoryRegion>,
     interrupt_handle: Arc<WindowsInterruptHandle>,
+    #[cfg(crashdump)]
+    rt_cfg: SandboxRuntimeConfig,
 }
 /* This does not automatically impl Send/Sync because the host
  * address of the shared memory region is a raw pointer, which are
@@ -70,6 +76,7 @@ unsafe impl Send for HypervWindowsDriver {}
 unsafe impl Sync for HypervWindowsDriver {}
 
 impl HypervWindowsDriver {
+    #[allow(clippy::too_many_arguments)]
     #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
     pub(crate) fn new(
         mem_regions: Vec<MemoryRegion>,
@@ -79,6 +86,7 @@ impl HypervWindowsDriver {
         entrypoint: u64,
         rsp: u64,
         mmap_file_handle: HandleWrapper,
+        #[cfg(crashdump)] rt_cfg: SandboxRuntimeConfig,
     ) -> Result<Self> {
         // create and setup hypervisor partition
         let mut partition = VMPartition::new(1)?;
@@ -113,6 +121,8 @@
                 partition_handle,
                 dropped: AtomicBool::new(false),
             }),
+            #[cfg(crashdump)]
+            rt_cfg,
         })
     }
 
@@ -515,8 +525,61 @@ impl Hypervisor for HypervWindowsDriver {
     }
 
     #[cfg(crashdump)]
-    fn get_memory_regions(&self) -> &[MemoryRegion] {
-        &self.mem_regions
+    fn crashdump_context(&self) -> Result<Option<crashdump::CrashDumpContext>> {
+        if self.rt_cfg.guest_core_dump {
+            let mut regs = [0; 27];
+
+            let vcpu_regs = self.processor.get_regs()?;
+            let sregs = self.processor.get_sregs()?;
+            let xsave = self.processor.get_xsave()?;
+
+            // Set the registers in the order expected by the crashdump context
+            regs[0] = vcpu_regs.r15; // r15
+            regs[1] = vcpu_regs.r14; // r14
+            regs[2] = vcpu_regs.r13; // r13
+            regs[3] = vcpu_regs.r12; // r12
+            regs[4] = vcpu_regs.rbp; // rbp
+            regs[5] = vcpu_regs.rbx; // rbx
+            regs[6] = vcpu_regs.r11; // r11
+            regs[7] = vcpu_regs.r10; // r10
+            regs[8] = vcpu_regs.r9; // r9
+            regs[9] = vcpu_regs.r8; // r8
+            regs[10] = vcpu_regs.rax; // rax
+            regs[11] = vcpu_regs.rcx; // rcx
+            regs[12] = vcpu_regs.rdx; // rdx
+            regs[13] = vcpu_regs.rsi; // rsi
+            regs[14] = vcpu_regs.rdi; // rdi
+            regs[15] = 0; // orig rax
+            regs[16] = vcpu_regs.rip; // rip
+            regs[17] = unsafe { sregs.cs.Segment.Selector } as u64; // cs
+            regs[18] = vcpu_regs.rflags; // eflags
+            regs[19] = vcpu_regs.rsp; // rsp
+            regs[20] = unsafe { sregs.ss.Segment.Selector } as u64; // ss
+            regs[21] = unsafe { sregs.fs.Segment.Base }; // fs_base
+            regs[22] = unsafe { sregs.gs.Segment.Base }; // gs_base
+            regs[23] = unsafe { sregs.ds.Segment.Selector } as u64; // ds
+            regs[24] = unsafe { sregs.es.Segment.Selector } as u64; // es
+            regs[25] = unsafe { sregs.fs.Segment.Selector } as u64; // fs
+            regs[26] = unsafe { sregs.gs.Segment.Selector } as u64; // gs
+
+            // Get the filename from the config
+            let filename = self.rt_cfg.binary_path.clone().and_then(|path| {
+                Path::new(&path)
+                    .file_name()
+                    .and_then(|name| name.to_os_string().into_string().ok())
+            });
+
+            Ok(Some(crashdump::CrashDumpContext::new(
+                &self.mem_regions,
+                regs,
+                xsave,
+                self.entrypoint,
+                self.rt_cfg.binary_path.clone(),
+                filename,
+            )))
+        } else {
+            Ok(None)
+        }
     }
 }
diff --git a/src/hyperlight_host/src/hypervisor/hypervisor_handler.rs b/src/hyperlight_host/src/hypervisor/hypervisor_handler.rs
new file mode 100644
index 000000000..314cde3db
--- /dev/null
+++ b/src/hyperlight_host/src/hypervisor/hypervisor_handler.rs
@@ -0,0 +1,1082 @@
+/*
+Copyright 2025 The Hyperlight Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#[cfg(target_os = "windows")]
+use core::ffi::c_void;
+use std::ops::DerefMut;
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::sync::{Arc, Mutex};
+use std::thread;
+use std::thread::{sleep, JoinHandle};
+use std::time::Duration;
+
+#[cfg(target_os = "linux")]
+use crossbeam::atomic::AtomicCell;
+use crossbeam_channel::{Receiver, Sender};
+#[cfg(target_os = "linux")]
+use libc::{pthread_kill, pthread_self, ESRCH};
+use log::{error, info, LevelFilter};
+use tracing::{instrument, Span};
+#[cfg(target_os = "linux")]
+use vmm_sys_util::signal::SIGRTMIN;
+#[cfg(target_os = "windows")]
+use windows::Win32::System::Hypervisor::{WHvCancelRunVirtualProcessor, WHV_PARTITION_HANDLE};
+
+#[cfg(gdb)]
+use super::gdb::create_gdb_thread;
+#[cfg(gdb)]
+use crate::hypervisor::handlers::DbgMemAccessHandlerWrapper;
+use crate::hypervisor::handlers::{MemAccessHandlerWrapper, OutBHandlerWrapper};
+#[cfg(target_os = "windows")]
+use crate::hypervisor::wrappers::HandleWrapper;
+use crate::hypervisor::Hypervisor;
+use crate::mem::layout::SandboxMemoryLayout;
+use crate::mem::mgr::SandboxMemoryManager;
+use crate::mem::ptr::{GuestPtr, RawPtr};
+use crate::mem::ptr_offset::Offset;
+use crate::mem::shared_mem::{GuestSharedMemory, HostSharedMemory, SharedMemory};
+#[cfg(gdb)]
+use crate::sandbox::config::DebugInfo;
+use crate::sandbox::hypervisor::{get_available_hypervisor, HypervisorType};
+#[cfg(any(crashdump, gdb))]
+use crate::sandbox::uninitialized::SandboxRuntimeConfig;
+#[cfg(target_os = "linux")]
+use crate::signal_handlers::setup_signal_handlers;
+use crate::HyperlightError::{
+    GuestExecutionHungOnHostFunctionCall,
+    HypervisorHandlerExecutionCancelAttemptOnFinishedExecution, NoHypervisorFound,
+};
+use crate::{log_then_return, new_error, HyperlightError, Result};
+
+type HypervisorHandlerTx = Sender<HypervisorHandlerAction>;
+type HypervisorHandlerRx = Receiver<HypervisorHandlerAction>;
+type HandlerMsgTx = Sender<HandlerMsg>;
+type HandlerMsgRx = Receiver<HandlerMsg>;
+
+#[derive(Clone)]
+pub(crate) struct HypervisorHandler {
+    communication_channels: HvHandlerCommChannels,
+    configuration: HvHandlerConfig,
+    execution_variables: HvHandlerExecVars,
+}
+
+impl HypervisorHandler {
+    pub(crate) fn set_running(&self, running: bool) {
+        self.execution_variables
+            .running
+            .store(running, Ordering::SeqCst);
+    }
+
+    #[cfg(target_os = "linux")]
+    pub(crate) fn set_run_cancelled(&self, run_cancelled: bool) {
+        self.execution_variables.run_cancelled.store(run_cancelled);
+    }
+}
+
+// Note: `join_handle` and `running` have to be `Arc` because we need
+// this struct to be `Clone` to be able to pass it to the Hypervisor handler thread.
+//
+// `join_handle` also has to be `Mutex` because we need to be able to `take` it when we
+// `try_join_hypervisor_handler_thread`.
+#[derive(Clone)]
+struct HvHandlerExecVars {
+    join_handle: Arc<Mutex<Option<JoinHandle<Result<()>>>>>,
+    shm: Arc<Mutex<Option<SandboxMemoryManager<GuestSharedMemory>>>>,
+    timeout: Arc<Mutex<Duration>>,
+    #[cfg(target_os = "linux")]
+    thread_id: Arc<Mutex<Option<libc::pthread_t>>>,
+    #[cfg(target_os = "windows")]
+    partition_handle: Arc<Mutex<Option<WHV_PARTITION_HANDLE>>>,
+    running: Arc<AtomicBool>,
+    #[cfg(target_os = "linux")]
+    run_cancelled: Arc<AtomicCell<bool>>,
+}
+
+impl HvHandlerExecVars {
+    /// Sets the `join_handle`, to be called after `thread::spawn` in `start_hypervisor_handler`.
+    fn set_join_handle(&mut self, join_handle: JoinHandle<Result<()>>) -> Result<()> {
+        *self
+            .join_handle
+            .try_lock()
+            .map_err(|_| new_error!("Failed to set_join_handle"))? = Some(join_handle);
+
+        Ok(())
+    }
+
+    #[cfg(target_os = "linux")]
+    fn set_thread_id(&mut self, thread_id: libc::pthread_t) -> Result<()> {
+        *self
+            .thread_id
+            .try_lock()
+            .map_err(|_| new_error!("Failed to set_thread_id"))? = Some(thread_id);
+
+        Ok(())
+    }
+
+    #[cfg(target_os = "linux")]
+    fn get_thread_id(&self) -> Result<libc::pthread_t> {
+        (*self
+            .thread_id
+            .try_lock()
+            .map_err(|_| new_error!("Failed to get_thread_id"))?)
+        .ok_or_else(|| new_error!("thread_id not set"))
+    }
+
+    #[cfg(target_os = "windows")]
+    fn set_partition_handle(&mut self, partition_handle: WHV_PARTITION_HANDLE) -> Result<()> {
+        *self
+            .partition_handle
+            .try_lock()
+            .map_err(|_| new_error!("Failed to set_partition_handle"))? = Some(partition_handle);
+
+        Ok(())
+    }
+
+    #[cfg(target_os = "windows")]
+    fn get_partition_handle(&self) -> Result<Option<WHV_PARTITION_HANDLE>> {
+        Ok(*self
+            .partition_handle
+            .try_lock()
+            .map_err(|_| new_error!("Failed to get_partition_handle"))?)
+    }
+
+    fn set_timeout(&mut self, timeout: Duration) -> Result<()> {
+        *self
+            .timeout
+            .try_lock()
+            .map_err(|_| new_error!("Failed to set_timeout"))? = timeout;
+
+        Ok(())
+    }
+
+    fn get_timeout(&self) -> Result<Duration> {
+        Ok(*self
+            .timeout
+            .try_lock()
+            .map_err(|_| new_error!("Failed to get_timeout"))?)
+    }
+}
+
+#[derive(Clone)]
+struct HvHandlerCommChannels {
+    to_handler_tx: HypervisorHandlerTx,
+    to_handler_rx: HypervisorHandlerRx,
+    from_handler_tx: HandlerMsgTx,
+    from_handler_rx: HandlerMsgRx,
+}
+
+#[derive(Clone)]
+pub(crate) struct HvHandlerConfig {
+    pub(crate) peb_addr: RawPtr,
+    pub(crate) seed: u64,
+    pub(crate) page_size: u32,
+    pub(crate) dispatch_function_addr: Arc<Mutex<Option<RawPtr>>>,
+    pub(crate) max_init_time: Duration,
+    pub(crate) max_exec_time: Duration,
+    pub(crate) outb_handler: OutBHandlerWrapper,
+    pub(crate) mem_access_handler: MemAccessHandlerWrapper,
+    pub(crate) max_wait_for_cancellation: Duration,
+    pub(crate) max_guest_log_level: Option<LevelFilter>,
+    #[cfg(gdb)]
+    pub(crate) dbg_mem_access_handler: DbgMemAccessHandlerWrapper,
+}
+
+impl HypervisorHandler {
+    /// Creates a new Hypervisor Handler with a given configuration. This call must precede a call
+    /// to `start_hypervisor_handler`.
+    pub(crate) fn new(configuration: HvHandlerConfig) -> Self {
+        let (to_handler_tx, to_handler_rx) = crossbeam_channel::unbounded();
+        let (from_handler_tx, from_handler_rx) = crossbeam_channel::unbounded();
+
+        let communication_channels = HvHandlerCommChannels {
+            to_handler_tx,
+            to_handler_rx,
+            from_handler_tx,
+            from_handler_rx,
+        };
+
+        let execution_variables = HvHandlerExecVars {
+            join_handle: Arc::new(Mutex::new(None)),
+            shm: Arc::new(Mutex::new(None)),
+            #[cfg(target_os = "linux")]
+            thread_id: Arc::new(Mutex::new(None)),
+            #[cfg(target_os = "windows")]
+            partition_handle: Arc::new(Mutex::new(None)),
+            running: Arc::new(AtomicBool::new(false)),
+            #[cfg(target_os = "linux")]
+            run_cancelled: Arc::new(AtomicCell::new(false)),
+            timeout: Arc::new(Mutex::new(configuration.max_init_time)),
+        };
+
+        Self {
+            communication_channels,
+            configuration,
+            execution_variables,
+        }
+    }
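+
+    // Editor's sketch (illustrative, not part of the original patch) of the
+    // intended lifecycle, with configuration values elided:
+    //
+    //     let mut handler = HypervisorHandler::new(config);
+    //     handler.start_hypervisor_handler(mgr, rt_cfg)?;
+    //     handler.execute_hypervisor_handler_action(HypervisorHandlerAction::Initialise)?;
+    //     handler.execute_hypervisor_handler_action(
+    //         HypervisorHandlerAction::DispatchCallFromHost("guest_fn".to_string()),
+    //     )?;
+    //     handler.kill_hypervisor_handler_thread()?;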
+
+    /// Sets up a Hypervisor 'handler', designed to listen to messages to execute a specific action,
+    /// such as:
+    /// - `initialise` resources,
+    /// - `dispatch_call_from_host` in the vCPU, and
+    /// - `terminate_execution` of the vCPU.
+    ///
+    /// To send messages to the hypervisor handler thread, use `execute_hypervisor_handler_action`.
+    #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
+    pub(crate) fn start_hypervisor_handler(
+        &mut self,
+        sandbox_memory_manager: SandboxMemoryManager<GuestSharedMemory>,
+        #[cfg(any(crashdump, gdb))] rt_cfg: SandboxRuntimeConfig,
+    ) -> Result<()> {
+        let configuration = self.configuration.clone();
+
+        *self
+            .execution_variables
+            .shm
+            .try_lock()
+            .map_err(|e| new_error!("Failed to lock shm: {}", e))? = Some(sandbox_memory_manager);
+
+        // Other than running initialization and code execution, the handler thread also handles
+        // cancellation. When we need to cancel the execution there are 2 possible cases
+        // we have to deal with depending on if the vCPU is currently running or not.
+        //
+        // 1. If the vCPU is executing, then we need to cancel the execution.
+        // 2. If the vCPU is not executing, then we need to signal to the thread
+        //    that it should exit the loop.
+        //
+        // For the first case, on Linux, we send a signal to the thread running the
+        // vCPU to interrupt it and cause an EINTR error on the underlying VM run call.
+        //
+        // For the second case, we set a flag that is checked on each iteration of the run loop
+        // and if it is set to true then the loop will exit.
+
+        // On Linux, we have another problem to deal with. The way we terminate a running vCPU
+        // (case 1 above) is to send a signal to the thread running the vCPU to interrupt it.
+        //
+        // There is a possibility that the signal is sent and received just before the thread
+        // calls run on the vCPU (between the check on the cancelled_run variable and the call to run)
+        // - see this StackOverflow question for more details
+        // https://stackoverflow.com/questions/25799667/fixing-race-condition-when-sending-signal-to-interrupt-system-call)
+        //
+        // To solve this, we need to keep sending the signal until we know that the spawned thread
+        // knows it should cancel the execution.
+        #[cfg(target_os = "linux")]
+        self.execution_variables.run_cancelled.store(false);
+
+        let to_handler_rx = self.communication_channels.to_handler_rx.clone();
+        let mut execution_variables = self.execution_variables.clone();
+        let from_handler_tx = self.communication_channels.from_handler_tx.clone();
+        let hv_handler_clone = self.clone();
+
+        // Hyperlight has two signal handlers:
+        // (1) for timeouts, and
+        // (2) for seccomp (when enabled).
+        //
+        // This sets up Hyperlight signal handlers for the process, which are chained
+        // to the existing signal handlers.
+        #[cfg(target_os = "linux")]
+        setup_signal_handlers()?;
+
+        let join_handle = {
+            thread::Builder::new()
+                .name("Hypervisor Handler".to_string())
+                .spawn(move || -> Result<()> {
+                    let mut hv: Option<Box<dyn Hypervisor>> = None;
+                    for action in to_handler_rx {
+                        match action {
+                            HypervisorHandlerAction::Initialise => {
+                                {
+                                    hv = Some(set_up_hypervisor_partition(
+                                        execution_variables.shm.try_lock().map_err(|e| new_error!("Failed to lock shm: {}", e))?.deref_mut().as_mut().ok_or_else(|| new_error!("shm not set"))?,
+                                        #[cfg(any(crashdump, gdb))]
+                                        &rt_cfg,
+                                    )?);
+                                }
+                                let hv = hv.as_mut().ok_or_else(|| new_error!("Hypervisor not set"))?;
+
+                                #[cfg(target_os = "windows")]
+                                execution_variables.set_partition_handle(hv.get_partition_handle())?;
+                                #[cfg(target_os = "linux")]
+                                {
+                                    // We cannot use the Killable trait, so we get the `pthread_t` via a libc
+                                    // call.
+ execution_variables.set_thread_id(unsafe { pthread_self() })?; + } + + #[cfg(target_os = "linux")] + execution_variables.run_cancelled.store(false); + + log::info!("Initialising Hypervisor Handler"); + + let mut evar_lock_guard = + execution_variables.shm.try_lock().map_err(|e| { + new_error!( + "Error locking exec var shm lock: {}:{}: {}", + file!(), + line!(), + e + ) + })?; + // This apparently-useless lock is + // needed to ensure the host does not + // make unsynchronized accesses while + // the guest is executing. See the + // documentation for + // GuestSharedMemory::lock. + let mem_lock_guard = evar_lock_guard + .as_mut() + .ok_or_else(|| { + new_error!("guest shm lock: {}:{}:", file!(), line!()) + })? + .shared_mem + .lock + .try_read(); + + let res = hv.initialise( + configuration.peb_addr.clone(), + configuration.seed, + configuration.page_size, + configuration.outb_handler.clone(), + configuration.mem_access_handler.clone(), + Some(hv_handler_clone.clone()), + configuration.max_guest_log_level, + #[cfg(gdb)] + configuration.dbg_mem_access_handler.clone(), + ); + drop(mem_lock_guard); + drop(evar_lock_guard); + + execution_variables.running.store(false, Ordering::SeqCst); + + match res { + Ok(_) => { + log::info!("Initialised Hypervisor Handler"); + from_handler_tx + .send(HandlerMsg::FinishedHypervisorHandlerAction) + .map_err(|_| { + HyperlightError::HypervisorHandlerCommunicationFailure() + })?; + } + Err(e) => { + log::info!( + "Error initialising Hypervisor Handler: {:?}", + e + ); + from_handler_tx.send(HandlerMsg::Error(e)).map_err(|_| { + HyperlightError::HypervisorHandlerCommunicationFailure() + })?; + } + } + } + HypervisorHandlerAction::DispatchCallFromHost(function_name) => { + let hv = hv.as_mut().ok_or_else(|| new_error!("Hypervisor not initialized"))?; + + #[cfg(target_os = "linux")] + execution_variables.run_cancelled.store(false); + + info!("Dispatching call from host: {}", function_name); + + let dispatch_function_addr = configuration + .dispatch_function_addr + .clone() + .try_lock() + .map_err(|e| { + new_error!( + "Error locking at {}:{}: {}", + file!(), + line!(), + e + ) + })? + .clone() + .ok_or_else(|| new_error!("Hypervisor not initialized"))?; + + let mut evar_lock_guard = + execution_variables.shm.try_lock().map_err(|e| { + new_error!( + "Error locking exec var shm lock: {}:{}: {}", + file!(), + line!(), + e + ) + })?; + // This apparently-useless lock is + // needed to ensure the host does not + // make unsynchronized accesses while + // the guest is executing. See the + // documentation for + // GuestSharedMemory::lock. + let mem_lock_guard = evar_lock_guard + .as_mut() + .ok_or_else(|| { + new_error!("guest shm lock {}:{}", file!(), line!()) + })? 
+ .shared_mem + .lock + .try_read(); + + let res = crate::metrics::maybe_time_and_emit_guest_call( + &function_name, + || { + hv.dispatch_call_from_host( + dispatch_function_addr, + configuration.outb_handler.clone(), + configuration.mem_access_handler.clone(), + Some(hv_handler_clone.clone()), + #[cfg(gdb)] + configuration.dbg_mem_access_handler.clone(), + ) + }, + ); + + drop(mem_lock_guard); + drop(evar_lock_guard); + + execution_variables.running.store(false, Ordering::SeqCst); + + match res { + Ok(_) => { + log::info!( + "Finished dispatching call from host: {}", + function_name + ); + from_handler_tx + .send(HandlerMsg::FinishedHypervisorHandlerAction) + .map_err(|_| { + HyperlightError::HypervisorHandlerCommunicationFailure() + })?; + } + Err(e) => { + log::info!( + "Error dispatching call from host: {}: {:?}", + function_name, + e + ); + from_handler_tx.send(HandlerMsg::Error(e)).map_err(|_| { + HyperlightError::HypervisorHandlerCommunicationFailure() + })?; + } + } + } + HypervisorHandlerAction::TerminateHandlerThread => { + info!("Terminating Hypervisor Handler Thread"); + break; + } + } + } + + // If we make it here, it means the main thread issued a `TerminateHandlerThread` action, + // and we are now exiting the handler thread. + { + from_handler_tx + .send(HandlerMsg::FinishedHypervisorHandlerAction) + .map_err(|_| { + HyperlightError::HypervisorHandlerCommunicationFailure() + })?; + } + + Ok(()) + }) + }; + + self.execution_variables.set_join_handle(join_handle?)?; + + Ok(()) + } + + /// Try `join` on `HypervisorHandler` thread for `timeout` duration. + /// - Before attempting a join, this function checks if execution isn't already finished. + /// Note: This function call takes ownership of the `JoinHandle`. + #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] + pub(crate) fn try_join_hypervisor_handler_thread(&mut self) -> Result<()> { + let mut join_handle_guard = self + .execution_variables + .join_handle + .try_lock() + .map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))?; + if let Some(handle) = join_handle_guard.take() { + // check if thread is handle.is_finished for `timeout` + // note: dropping the transmitter in `kill_hypervisor_handler_thread` + // should have caused the thread to finish, in here, we are just syncing. + let now = std::time::Instant::now(); + + while now.elapsed() < self.execution_variables.get_timeout()? { + if handle.is_finished() { + match handle.join() { + // as per docs, join should return immediately and not hang if finished + Ok(Ok(())) => return Ok(()), + Ok(Err(e)) => { + log_then_return!(e); + } + Err(e) => { + log_then_return!(new_error!("{:?}", e)); + } + } + } + sleep(Duration::from_millis(1)); // sleep to not busy wait + } + } + + return Err(HyperlightError::Error( + "Failed to finish Hypervisor handler thread".to_string(), + )); + } + + /// Tries to kill the Hypervisor Handler Thread. + #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] + pub(crate) fn kill_hypervisor_handler_thread(&mut self) -> Result<()> { + log::debug!("Killing Hypervisor Handler Thread"); + self.execute_hypervisor_handler_action(HypervisorHandlerAction::TerminateHandlerThread)?; + + self.try_join_hypervisor_handler_thread() + } + + /// Send a message to the Hypervisor Handler and wait for a response. + /// + /// This function should be used for most interactions with the Hypervisor + /// Handler. 
+ pub(crate) fn execute_hypervisor_handler_action( + &mut self, + hypervisor_handler_action: HypervisorHandlerAction, + ) -> Result<()> { + log::debug!( + "Sending Hypervisor Handler Action: {:?}", + hypervisor_handler_action + ); + + match hypervisor_handler_action { + HypervisorHandlerAction::Initialise => self + .execution_variables + .set_timeout(self.configuration.max_init_time)?, + HypervisorHandlerAction::DispatchCallFromHost(_) => self + .execution_variables + .set_timeout(self.configuration.max_exec_time)?, + HypervisorHandlerAction::TerminateHandlerThread => self + .execution_variables + .set_timeout(self.configuration.max_init_time)?, + // note: terminate can never hang, so setting the timeout for it is just + // for completion of the match statement, and it is not really needed for + // `TerminateHandlerThread`. + } + + self.set_running(true); + self.communication_channels + .to_handler_tx + .send(hypervisor_handler_action) + .map_err(|_| HyperlightError::HypervisorHandlerCommunicationFailure())?; + + log::debug!("Waiting for Hypervisor Handler Response"); + + self.try_receive_handler_msg() + } + + /// Try to receive a `HandlerMsg` from the Hypervisor Handler Thread. + /// + /// Usually, you should use `execute_hypervisor_handler_action` to send and instantly + /// try to receive a message. + /// + /// This function is only useful when we time out, handle a timeout, + /// and still have to receive after sorting that out without sending + /// an extra message. + pub(crate) fn try_receive_handler_msg(&self) -> Result<()> { + // When gdb debugging is enabled, we don't want to timeout on receiving messages + // from the handler thread, as the thread may be paused by gdb. + // In this case, we will wait indefinitely for a message from the handler thread. + // Note: This applies to all the running sandboxes, not just the one being debugged. + #[cfg(gdb)] + let response = self.communication_channels.from_handler_rx.recv(); + #[cfg(not(gdb))] + let response = self + .communication_channels + .from_handler_rx + .recv_timeout(self.execution_variables.get_timeout()?); + + match response { + Ok(msg) => match msg { + HandlerMsg::Error(e) => Err(e), + HandlerMsg::FinishedHypervisorHandlerAction => Ok(()), + }, + Err(_) => { + // If we have timed out it may be that the handler thread returned an error before it sent a message, so rather than just timeout here + // we will try and get the join handle for the thread and if it has finished check to see if it returned an error + // if it did then we will return that error, otherwise we will return the timeout error + // we need to take ownership of the handle to join it + match self + .execution_variables + .join_handle + .try_lock() + .map_err(|_| HyperlightError::HypervisorHandlerMessageReceiveTimedout())? 
+                .take_if(|handle| handle.is_finished())
+            {
+                Some(handle) => {
+                    // If the thread has finished, we try to join it and return the error if it has one
+                    let res = handle.join();
+                    if res.as_ref().is_ok_and(|inner_res| inner_res.is_err()) {
+                        #[allow(clippy::unwrap_used)]
+                        // We know that the thread has finished and that the inner result is an error, so we can safely unwrap the result and the contained err
+                        return Err(res.unwrap().unwrap_err());
+                    }
+                    Err(HyperlightError::HypervisorHandlerMessageReceiveTimedout())
+                }
+                None => Err(HyperlightError::HypervisorHandlerMessageReceiveTimedout()),
+                }
+            }
+        }
+    }
+
+    /// Terminate the execution of the hypervisor handler
+    ///
+    /// This function is intended to be called after a guest function call has
+    /// timed out (i.e., `from_handler_rx.recv_timeout(timeout).is_err()`).
+    ///
+    /// It is possible that, even after we timed-out, the guest function execution will
+    /// finish. If that is the case, this function is fundamentally a NOOP, because it
+    /// will restore the memory snapshot to the last state, and then re-initialise the
+    /// accidentally terminated vCPU.
+    ///
+    /// This function, usually, will return one of the following `HyperlightError`s:
+    /// - `ExecutionCanceledByHost` if the execution was successfully terminated, or
+    /// - `HypervisorHandlerExecutionCancelAttemptOnFinishedExecution` if the execution
+    ///   finished while we tried to terminate it.
+    ///
+    /// Hence, common usage of this function would be to match on the result. If you get a
+    /// `HypervisorHandlerExecutionCancelAttemptOnFinishedExecution`, you can safely ignore
+    /// the error and retrieve the return value from shared memory.
+    pub(crate) fn terminate_hypervisor_handler_execution_and_reinitialise(
+        &mut self,
+        sandbox_memory_manager: &mut SandboxMemoryManager<HostSharedMemory>,
+    ) -> Result<HyperlightError> {
+        {
+            if !self.execution_variables.running.load(Ordering::SeqCst) {
+                info!("Execution finished while trying to cancel it");
+                return Ok(HypervisorHandlerExecutionCancelAttemptOnFinishedExecution());
+            } else {
+                self.terminate_execution()?;
+            }
+        }
+
+        {
+            sleep(self.configuration.max_wait_for_cancellation);
+            // check if still running
+            if self.execution_variables.running.load(Ordering::SeqCst) {
+                // If we still fail to acquire the hv_lock, this means that
+                // we had actually timed-out on a host function call as the
+                // `WHvCancelRunVirtualProcessor` didn't unlock.
+
+                log::info!("Tried to cancel guest execution on host function call");
+                return Err(GuestExecutionHungOnHostFunctionCall());
+            }
+        }
+
+        // Receive `ExecutionCancelledByHost` or other
+        let res = match self.try_receive_handler_msg() {
+            Ok(_) => Ok(new_error!(
+                "Expected ExecutionCanceledByHost, but received FinishedHypervisorHandlerAction"
+            )),
+            Err(e) => match e {
+                HyperlightError::ExecutionCanceledByHost() => {
+                    Ok(HyperlightError::ExecutionCanceledByHost())
+                }
+                _ => Ok(new_error!(
+                    "Expected ExecutionCanceledByHost, but received: {:?}",
+                    e
+                )),
+            },
+        };
+
+        // We cancelled execution, so we restore the state to what it was prior to the bad state
+        // that caused the timeout.
+        sandbox_memory_manager.restore_state_from_last_snapshot()?;
+
+        // Re-initialise the vCPU.
+        // This is 100% needed because, otherwise, all it takes to cause a DoS is for a
+        // function to timeout as the vCPU will be in a bad state without re-init.
+ log::debug!("Re-initialising vCPU"); + self.execute_hypervisor_handler_action(HypervisorHandlerAction::Initialise)?; + + res + } + + pub(crate) fn set_dispatch_function_addr( + &mut self, + dispatch_function_addr: RawPtr, + ) -> Result<()> { + *self + .configuration + .dispatch_function_addr + .try_lock() + .map_err(|_| new_error!("Failed to set_dispatch_function_addr"))? = + Some(dispatch_function_addr); + + Ok(()) + } + + #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] + pub(crate) fn terminate_execution(&self) -> Result<()> { + error!( + "Execution timed out after {} milliseconds , cancelling execution", + self.execution_variables.get_timeout()?.as_millis() + ); + + #[cfg(target_os = "linux")] + { + let thread_id = self.execution_variables.get_thread_id()?; + if thread_id == u64::MAX { + log_then_return!("Failed to get thread id to signal thread"); + } + let mut count: u128 = 0; + // We need to send the signal multiple times in case the thread was between checking if it + // should be cancelled and entering the run loop + + // We cannot do this forever (if the thread is calling a host function that never + // returns we will sit here forever), so use the timeout_wait_to_cancel to limit the number + // of iterations + + let number_of_iterations = + self.configuration.max_wait_for_cancellation.as_micros() / 500; + + while !self.execution_variables.run_cancelled.load() { + count += 1; + + if count > number_of_iterations { + break; + } + + info!( + "Sending signal to thread {} iteration: {}", + thread_id, count + ); + + let ret = unsafe { pthread_kill(thread_id, SIGRTMIN()) }; + // We may get ESRCH if we try to signal a thread that has already exited + if ret < 0 && ret != ESRCH { + log_then_return!("error {} calling pthread_kill", ret); + } + std::thread::sleep(Duration::from_micros(500)); + } + if !self.execution_variables.run_cancelled.load() { + log_then_return!(GuestExecutionHungOnHostFunctionCall()); + } + } + #[cfg(target_os = "windows")] + { + unsafe { + WHvCancelRunVirtualProcessor( + #[allow(clippy::unwrap_used)] + self.execution_variables.get_partition_handle()?.unwrap(), // safe unwrap as we checked is some + 0, + 0, + ) + .map_err(|e| new_error!("Failed to cancel guest execution {:?}", e))?; + } + } + + Ok(()) + } +} + +/// `HypervisorHandlerActions` enumerates the +/// possible actions that a Hypervisor +/// handler can execute. +pub enum HypervisorHandlerAction { + /// Initialise the vCPU + Initialise, + /// Execute a function call (String = name) from the host + DispatchCallFromHost(String), + /// Terminate hypervisor handler thread + TerminateHandlerThread, +} + +// Debug impl for HypervisorHandlerAction: +// - just prints the enum variant type name. +impl std::fmt::Debug for HypervisorHandlerAction { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + HypervisorHandlerAction::Initialise => write!(f, "Initialise"), + HypervisorHandlerAction::DispatchCallFromHost(_) => write!(f, "DispatchCallFromHost"), + HypervisorHandlerAction::TerminateHandlerThread => write!(f, "TerminateHandlerThread"), + } + } +} + +/// `HandlerMsg` is structure used by the Hypervisor +/// handler to indicate that the Hypervisor Handler has +/// finished performing an action (i.e., `DispatchCallFromHost`, or +/// `Initialise`). 
+pub enum HandlerMsg {
+    FinishedHypervisorHandlerAction,
+    Error(HyperlightError),
+}
+
+fn set_up_hypervisor_partition(
+    mgr: &mut SandboxMemoryManager<GuestSharedMemory>,
+    #[cfg(any(crashdump, gdb))] rt_cfg: &SandboxRuntimeConfig,
+) -> Result<Box<dyn Hypervisor>> {
+    let mem_size = u64::try_from(mgr.shared_mem.mem_size())?;
+    let mut regions = mgr.layout.get_memory_regions(&mgr.shared_mem)?;
+    let rsp_ptr = {
+        let rsp_u64 = mgr.set_up_shared_memory(mem_size, &mut regions)?;
+        let rsp_raw = RawPtr::from(rsp_u64);
+        GuestPtr::try_from(rsp_raw)
+    }?;
+    let base_ptr = GuestPtr::try_from(Offset::from(0))?;
+    let pml4_ptr = {
+        let pml4_offset_u64 = u64::try_from(SandboxMemoryLayout::PML4_OFFSET)?;
+        base_ptr + Offset::from(pml4_offset_u64)
+    };
+    let entrypoint_ptr = {
+        let entrypoint_total_offset = mgr.load_addr.clone() + mgr.entrypoint_offset;
+        GuestPtr::try_from(entrypoint_total_offset)
+    }?;
+
+    if base_ptr != pml4_ptr {
+        log_then_return!(
+            "Error: base_ptr ({:#?}) does not equal pml4_ptr ({:#?})",
+            base_ptr,
+            pml4_ptr
+        );
+    }
+    if entrypoint_ptr <= pml4_ptr {
+        log_then_return!(
+            "Error: entrypoint_ptr ({:#?}) is not greater than pml4_ptr ({:#?})",
+            entrypoint_ptr,
+            pml4_ptr
+        );
+    }
+
+    // Create gdb thread if gdb is enabled and the configuration is provided
+    #[cfg(gdb)]
+    let gdb_conn = if let Some(DebugInfo { port }) = rt_cfg.debug_info {
+        let gdb_conn = create_gdb_thread(port, unsafe { pthread_self() });
+
+        // in case the gdb thread creation fails, we still want to continue
+        // without gdb
+        match gdb_conn {
+            Ok(gdb_conn) => Some(gdb_conn),
+            Err(e) => {
+                log::error!("Could not create gdb connection: {:#}", e);
+
+                None
+            }
+        }
+    } else {
+        None
+    };
+
+    match *get_available_hypervisor() {
+        #[cfg(mshv)]
+        Some(HypervisorType::Mshv) => {
+            let hv = crate::hypervisor::hyperv_linux::HypervLinuxDriver::new(
+                regions,
+                entrypoint_ptr,
+                rsp_ptr,
+                pml4_ptr,
+                #[cfg(gdb)]
+                gdb_conn,
+                #[cfg(crashdump)]
+                rt_cfg.clone(),
+            )?;
+            Ok(Box::new(hv))
+        }
+
+        #[cfg(kvm)]
+        Some(HypervisorType::Kvm) => {
+            let hv = crate::hypervisor::kvm::KVMDriver::new(
+                regions,
+                pml4_ptr.absolute()?,
+                entrypoint_ptr.absolute()?,
+                rsp_ptr.absolute()?,
+                #[cfg(gdb)]
+                gdb_conn,
+                #[cfg(crashdump)]
+                rt_cfg.clone(),
+            )?;
+            Ok(Box::new(hv))
+        }
+
+        #[cfg(target_os = "windows")]
+        Some(HypervisorType::Whp) => {
+            let mmap_file_handle = mgr
+                .shared_mem
+                .with_exclusivity(|e| e.get_mmap_file_handle())?;
+            let hv = crate::hypervisor::hyperv_windows::HypervWindowsDriver::new(
+                regions,
+                mgr.shared_mem.raw_mem_size(), // we use raw_* here because windows driver requires 64K aligned addresses,
+                mgr.shared_mem.raw_ptr() as *mut c_void, // and instead convert it to base_addr where needed in the driver itself
+                pml4_ptr.absolute()?,
+                entrypoint_ptr.absolute()?,
+                rsp_ptr.absolute()?,
+                HandleWrapper::from(mmap_file_handle),
+                #[cfg(crashdump)]
+                rt_cfg.clone(),
+            )?;
+            Ok(Box::new(hv))
+        }
+
+        _ => {
+            log_then_return!(NoHypervisorFound());
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::sync::{Arc, Barrier};
+    use std::thread;
+
+    use hyperlight_testing::simple_guest_as_string;
+
+    #[cfg(target_os = "windows")]
+    use crate::sandbox::SandboxConfiguration;
+    use crate::sandbox::WrapperGetter;
+    use crate::sandbox_state::sandbox::EvolvableSandbox;
+    use crate::sandbox_state::transition::Noop;
+    use crate::HyperlightError::HypervisorHandlerExecutionCancelAttemptOnFinishedExecution;
+    use crate::{
+        is_hypervisor_present, GuestBinary, HyperlightError, MultiUseSandbox, Result,
+        UninitializedSandbox,
+    };
+    fn create_multi_use_sandbox() -> MultiUseSandbox {
+        if !is_hypervisor_present() {
+            panic!("Panic on create_multi_use_sandbox because no hypervisor is present");
+        }
+
+        // Tests that use this function seem to fail sporadically with timeouts on
+        // Windows, so the timeouts are raised here.
+        let cfg = {
+            #[cfg(target_os = "windows")]
+            {
+                let mut cfg = SandboxConfiguration::default();
+                cfg.set_max_initialization_time(std::time::Duration::from_secs(10));
+                cfg.set_max_execution_time(std::time::Duration::from_secs(3));
+                Some(cfg)
+            }
+            #[cfg(not(target_os = "windows"))]
+            {
+                None
+            }
+        };
+
+        let usbox = UninitializedSandbox::new(
+            GuestBinary::FilePath(simple_guest_as_string().expect("Guest Binary Missing")),
+            cfg,
+        )
+        .unwrap();
+
+        usbox.evolve(Noop::default()).unwrap()
+    }
+
+    #[test]
+    #[ignore] // this test runs by itself because it uses a lot of system resources
+    fn create_1000_sandboxes() {
+        let barrier = Arc::new(Barrier::new(21));
+
+        let mut handles = vec![];
+
+        for _ in 0..20 {
+            let c = barrier.clone();
+
+            let handle = thread::spawn(move || {
+                c.wait();
+
+                for _ in 0..50 {
+                    create_multi_use_sandbox();
+                }
+            });
+
+            handles.push(handle);
+        }
+
+        barrier.wait();
+
+        for handle in handles {
+            handle.join().unwrap();
+        }
+    }
+
+    #[test]
+    fn create_10_sandboxes() {
+        for _ in 0..10 {
+            create_multi_use_sandbox();
+        }
+    }
+
+    #[test]
+    fn hello_world() -> Result<()> {
+        let mut sandbox = create_multi_use_sandbox();
+
+        let msg = "Hello, World!\n".to_string();
+        let res = sandbox.call_guest_function_by_name::<i32>("PrintOutput", msg);
+
+        assert!(res.is_ok());
+
+        Ok(())
+    }
+
+    #[test]
+    fn terminate_execution_then_call_another_function() -> Result<()> {
+        let mut sandbox = create_multi_use_sandbox();
+
+        let res = sandbox.call_guest_function_by_name::<()>("Spin", ());
+
+        assert!(res.is_err());
+
+        match res.err().unwrap() {
+            HyperlightError::ExecutionCanceledByHost() => {}
+            _ => panic!("Expected ExecutionTerminated error"),
+        }
+
+        let res = sandbox.call_guest_function_by_name::<String>("Echo", "a".to_string());
+
+        assert!(res.is_ok());
+
+        Ok(())
+    }
+
+    #[test]
+    fn terminate_execution_of_an_already_finished_function_then_call_another_function(
+    ) -> Result<()> {
+        let call_print_output = |sandbox: &mut MultiUseSandbox| {
+            let msg = "Hello, World!\n".to_string();
+            let res = sandbox.call_guest_function_by_name::<i32>("PrintOutput", msg);
+
+            assert!(res.is_ok());
+        };
+
+        let mut sandbox = create_multi_use_sandbox();
+        call_print_output(&mut sandbox);
+
+        // this simulates what would happen if a function actually successfully
+        // finished while we attempted to terminate execution
+        {
+            match sandbox
+                .get_hv_handler()
+                .clone()
+                .terminate_hypervisor_handler_execution_and_reinitialise(
+                    sandbox.get_mgr_wrapper_mut().unwrap_mgr_mut(),
+                )?
+            {
+                HypervisorHandlerExecutionCancelAttemptOnFinishedExecution() => {}
+                _ => panic!("Expected error demonstrating execution wasn't cancelled properly"),
+            }
+        }
+
+        call_print_output(&mut sandbox);
+        call_print_output(&mut sandbox);
+
+        Ok(())
+    }
+}
diff --git a/src/hyperlight_host/src/hypervisor/kvm.rs b/src/hyperlight_host/src/hypervisor/kvm.rs
index c7aefebc8..af84e9bef 100644
--- a/src/hyperlight_host/src/hypervisor/kvm.rs
+++ b/src/hyperlight_host/src/hypervisor/kvm.rs
@@ -26,6 +26,8 @@ use kvm_ioctls::Cap::UserMemory;
 use kvm_ioctls::{Kvm, VcpuExit, VcpuFd, VmFd};
 use log::LevelFilter;
 use tracing::{instrument, Span};
+#[cfg(crashdump)]
+use {super::crashdump, std::path::Path};
 
 use super::fpu::{FP_CONTROL_WORD_DEFAULT, FP_TAG_WORD_DEFAULT, MXCSR_DEFAULT};
 #[cfg(gdb)]
@@ -40,6 +42,8 @@ use super::{
 };
 use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags};
 use crate::mem::ptr::{GuestPtr, RawPtr};
+#[cfg(crashdump)]
+use crate::sandbox::uninitialized::SandboxRuntimeConfig;
 use crate::sandbox::SandboxConfiguration;
 #[cfg(gdb)]
 use crate::HyperlightError;
@@ -290,6 +294,8 @@ pub(crate) struct KVMDriver {
     debug: Option<KvmDebug>,
     #[cfg(gdb)]
     gdb_conn: Option<DebugCommChannel<DebugResponse, DebugMsg>>,
+    #[cfg(crashdump)]
+    rt_cfg: SandboxRuntimeConfig,
 }
 
 impl KVMDriver {
@@ -304,6 +310,7 @@ impl KVMDriver {
         rsp: u64,
         config: &SandboxConfiguration,
         #[cfg(gdb)] gdb_conn: Option<DebugCommChannel<DebugResponse, DebugMsg>>,
+        #[cfg(crashdump)] rt_cfg: SandboxRuntimeConfig,
     ) -> Result<Self> {
         let kvm = Kvm::new()?;
@@ -363,6 +370,8 @@ impl KVMDriver {
             debug,
             #[cfg(gdb)]
             gdb_conn,
+            #[cfg(crashdump)]
+            rt_cfg,
         };
         Ok(ret)
     }
@@ -655,8 +664,67 @@ impl Hypervisor for KVMDriver {
     }
 
     #[cfg(crashdump)]
-    fn get_memory_regions(&self) -> &[MemoryRegion] {
-        &self.mem_regions
+    fn crashdump_context(&self) -> Result<Option<crashdump::CrashDumpContext>> {
+        if self.rt_cfg.guest_core_dump {
+            let mut regs = [0; 27];
+
+            let vcpu_regs = self.vcpu_fd.get_regs()?;
+            let sregs = self.vcpu_fd.get_sregs()?;
+            let xsave = self.vcpu_fd.get_xsave()?;
+
+            // Set the registers in the order expected by the crashdump context
+            regs[0] = vcpu_regs.r15; // r15
+            regs[1] = vcpu_regs.r14; // r14
+            regs[2] = vcpu_regs.r13; // r13
+            regs[3] = vcpu_regs.r12; // r12
+            regs[4] = vcpu_regs.rbp; // rbp
+            regs[5] = vcpu_regs.rbx; // rbx
+            regs[6] = vcpu_regs.r11; // r11
+            regs[7] = vcpu_regs.r10; // r10
+            regs[8] = vcpu_regs.r9; // r9
+            regs[9] = vcpu_regs.r8; // r8
+            regs[10] = vcpu_regs.rax; // rax
+            regs[11] = vcpu_regs.rcx; // rcx
+            regs[12] = vcpu_regs.rdx; // rdx
+            regs[13] = vcpu_regs.rsi; // rsi
+            regs[14] = vcpu_regs.rdi; // rdi
+            regs[15] = 0; // orig_rax
+            regs[16] = vcpu_regs.rip; // rip
+            regs[17] = sregs.cs.selector as u64; // cs
+            regs[18] = vcpu_regs.rflags; // eflags
+            regs[19] = vcpu_regs.rsp; // rsp
+            regs[20] = sregs.ss.selector as u64; // ss
+            regs[21] = sregs.fs.base; // fs_base
+            regs[22] = sregs.gs.base; // gs_base
+            regs[23] = sregs.ds.selector as u64; // ds
+            regs[24] = sregs.es.selector as u64; // es
+            regs[25] = sregs.fs.selector as u64; // fs
+            regs[26] = sregs.gs.selector as u64; // gs
+
+            // Get the filename from the runtime config
+            let filename = self.rt_cfg.binary_path.clone().and_then(|path| {
+                Path::new(&path)
+                    .file_name()
+                    .and_then(|name| name.to_os_string().into_string().ok())
+            });
+
+            // The [`CrashDumpContext`] accepts xsave as a vector of u8, so we need to convert the
+            // xsave region to a vector of u8
+            Ok(Some(crashdump::CrashDumpContext::new(
+                &self.mem_regions,
+                regs,
+                xsave
+                    .region
+                    .iter()
+                    .flat_map(|item| item.to_le_bytes())
+                    .collect::<Vec<u8>>(),
+                self.entrypoint,
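+                // the final two arguments record which guest binary was running:
+                // the full binary path and the file name derived from it above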
+                self.rt_cfg.binary_path.clone(),
+                filename,
+            )))
+        } else {
+            Ok(None)
+        }
     }
 
     #[cfg(gdb)]
diff --git a/src/hyperlight_host/src/hypervisor/mod.rs b/src/hyperlight_host/src/hypervisor/mod.rs
index 105e59d66..6cb48566b 100644
--- a/src/hyperlight_host/src/hypervisor/mod.rs
+++ b/src/hyperlight_host/src/hypervisor/mod.rs
@@ -227,7 +227,7 @@ pub(crate) trait Hypervisor: Debug + Sync + Send {
     fn as_mut_hypervisor(&mut self) -> &mut dyn Hypervisor;
 
     #[cfg(crashdump)]
-    fn get_memory_regions(&self) -> &[MemoryRegion];
+    fn crashdump_context(&self) -> Result<Option<crashdump::CrashDumpContext>>;
 
     #[cfg(gdb)]
     /// handles the cases when the vCPU stops due to a Debug event
@@ -269,7 +269,7 @@ impl VirtualCPU {
             }
             Ok(HyperlightExit::Mmio(addr)) => {
                 #[cfg(crashdump)]
-                crashdump::crashdump_to_tempfile(hv)?;
+                crashdump::generate_crashdump(hv)?;
 
                 mem_access_fn
                     .clone()
@@ -281,7 +281,7 @@ impl VirtualCPU {
             }
             Ok(HyperlightExit::AccessViolation(addr, tried, region_permission)) => {
                 #[cfg(crashdump)]
-                crashdump::crashdump_to_tempfile(hv)?;
+                crashdump::generate_crashdump(hv)?;
 
                 if region_permission.intersects(MemoryRegionFlags::STACK_GUARD) {
                     return Err(HyperlightError::StackOverflow());
@@ -300,14 +300,14 @@ impl VirtualCPU {
             }
             Ok(HyperlightExit::Unknown(reason)) => {
                 #[cfg(crashdump)]
-                crashdump::crashdump_to_tempfile(hv)?;
+                crashdump::generate_crashdump(hv)?;
 
                 log_then_return!("Unexpected VM Exit {:?}", reason);
             }
             Ok(HyperlightExit::Retry()) => continue,
             Err(e) => {
                 #[cfg(crashdump)]
-                crashdump::crashdump_to_tempfile(hv)?;
+                crashdump::generate_crashdump(hv)?;
 
                 return Err(e);
             }
@@ -455,6 +455,8 @@ pub(crate) mod tests {
     use crate::hypervisor::DbgMemAccessHandlerCaller;
     use crate::mem::ptr::RawPtr;
     use crate::sandbox::uninitialized::GuestBinary;
+    #[cfg(any(crashdump, gdb))]
+    use crate::sandbox::uninitialized::SandboxRuntimeConfig;
     use crate::sandbox::uninitialized_evolve::set_up_hypervisor_partition;
     use crate::sandbox::{SandboxConfiguration, UninitializedSandbox};
     use crate::{is_hypervisor_present, new_error, Result};
@@ -498,10 +500,17 @@ pub(crate) mod tests {
         let filename = dummy_guest_as_string().map_err(|e| new_error!("{}", e))?;
 
         let config: SandboxConfiguration = Default::default();
+        #[cfg(any(crashdump, gdb))]
+        let rt_cfg: SandboxRuntimeConfig = Default::default();
         let sandbox =
             UninitializedSandbox::new(GuestBinary::FilePath(filename.clone()), Some(config))?;
         let (_hshm, mut gshm) = sandbox.mgr.build();
-        let mut vm = set_up_hypervisor_partition(&mut gshm, &config)?;
+        let mut vm = set_up_hypervisor_partition(
+            &mut gshm,
+            &config,
+            #[cfg(any(crashdump, gdb))]
+            &rt_cfg,
+        )?;
         vm.initialise(
             RawPtr::from(0x230000),
             1234567890,
diff --git a/src/hyperlight_host/src/hypervisor/windows_hypervisor_platform.rs b/src/hyperlight_host/src/hypervisor/windows_hypervisor_platform.rs
index 711be5c39..386697270 100644
--- a/src/hyperlight_host/src/hypervisor/windows_hypervisor_platform.rs
+++ b/src/hyperlight_host/src/hypervisor/windows_hypervisor_platform.rs
@@ -26,6 +26,8 @@ use windows_result::HRESULT;
 use super::wrappers::HandleWrapper;
 use crate::hypervisor::wrappers::{WHvFPURegisters, WHvGeneralRegisters, WHvSpecialRegisters};
 use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags};
+#[cfg(crashdump)]
+use crate::HyperlightError;
 use crate::{new_error, Result};
 
 /// Interop calls for Windows Hypervisor Platform APIs
@@ -409,6 +411,59 @@ impl VMProcessor {
         }
     }
 
+    #[cfg(crashdump)]
+    #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")]
+    pub(super) fn get_xsave(&self) -> Result<Vec<u8>> {
+        // Get the required buffer size by calling with a NULL buffer first
+        let mut buffer_size_needed: u32 = 0;
+
+        unsafe {
+            // First call with NULL buffer to get the required size.
+            // If the buffer is not large enough, the return value is WHV_E_INSUFFICIENT_BUFFER.
+            // In this case, BytesWritten receives the required buffer size.
+            let result = WHvGetVirtualProcessorXsaveState(
+                self.get_partition_hdl(),
+                0,
+                std::ptr::null_mut(),
+                0,
+                &mut buffer_size_needed,
+            );
+
+            // If it failed for reasons other than an insufficient buffer, return the error
+            if let Err(e) = result {
+                if e.code() != windows::Win32::Foundation::WHV_E_INSUFFICIENT_BUFFER {
+                    return Err(HyperlightError::WindowsAPIError(e));
+                }
+            }
+        }
+
+        // Create a buffer with the appropriate size
+        let mut xsave_buffer = vec![0; buffer_size_needed as usize];
+
+        // Get the Xsave state
+        let mut written_bytes = 0;
+        unsafe {
+            WHvGetVirtualProcessorXsaveState(
+                self.get_partition_hdl(),
+                0,
+                xsave_buffer.as_mut_ptr() as *mut std::ffi::c_void,
+                buffer_size_needed,
+                &mut written_bytes,
+            )
+        }?;
+
+        // Check that the number of written bytes matches the expected size
+        if written_bytes != buffer_size_needed {
+            return Err(new_error!(
+                "Failed to get Xsave state: expected {} bytes, got {}",
+                buffer_size_needed,
+                written_bytes
+            ));
+        }
+
+        Ok(xsave_buffer)
+    }
+
     pub(super) fn set_fpu(&mut self, regs: &WHvFPURegisters) -> Result<()> {
         const LEN: usize = 26;
 
diff --git a/src/hyperlight_host/src/sandbox/config.rs b/src/hyperlight_host/src/sandbox/config.rs
index 5c2746224..84550155e 100644
--- a/src/hyperlight_host/src/sandbox/config.rs
+++ b/src/hyperlight_host/src/sandbox/config.rs
@@ -35,6 +35,14 @@ pub struct DebugInfo {
 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
 #[repr(C)]
 pub struct SandboxConfiguration {
+    /// Controls guest core dump generation.
+    /// This field defaults to true, which means core dumps will be placed in:
+    /// - the directory named by the HYPERLIGHT_CORE_DUMP_DIR environment variable, if it is set
+    /// - the default temporary directory otherwise
+    ///
+    /// Core dump generation can be disabled by setting this field to false.
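+    ///
+    /// Illustrative usage (sketch): `cfg.set_guest_core_dump(false)` turns core dump
+    /// generation off for sandboxes created from this configuration.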
+    #[cfg(crashdump)]
+    guest_core_dump: bool,
     /// Guest gdb debug port
     #[cfg(gdb)]
     guest_debug_info: Option<DebugInfo>,
@@ -100,6 +108,7 @@ impl SandboxConfiguration {
         interrupt_retry_delay: Duration,
         interrupt_vcpu_sigrtmin_offset: u8,
         #[cfg(gdb)] guest_debug_info: Option<DebugInfo>,
+        #[cfg(crashdump)] guest_core_dump: bool,
     ) -> Self {
         Self {
             input_data_size: max(input_data_size, Self::MIN_INPUT_SIZE),
@@ -110,6 +119,8 @@ impl SandboxConfiguration {
             interrupt_vcpu_sigrtmin_offset,
             #[cfg(gdb)]
             guest_debug_info,
+            #[cfg(crashdump)]
+            guest_core_dump,
         }
     }
 
@@ -174,6 +185,15 @@ impl SandboxConfiguration {
         Ok(())
     }
 
+    /// Toggles the guest core dump generation for a sandbox
+    /// Setting this to false disables the core dump generation
+    /// This is only used when the `crashdump` feature is enabled
+    #[cfg(crashdump)]
+    #[instrument(skip_all, parent = Span::current(), level= "Trace")]
+    pub fn set_guest_core_dump(&mut self, enable: bool) {
+        self.guest_core_dump = enable;
+    }
+
     /// Sets the configuration for the guest debug
     #[cfg(gdb)]
     #[instrument(skip_all, parent = Span::current(), level= "Trace")]
@@ -191,6 +211,12 @@ impl SandboxConfiguration {
         self.output_data_size
     }
 
+    #[cfg(crashdump)]
+    #[instrument(skip_all, parent = Span::current(), level= "Trace")]
+    pub(crate) fn get_guest_core_dump(&self) -> bool {
+        self.guest_core_dump
+    }
+
     #[cfg(gdb)]
     #[instrument(skip_all, parent = Span::current(), level= "Trace")]
     pub(crate) fn get_guest_debug_info(&self) -> Option<DebugInfo> {
@@ -236,6 +262,8 @@ impl Default for SandboxConfiguration {
             Self::INTERRUPT_VCPU_SIGRTMIN_OFFSET,
             #[cfg(gdb)]
             None,
+            #[cfg(crashdump)]
+            true,
         )
     }
 }
@@ -260,6 +288,8 @@ mod tests {
             SandboxConfiguration::INTERRUPT_VCPU_SIGRTMIN_OFFSET,
             #[cfg(gdb)]
             None,
+            #[cfg(crashdump)]
+            true,
         );
 
         let exe_info = simple_guest_exe_info().unwrap();
@@ -287,6 +317,8 @@ mod tests {
             SandboxConfiguration::INTERRUPT_VCPU_SIGRTMIN_OFFSET,
             #[cfg(gdb)]
             None,
+            #[cfg(crashdump)]
+            true,
         );
         assert_eq!(SandboxConfiguration::MIN_INPUT_SIZE, cfg.input_data_size);
         assert_eq!(SandboxConfiguration::MIN_OUTPUT_SIZE, cfg.output_data_size);
diff --git a/src/hyperlight_host/src/sandbox/uninitialized.rs b/src/hyperlight_host/src/sandbox/uninitialized.rs
index 8f52c81d3..6cd6609b4 100644
--- a/src/hyperlight_host/src/sandbox/uninitialized.rs
+++ b/src/hyperlight_host/src/sandbox/uninitialized.rs
@@ -52,6 +52,17 @@ const EXTRA_ALLOWED_SYSCALLS_FOR_WRITER_FUNC: &[super::ExtraAllowedSyscall] = &[
     libc::SYS_close,
 ];
 
+#[cfg(any(crashdump, gdb))]
+#[derive(Clone, Debug, Default)]
+pub(crate) struct SandboxRuntimeConfig {
+    #[cfg(crashdump)]
+    pub(crate) binary_path: Option<String>,
+    #[cfg(gdb)]
+    pub(crate) debug_info: Option<DebugInfo>,
+    #[cfg(crashdump)]
+    pub(crate) guest_core_dump: bool,
+}
+
 /// A preliminary `Sandbox`, not yet ready to execute guest code.
/// /// Prior to initializing a full-fledged `Sandbox`, you must create one of @@ -66,6 +77,8 @@ pub struct UninitializedSandbox { pub(crate) mgr: MemMgrWrapper, pub(crate) max_guest_log_level: Option, pub(crate) config: SandboxConfiguration, + #[cfg(any(crashdump, gdb))] + pub(crate) rt_cfg: SandboxRuntimeConfig, } impl crate::sandbox_state::sandbox::UninitializedSandbox for UninitializedSandbox { @@ -145,18 +158,43 @@ impl UninitializedSandbox { GuestBinary::FilePath(binary_path) => { let path = Path::new(&binary_path) .canonicalize() - .map_err(|e| new_error!("GuestBinary not found: '{}': {}", binary_path, e))?; - GuestBinary::FilePath( - path.into_os_string() - .into_string() - .map_err(|e| new_error!("Error converting OsString to String: {:?}", e))?, - ) + .map_err(|e| new_error!("GuestBinary not found: '{}': {}", binary_path, e))? + .into_os_string() + .into_string() + .map_err(|e| new_error!("Error converting OsString to String: {:?}", e))?; + + GuestBinary::FilePath(path) } buffer @ GuestBinary::Buffer(_) => buffer, }; let sandbox_cfg = cfg.unwrap_or_default(); + #[cfg(any(crashdump, gdb))] + let rt_cfg = { + #[cfg(crashdump)] + let guest_core_dump = sandbox_cfg.get_guest_core_dump(); + + #[cfg(gdb)] + let debug_info = sandbox_cfg.get_guest_debug_info(); + + #[cfg(crashdump)] + let binary_path = if let GuestBinary::FilePath(ref path) = guest_binary { + Some(path.clone()) + } else { + None + }; + + SandboxRuntimeConfig { + #[cfg(crashdump)] + binary_path, + #[cfg(gdb)] + debug_info, + #[cfg(crashdump)] + guest_core_dump, + } + }; + let mut mem_mgr_wrapper = { let mut mgr = UninitializedSandbox::load_guest_binary(sandbox_cfg, &guest_binary)?; let stack_guard = Self::create_stack_guard(); @@ -173,6 +211,8 @@ impl UninitializedSandbox { mgr: mem_mgr_wrapper, max_guest_log_level: None, config: sandbox_cfg, + #[cfg(any(crashdump, gdb))] + rt_cfg, }; // If we were passed a writer for host print register it otherwise use the default. 
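+        // Illustrative host-side sketch (hedged; `path` is a placeholder, not part of
+        // this change): opting a sandbox out of core dumps via the configuration:
+        //
+        //     let mut cfg = SandboxConfiguration::default();
+        //     cfg.set_guest_core_dump(false);
+        //     let sandbox = UninitializedSandbox::new(GuestBinary::FilePath(path), Some(cfg))?;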
diff --git a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs
index 593ac9165..1c5ece325 100644
--- a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs
+++ b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs
@@ -22,6 +22,8 @@ use tracing::{instrument, Span};
 use super::hypervisor::{get_available_hypervisor, HypervisorType};
 #[cfg(gdb)]
 use super::mem_access::dbg_mem_access_handler_wrapper;
+#[cfg(any(crashdump, gdb))]
+use super::uninitialized::SandboxRuntimeConfig;
 use super::SandboxConfiguration;
 use crate::hypervisor::handlers::{MemAccessHandlerCaller, OutBHandlerCaller};
 use crate::hypervisor::Hypervisor;
@@ -69,7 +71,12 @@ where
     ) -> Result<ResSandbox>,
 {
     let (hshm, mut gshm) = u_sbox.mgr.build();
-    let mut vm = set_up_hypervisor_partition(&mut gshm, &u_sbox.config)?;
+    let mut vm = set_up_hypervisor_partition(
+        &mut gshm,
+        &u_sbox.config,
+        #[cfg(any(crashdump, gdb))]
+        &u_sbox.rt_cfg,
+    )?;
 
     let outb_hdl = outb_handler_wrapper(hshm.clone(), u_sbox.host_funcs.clone());
 
     let seed = {
@@ -141,6 +148,7 @@ pub(super) fn evolve_impl_multi_use(u_sbox: UninitializedSandbox) -> Result<MultiUseSandbox> {
 pub(crate) fn set_up_hypervisor_partition(
     mgr: &mut SandboxMemoryManager<GuestSharedMemory>,
     #[cfg_attr(target_os = "windows", allow(unused_variables))] config: &SandboxConfiguration,
+    #[cfg(any(crashdump, gdb))] rt_cfg: &SandboxRuntimeConfig,
 ) -> Result<Box<dyn Hypervisor>> {
     let mem_size = u64::try_from(mgr.shared_mem.mem_size())?;
     let mut regions = mgr.layout.get_memory_regions(&mgr.shared_mem)?;
@@ -176,7 +184,7 @@ pub(crate) fn set_up_hypervisor_partition(
 
     // Create gdb thread if gdb is enabled and the configuration is provided
     #[cfg(gdb)]
-    let gdb_conn = if let Some(DebugInfo { port }) = config.get_guest_debug_info() {
+    let gdb_conn = if let Some(DebugInfo { port }) = rt_cfg.debug_info {
         use crate::hypervisor::gdb::create_gdb_thread;
 
         let gdb_conn = create_gdb_thread(port, unsafe { libc::pthread_self() });
@@ -206,6 +214,8 @@ pub(crate) fn set_up_hypervisor_partition(
             config,
             #[cfg(gdb)]
             gdb_conn,
+            #[cfg(crashdump)]
+            rt_cfg.clone(),
         )?;
         Ok(Box::new(hv))
     }
@@ -220,6 +230,8 @@ pub(crate) fn set_up_hypervisor_partition(
             config,
             #[cfg(gdb)]
             gdb_conn,
+            #[cfg(crashdump)]
+            rt_cfg.clone(),
         )?;
         Ok(Box::new(hv))
     }
@@ -241,6 +253,8 @@ pub(crate) fn set_up_hypervisor_partition(
             entrypoint_ptr.absolute()?,
             rsp_ptr.absolute()?,
             HandleWrapper::from(mmap_file_handle),
+            #[cfg(crashdump)]
+            rt_cfg.clone(),
         )?;
         Ok(Box::new(hv))
     }