(more) precisely track memory accesses and allocations across FFI #4326

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open · wants to merge 6 commits into master
Changes from all commits
21 changes: 21 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
@@ -44,6 +44,7 @@ libloading = "0.8"
nix = { version = "0.30.1", features = ["mman", "ptrace", "signal"] }
ipc-channel = "0.19.0"
serde = { version = "1.0.219", features = ["derive"] }
capstone = "0.13"

[dev-dependencies]
ui_test = "0.29.1"
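The new `capstone` dependency suggests the supervisor disassembles the instruction that caused each fault in order to bound the size of the memory access. A minimal sketch of how that could look with the `capstone` crate (x86-64 only; the function name and decoding policy are assumptions for illustration, not this PR's code):

```rust
use capstone::prelude::*;

/// Decode one instruction and return the width (in bytes) of its widest
/// memory operand, if any. `code` holds the bytes at the faulting address.
fn guess_access_size(code: &[u8], addr: u64) -> Option<usize> {
    let cs = Capstone::new()
        .x86()
        .mode(arch::x86::ArchMode::Mode64)
        .detail(true) // operand information requires detail mode
        .build()
        .ok()?;
    // Disassemble exactly one instruction at the faulting address.
    let insns = cs.disasm_count(code, addr, 1).ok()?;
    let insn = insns.iter().next()?;
    let detail = cs.insn_detail(insn).ok()?;
    // The widest memory operand bounds how many bytes the access touched.
    detail
        .arch_detail()
        .operands()
        .iter()
        .filter_map(|op| match op {
            arch::ArchOperand::X86Operand(x)
                if matches!(x.op_type, arch::x86::X86OperandType::Mem(_)) =>
                Some(x.size as usize),
            _ => None,
        })
        .max()
}
```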
46 changes: 44 additions & 2 deletions src/alloc/isolated_alloc.rs
@@ -1,5 +1,6 @@
use std::alloc::{self, Layout};

use nix::sys::mman;
use rustc_index::bit_set::DenseBitSet;

/// How many bytes of memory each bit in the bitset represents.
@@ -271,13 +272,54 @@ impl IsolatedAlloc {
pub fn pages(&self) -> Vec<usize> {
let mut pages: Vec<_> =
self.page_ptrs.clone().into_iter().map(|p| p.expose_provenance()).collect();
self.huge_ptrs.iter().for_each(|(ptr, size)| {
for i in 0..size / self.page_size {
pages.push(ptr.expose_provenance().strict_add(i * self.page_size));
}
});
pages
}

/// Protects all owned memory as `PROT_NONE`, preventing accesses.
///
/// SAFETY: Accessing memory after this point will result in a segfault
/// unless it is first unprotected.
pub unsafe fn prepare_ffi(&mut self) -> Result<(), nix::errno::Errno> {
let prot = mman::ProtFlags::PROT_NONE;
unsafe { self.mprotect(prot) }
}

/// Deprotects all owned memory by setting it to RW. Erroring here is very
/// likely unrecoverable, so it may panic if applying those permissions
/// fails.
pub fn unprep_ffi(&mut self) {
let prot = mman::ProtFlags::PROT_READ | mman::ProtFlags::PROT_WRITE;
unsafe {
self.mprotect(prot).unwrap();
}
}

/// Applies `prot` to every page managed by the allocator.
///
/// SAFETY: Accessing memory in violation of the protection flags will
/// trigger a segfault.
unsafe fn mprotect(&mut self, prot: mman::ProtFlags) -> Result<(), nix::errno::Errno> {
for &pg in &self.page_ptrs {
unsafe {
// We already know only non-null ptrs are pushed to self.page_ptrs.
let addr: std::ptr::NonNull<std::ffi::c_void> =
std::ptr::NonNull::new_unchecked(pg.cast());
mman::mprotect(addr, self.page_size, prot)?;
}
}
for &(hpg, size) in &self.huge_ptrs {
unsafe {
let addr = std::ptr::NonNull::new_unchecked(hpg.cast());
mman::mprotect(addr, size.next_multiple_of(self.page_size), prot)?;
}
}
Ok(())
}
}
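Taken together, the two entry points are meant to bracket a native call. A sketch of the intended usage (a hypothetical driver assumed to sit next to `IsolatedAlloc`, not code from this PR):

```rust
// Hypothetical driver showing the protect/call/unprotect bracket.
// `call_native` stands in for the actual FFI invocation.
unsafe fn bracketed_call<R>(
    alloc: &mut IsolatedAlloc,
    call_native: impl FnOnce() -> R,
) -> Result<R, nix::errno::Errno> {
    // Revoke all access: any touch of machine memory now faults, so a
    // tracing supervisor can observe (and transiently permit) each access.
    unsafe { alloc.prepare_ffi()? };
    let ret = call_native();
    // Restore RW access; this panics internally if mprotect fails, since
    // continuing with unreadable memory would be unrecoverable.
    alloc.unprep_ffi();
    Ok(ret)
}
```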

#[cfg(test)]
37 changes: 35 additions & 2 deletions src/alloc_addresses/mod.rs
@@ -470,13 +470,46 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
/// This overapproximates the modifications which external code might make to memory:
/// We mark all reachable allocations as initialized, mark all reachable provenances as exposed,
/// and overwrite them with `Provenance::WILDCARD`.
fn prepare_exposed_for_native_call(&mut self, _paranoid: bool) -> InterpResult<'tcx> {
let this = self.eval_context_mut();
// We need to make a deep copy of this list, but it's fine; it also serves as scratch space
// for the search within `prepare_for_native_call`.
let exposed: Vec<AllocId> =
this.machine.alloc_addresses.get_mut().exposed.iter().copied().collect();
this.prepare_for_native_call(exposed /*, paranoid*/)
}

/// Makes use of information obtained about memory accesses during FFI to determine which
/// provenances should be exposed. Note that if `prepare_exposed_for_native_call` was not
/// called before the FFI (with `paranoid` set to false) then some of the writes may be
/// lost!
#[cfg(target_os = "linux")]
fn apply_events(
&mut self,
events: crate::shims::trace::messages::MemEvents,
) -> InterpResult<'tcx> {
let this = self.eval_context_mut();

let mut reads = vec![];
let mut writes = vec![];
for acc in events.acc_events {
match acc {
// Ideally, we'd skip reads that occur after certain bytes were
// already written to. However, these are always just conservative
// overestimates - Read(range) means "a read maybe happened
// spanning at most range" - so we can't make use of this for
// now. Maybe we could also skip over reads/writes that hit the
// same bytes, but that's best added together with the stuff above.
shims::trace::AccessEvent::Read(range) => reads.push(range),
shims::trace::AccessEvent::Write(range) => {
writes.push(range);
}
}
}
let _exposed: Vec<AllocId> =
this.machine.alloc_addresses.get_mut().exposed.iter().copied().collect();
interp_ok(())
//this.apply_accesses(exposed, events.reads, events.writes)
}
}
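For orientation, here is the shape of the data this function consumes, plus one way the recorded ranges could later be intersected with allocation extents. The type definitions are inferred from the usage above (the real ones live in `crate::shims::trace::messages`), and the overlap helper is purely illustrative:

```rust
use std::ops::Range;

// Inferred from the match above: conservative byte ranges, where
// `Read(r)` means "a read may have happened, spanning at most `r`".
pub enum AccessEvent {
    Read(Range<usize>),
    Write(Range<usize>),
}

pub struct MemEvents {
    pub acc_events: Vec<AccessEvent>,
}

/// Base address and length of one exposed allocation (assumed layout).
pub struct AllocExtent {
    pub base: usize,
    pub len: usize,
}

/// Half-open interval overlap: does the access touch this allocation?
fn touches(ext: &AllocExtent, acc: &Range<usize>) -> bool {
    acc.start < ext.base + ext.len && ext.base < acc.end
}

/// Which allocations were (possibly) touched by any recorded access.
fn touched<'a>(
    extents: &'a [AllocExtent],
    accesses: &'a [Range<usize>],
) -> impl Iterator<Item = &'a AllocExtent> + 'a {
    extents.iter().filter(move |e| accesses.iter().any(|a| touches(e, a)))
}
```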

29 changes: 22 additions & 7 deletions src/shims/native_lib.rs
@@ -219,8 +219,15 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
}
}

// Prepare all exposed memory, depending on whether we have a supervisor process.
#[cfg(target_os = "linux")]
if super::trace::Supervisor::poll() {
this.prepare_exposed_for_native_call(false)?;
} else {
this.prepare_exposed_for_native_call(true)?;
}
#[cfg(not(target_os = "linux"))]
this.prepare_exposed_for_native_call(true)?;

// Convert them to `libffi::high::Arg` type.
let libffi_args = libffi_args
@@ -229,7 +236,15 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
.collect::<Vec<libffi::high::Arg<'_>>>();

// Call the function and store output, depending on return type in the function signature.
let (ret, maybe_memevents) =
this.call_native_with_args(link_name, dest, code_ptr, libffi_args)?;

#[cfg(target_os = "linux")]
if let Some(events) = maybe_memevents {
this.apply_events(events)?;
}
#[cfg(not(target_os = "linux"))]
let _ = maybe_memevents; // Suppress the unused warning.

this.write_immediate(*ret, dest)?;
interp_ok(true)
@@ -250,15 +265,15 @@ unsafe fn do_native_call<T: libffi::high::CType>(

unsafe {
if let Some(alloc) = alloc {
// SAFETY: We don't touch the machine memory past this point.
let (guard, stack_ptr) = Supervisor::start_ffi(alloc.clone());
// SAFETY: Upheld by caller.
let ret = ffi::call(ptr, args);
// SAFETY: We got the guard and stack pointer from start_ffi, and
// the allocator is the same.
(ret, Supervisor::end_ffi(guard, alloc, stack_ptr))
} else {
// SAFETY: Upheld by caller.
(ffi::call(ptr, args), None)
}
}
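The guard returned by `start_ffi` and consumed by `end_ffi` doubles as a mutual-exclusion token: holding it across the call is what guarantees at most one FFI call is in flight. In miniature (simplified signatures, illustrative only; the real functions also carry the allocator, stack pointer, and events):

```rust
use std::sync::{Mutex, MutexGuard};

static FFI_LOCK: Mutex<()> = Mutex::new(());

// Take the lock before the native call...
fn start() -> MutexGuard<'static, ()> {
    FFI_LOCK.lock().unwrap()
}

// ...and hand the same guard back afterwards, so the lock is held for
// the entire call and a second FFI call must wait.
fn end(guard: MutexGuard<'static, ()>) {
    drop(guard);
}
```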
70 changes: 43 additions & 27 deletions src/shims/trace/child.rs
@@ -52,30 +52,40 @@ impl Supervisor {
// If the supervisor is not initialised for whatever reason, fast-fail.
// This might be desired behaviour, as even on platforms where ptracing
// is not implemented it enables us to enforce that only one FFI call
// happens at a time.
let Some(sv) = sv_guard.take() else {
return (sv_guard, None);
};

// Get pointers to all the pages the supervisor must allow accesses in
// and prepare the fake stack.
let page_ptrs = alloc.borrow().pages();
let raw_stack_ptr: *mut [u8; FAKE_STACK_SIZE] =
Box::leak(Box::new([0u8; FAKE_STACK_SIZE])).as_mut_ptr().cast();
let stack_ptr = raw_stack_ptr.expose_provenance();
let start_info = StartFfiInfo { page_ptrs, stack_ptr };

// SAFETY: We do not access machine memory past this point until the
// supervisor is ready to allow it.
unsafe {
if alloc.borrow_mut().prepare_ffi().is_err() {
// Don't mess up unwinding by maybe leaving the memory partly protected
alloc.borrow_mut().unprep_ffi();
panic!("Cannot protect memory for FFI call!");
}
}

// Send over the info.
// NB: if we do not wait to receive a blank confirmation response, it is
// possible that the supervisor is alerted of the SIGSTOP *before* it has
// actually received the start_info, thus deadlocking! This way, we can
// enforce an ordering for these events.
sv.message_tx.send(TraceRequest::StartFfi(start_info)).unwrap();
sv.confirm_rx.recv().unwrap();
*sv_guard = Some(sv);
// We need to be stopped for the supervisor to be able to make certain
// modifications to our memory - simply waiting on the recv() doesn't
// count.
signal::raise(signal::SIGSTOP).unwrap();
(sv_guard, Some(raw_stack_ptr))
}
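A sketch of the messages assumed to cross the IPC channels here, and the ordering that avoids the deadlock described above. The `StartFfiInfo` fields match the diff; the enum shape is an assumption:

```rust
use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize)]
pub struct StartFfiInfo {
    /// Every page the child is about to protect with PROT_NONE.
    pub page_ptrs: Vec<usize>,
    /// Address of the leaked fake stack the supervisor may install.
    pub stack_ptr: usize,
}

#[derive(Serialize, Deserialize)]
pub enum TraceRequest {
    /// Begin tracing an FFI call; other variants elided here.
    StartFfi(StartFfiInfo),
}

// Ordering enforced above:
// child: message_tx.send(TraceRequest::StartFfi(info))  // (1)
// child: confirm_rx.recv()                               // (2) supervisor has info
// child: signal::raise(SIGSTOP)                          // (3) now safe to stop
// Without (2), the supervisor could observe the SIGSTOP before reading (1).
```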
@@ -90,7 +100,7 @@
/// one passed to it also.
pub unsafe fn end_ffi(
mut sv_guard: std::sync::MutexGuard<'static, Option<Supervisor>>,
alloc: Rc<RefCell<IsolatedAlloc>>,
raw_stack_ptr: Option<*mut [u8; FAKE_STACK_SIZE]>,
) -> Option<MemEvents> {
// We can't use IPC channels here to signal that FFI mode has ended,
Expand All @@ -99,19 +109,22 @@ impl Supervisor {
// simpler and more robust to simply use the signals which are left for
// arbitrary usage. Since this will block until we are continued by the
// supervisor, we can assume past this point that everything is back to
// normal.
signal::raise(signal::SIGUSR1).unwrap();

// This is safe! It just sets memory to normal expected permissions.
alloc.borrow_mut().unprep_ffi();

// If this is `None`, then `raw_stack_ptr` is None and does not need to
// be deallocated (and there's no need to worry about the guard, since
// it contains nothing).
let sv = sv_guard.take()?;
// SAFETY: Caller upholds that this pointer was allocated as a box with
// this type.
unsafe {
drop(Box::from_raw(raw_stack_ptr.unwrap()));
}
// On the off-chance something really weird happens, don't block forever.
let ret = sv
.event_rx
.try_recv_timeout(std::time::Duration::from_secs(5))
@@ -138,71 +151,74 @@
/// The invariants for `fork()` must be upheld by the caller.
pub unsafe fn init_sv() -> Result<(), SvInitError> {
// FIXME: Much of this could be reimplemented via the mitosis crate if we upstream the
// relevant missing bits.

// On Linux, this will check whether ptrace is fully disabled by the Yama module.
// If Yama isn't running or we're not on Linux, we'll still error later, but
// this saves a very expensive fork call.
let ptrace_status = std::fs::read_to_string("/proc/sys/kernel/yama/ptrace_scope");
if let Ok(stat) = ptrace_status {
if let Some(stat) = stat.chars().next() {
// Fast-error if ptrace is fully disabled on the system.
if stat == '3' {
return Err(SvInitError);
}
}
}

// Initialise the supervisor if it isn't already, placing it into SUPERVISOR.
let mut lock = SUPERVISOR.lock().unwrap();
if lock.is_some() {
return Ok(());
}

// Prepare the IPC channels we need.
let (message_tx, message_rx) = ipc::channel().unwrap();
let (confirm_tx, confirm_rx) = ipc::channel().unwrap();
let (event_tx, event_rx) = ipc::channel().unwrap();
// SAFETY: Calling sysconf(_SC_PAGESIZE) is always safe and cannot error.
let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) }.try_into().unwrap();

// Set up the pagesize used in the memory protection functions.
// SAFETY: sysconf(_SC_PAGESIZE) is always safe and doesn't error
super::parent::PAGE_SIZE.store(page_size, std::sync::atomic::Ordering::Relaxed);

unsafe {
// TODO: Maybe use clone3() instead for better signalling of when the child exits?
// SAFETY: Caller upholds that only one thread exists.
match unistd::fork().unwrap() {
unistd::ForkResult::Parent { child } => {
// If somehow another thread does exist, prevent it from accessing the lock
// and thus breaking our safety invariants.
std::mem::forget(lock);
// The child process is free to unwind, so we won't, to avoid doubly freeing
// system resources.
let init = std::panic::catch_unwind(|| {
let listener =
ChildListener { message_rx, attached: false, override_retcode: None };
// Trace as many things as possible, to be able to handle them as needed.
let options = ptrace::Options::PTRACE_O_TRACESYSGOOD
| ptrace::Options::PTRACE_O_TRACECLONE
| ptrace::Options::PTRACE_O_TRACEFORK;
// Attach to the child process without stopping it.
match ptrace::seize(child, options) {
// Ptrace works :D
Ok(_) => {
let code = sv_loop(listener, child, event_tx, confirm_tx).unwrap_err();
// If a return code of 0 is not explicitly given, assume something went
// wrong and return 1.
std::process::exit(code.unwrap_or(1))
}
// Ptrace does not work and we failed to catch that.
Err(_) => {
// If we can't ptrace, Miri continues being the parent.
signal::kill(child, signal::SIGKILL).unwrap();
SvInitError
}
}
});
match init {
// The "Ok" case means that we couldn't ptrace
// The "Ok" case means that we couldn't ptrace.
Ok(e) => return Err(e),
Err(p) => {
eprintln!("Supervisor process panicked!\n{p:?}");
Expand All @@ -212,12 +228,12 @@ pub unsafe fn init_sv() -> Result<(), SvInitError> {
}
unistd::ForkResult::Child => {
// Make sure we never get orphaned and stuck in SIGSTOP or similar
// SAFETY: prctl PR_SET_PDEATHSIG is always safe to call.
let ret = libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGTERM);
assert_eq!(ret, 0);
// First make sure the parent succeeded with ptracing us!
signal::raise(signal::SIGSTOP).unwrap();
// If we're the child process, save the supervisor info.
*lock = Some(Supervisor { message_tx, confirm_rx, event_rx });
}
}
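The `sv_loop` driven here is not shown in this section. Assuming it implements the classic mprotect-plus-ptrace tracing loop (record a faulting access, step over it, resume until the child signals the end of the call), a rough sketch might look like this (x86-64 Linux; hypothetical, not this PR's code):

```rust
use nix::sys::ptrace;
use nix::sys::signal::Signal;
use nix::sys::wait::{waitpid, WaitStatus};
use nix::unistd::Pid;

fn trace_loop(child: Pid) -> nix::Result<()> {
    loop {
        match waitpid(child, None)? {
            // The child touched protected memory: record the access,
            // lift protection, step over the faulting instruction, then
            // re-protect before resuming (protection changes elided).
            WaitStatus::Stopped(pid, Signal::SIGSEGV) => {
                let regs = ptrace::getregs(pid)?;
                let _fault_ip = regs.rip; // where to disassemble from
                ptrace::step(pid, None)?;
            }
            // The child raised SIGUSR1: FFI is over, stop tracing.
            WaitStatus::Stopped(pid, Signal::SIGUSR1) => {
                ptrace::cont(pid, None)?;
                return Ok(());
            }
            WaitStatus::Exited(_, code) => std::process::exit(code),
            // Forward anything else unchanged.
            _ => ptrace::cont(child, None)?,
        }
    }
}
```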