miri/shims/native_lib/trace/
child.rs

1use std::cell::RefCell;
2use std::rc::Rc;
3
4use ipc_channel::ipc;
5use nix::sys::{ptrace, signal};
6use nix::unistd;
7use rustc_const_eval::interpret::InterpResult;
8
9use super::CALLBACK_STACK_SIZE;
10use super::messages::{Confirmation, StartFfiInfo, TraceRequest};
11use super::parent::{ChildListener, sv_loop};
12use crate::alloc::isolated_alloc::IsolatedAlloc;
13use crate::shims::native_lib::MemEvents;
14
15/// A handle to the single, shared supervisor process across all `MiriMachine`s.
16/// Since it would be very difficult to trace multiple FFI calls in parallel, we
17/// need to ensure that either (a) only one `MiriMachine` is performing an FFI call
18/// at any given time, or (b) there are distinct supervisor and child processes for
19/// each machine. The former was chosen here.
20///
21/// This should only contain a `None` if the supervisor has not (yet) been initialised;
22/// otherwise, if `init_sv` was called and did not error, this will always be nonempty.
23static SUPERVISOR: std::sync::Mutex<Option<Supervisor>> = std::sync::Mutex::new(None);
24
25/// The main means of communication between the child and parent process,
26/// allowing the former to send requests and get info from the latter.
27pub struct Supervisor {
28    /// Sender for FFI-mode-related requests.
29    message_tx: ipc::IpcSender<TraceRequest>,
30    /// Used for synchronisation, allowing us to receive confirmation that the
31    /// parent process has handled the request from `message_tx`.
32    confirm_rx: ipc::IpcReceiver<Confirmation>,
33    /// Receiver for memory acceses that ocurred during the FFI call.
34    event_rx: ipc::IpcReceiver<MemEvents>,
35}
36
/// Marker representing that an error occurred during creation of the supervisor.
///
/// It carries no payload. It implements [`std::error::Error`] so that it can
/// be boxed, propagated with `?`, and printed like any other error.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SvInitError;

impl std::fmt::Display for SvInitError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("failed to initialise the native-lib supervisor process")
    }
}

impl std::error::Error for SvInitError {}
40
41impl Supervisor {
42    /// Returns `true` if the supervisor process exists, and `false` otherwise.
43    pub fn is_enabled() -> bool {
44        SUPERVISOR.lock().unwrap().is_some()
45    }
46
47    /// Performs an arbitrary FFI call, enabling tracing from the supervisor.
48    /// As this locks the supervisor via a mutex, no other threads may enter FFI
49    /// until this function returns.
50    pub fn do_ffi<'tcx>(
51        alloc: &Rc<RefCell<IsolatedAlloc>>,
52        f: impl FnOnce() -> InterpResult<'tcx, crate::ImmTy<'tcx>>,
53    ) -> InterpResult<'tcx, (crate::ImmTy<'tcx>, Option<MemEvents>)> {
54        let mut sv_guard = SUPERVISOR.lock().unwrap();
55        // If the supervisor is not initialised for whatever reason, fast-return.
56        // As a side-effect, even on platforms where ptracing
57        // is not implemented, we enforce that only one FFI call
58        // happens at a time.
59        let Some(sv) = sv_guard.as_mut() else { return f().map(|v| (v, None)) };
60
61        // Get pointers to all the pages the supervisor must allow accesses in
62        // and prepare the callback stack.
63        let page_ptrs = alloc.borrow().pages().collect();
64        let raw_stack_ptr: *mut [u8; CALLBACK_STACK_SIZE] =
65            Box::leak(Box::new([0u8; CALLBACK_STACK_SIZE])).as_mut_ptr().cast();
66        let stack_ptr = raw_stack_ptr.expose_provenance();
67        let start_info = StartFfiInfo { page_ptrs, stack_ptr };
68
69        // SAFETY: We do not access machine memory past this point until the
70        // supervisor is ready to allow it.
71        unsafe {
72            if alloc.borrow_mut().start_ffi().is_err() {
73                // Don't mess up unwinding by maybe leaving the memory partly protected
74                alloc.borrow_mut().end_ffi();
75                panic!("Cannot protect memory for FFI call!");
76            }
77        }
78
79        // Send over the info.
80        // NB: if we do not wait to receive a blank confirmation response, it is
81        // possible that the supervisor is alerted of the SIGSTOP *before* it has
82        // actually received the start_info, thus deadlocking! This way, we can
83        // enforce an ordering for these events.
84        sv.message_tx.send(TraceRequest::StartFfi(start_info)).unwrap();
85        sv.confirm_rx.recv().unwrap();
86        // We need to be stopped for the supervisor to be able to make certain
87        // modifications to our memory - simply waiting on the recv() doesn't
88        // count.
89        signal::raise(signal::SIGSTOP).unwrap();
90
91        let res = f();
92
93        // We can't use IPC channels here to signal that FFI mode has ended,
94        // since they might allocate memory which could get us stuck in a SIGTRAP
95        // with no easy way out! While this could be worked around, it is much
96        // simpler and more robust to simply use the signals which are left for
97        // arbitrary usage. Since this will block until we are continued by the
98        // supervisor, we can assume past this point that everything is back to
99        // normal.
100        signal::raise(signal::SIGUSR1).unwrap();
101
102        // This is safe! It just sets memory to normal expected permissions.
103        alloc.borrow_mut().end_ffi();
104
105        // SAFETY: Caller upholds that this pointer was allocated as a box with
106        // this type.
107        unsafe {
108            drop(Box::from_raw(raw_stack_ptr));
109        }
110        // On the off-chance something really weird happens, don't block forever.
111        let events = sv
112            .event_rx
113            .try_recv_timeout(std::time::Duration::from_secs(5))
114            .map_err(|e| {
115                match e {
116                    ipc::TryRecvError::IpcError(_) => (),
117                    ipc::TryRecvError::Empty =>
118                        panic!("Waiting for accesses from supervisor timed out!"),
119                }
120            })
121            .ok();
122
123        res.map(|v| (v, events))
124    }
125}
126
127/// Initialises the supervisor process. If this function errors, then the
128/// supervisor process could not be created successfully; else, the caller
129/// is now the child process and can communicate via `do_ffi`, receiving back
130/// events at the end.
131///
132/// # Safety
133/// The invariants for `fork()` must be upheld by the caller, namely either:
134/// - Other threads do not exist, or;
135/// - If they do exist, either those threads or the resulting child process
136///   only ever act in [async-signal-safe](https://www.man7.org/linux/man-pages/man7/signal-safety.7.html) ways.
137pub unsafe fn init_sv() -> Result<(), SvInitError> {
138    // FIXME: Much of this could be reimplemented via the mitosis crate if we upstream the
139    // relevant missing bits.
140
141    // On Linux, this will check whether ptrace is fully disabled by the Yama module.
142    // If Yama isn't running or we're not on Linux, we'll still error later, but
143    // this saves a very expensive fork call.
144    let ptrace_status = std::fs::read_to_string("/proc/sys/kernel/yama/ptrace_scope");
145    if let Ok(stat) = ptrace_status {
146        if let Some(stat) = stat.chars().next() {
147            // Fast-error if ptrace is fully disabled on the system.
148            if stat == '3' {
149                return Err(SvInitError);
150            }
151        }
152    }
153
154    // Initialise the supervisor if it isn't already, placing it into SUPERVISOR.
155    let mut lock = SUPERVISOR.lock().unwrap();
156    if lock.is_some() {
157        return Ok(());
158    }
159
160    // Prepare the IPC channels we need.
161    let (message_tx, message_rx) = ipc::channel().unwrap();
162    let (confirm_tx, confirm_rx) = ipc::channel().unwrap();
163    let (event_tx, event_rx) = ipc::channel().unwrap();
164    // SAFETY: Calling sysconf(_SC_PAGESIZE) is always safe and cannot error.
165    let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) }.try_into().unwrap();
166    super::parent::PAGE_SIZE.store(page_size, std::sync::atomic::Ordering::Relaxed);
167
168    unsafe {
169        // TODO: Maybe use clone3() instead for better signalling of when the child exits?
170        // SAFETY: Caller upholds that only one thread exists.
171        match unistd::fork().unwrap() {
172            unistd::ForkResult::Parent { child } => {
173                // If somehow another thread does exist, prevent it from accessing the lock
174                // and thus breaking our safety invariants.
175                std::mem::forget(lock);
176                // The child process is free to unwind, so we won't to avoid doubly freeing
177                // system resources.
178                let init = std::panic::catch_unwind(|| {
179                    let listener = ChildListener::new(message_rx, confirm_tx.clone());
180                    // Trace as many things as possible, to be able to handle them as needed.
181                    let options = ptrace::Options::PTRACE_O_TRACESYSGOOD
182                        | ptrace::Options::PTRACE_O_TRACECLONE
183                        | ptrace::Options::PTRACE_O_TRACEFORK;
184                    // Attach to the child process without stopping it.
185                    match ptrace::seize(child, options) {
186                        // Ptrace works :D
187                        Ok(_) => {
188                            let code = sv_loop(listener, child, event_tx, confirm_tx).unwrap_err();
189                            // If a return code of 0 is not explicitly given, assume something went
190                            // wrong and return 1.
191                            std::process::exit(code.0.unwrap_or(1))
192                        }
193                        // Ptrace does not work and we failed to catch that.
194                        Err(_) => {
195                            // If we can't ptrace, Miri continues being the parent.
196                            signal::kill(child, signal::SIGKILL).unwrap();
197                            SvInitError
198                        }
199                    }
200                });
201                match init {
202                    // The "Ok" case means that we couldn't ptrace.
203                    Ok(e) => return Err(e),
204                    Err(p) => {
205                        eprintln!(
206                            "Supervisor process panicked!\n{p:?}\n\nTry running again without using the native-lib tracer."
207                        );
208                        std::process::exit(1);
209                    }
210                }
211            }
212            unistd::ForkResult::Child => {
213                // Make sure we never get orphaned and stuck in SIGSTOP or similar
214                // SAFETY: prctl PR_SET_PDEATHSIG is always safe to call.
215                let ret = libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGTERM);
216                assert_eq!(ret, 0);
217                // First make sure the parent succeeded with ptracing us!
218                signal::raise(signal::SIGSTOP).unwrap();
219                // If we're the child process, save the supervisor info.
220                *lock = Some(Supervisor { message_tx, confirm_rx, event_rx });
221            }
222        }
223    }
224    Ok(())
225}
226
227/// Instruct the supervisor process to return a particular code. Useful if for
228/// whatever reason this code fails to be intercepted normally.
229pub fn register_retcode_sv(code: i32) {
230    let mut sv_guard = SUPERVISOR.lock().unwrap();
231    if let Some(sv) = sv_guard.as_mut() {
232        sv.message_tx.send(TraceRequest::OverrideRetcode(code)).unwrap();
233        sv.confirm_rx.recv().unwrap();
234    }
235}