miri/shims/native_lib/
mod.rs

1//! Implements calling functions from a native library.
2
3use std::ops::Deref;
4
5use libffi::high::call as ffi;
6use libffi::low::CodePtr;
7use rustc_abi::{BackendRepr, HasDataLayout, Size};
8use rustc_middle::mir::interpret::Pointer;
9use rustc_middle::ty::{self as ty, IntTy, UintTy};
10use rustc_span::Symbol;
11
12#[cfg_attr(
13    not(all(
14        target_os = "linux",
15        target_env = "gnu",
16        any(target_arch = "x86", target_arch = "x86_64")
17    )),
18    path = "trace/stub.rs"
19)]
20pub mod trace;
21
22use crate::*;
23
/// The final results of an FFI trace, containing every relevant event detected
/// by the tracer.
#[allow(dead_code)]
#[cfg_attr(target_os = "linux", derive(serde::Serialize, serde::Deserialize))]
#[derive(Debug)]
pub struct MemEvents {
    /// A list of memory accesses that occurred, in the order they occurred in.
    pub acc_events: Vec<AccessEvent>,
}
33
/// A single memory access observed by the tracer.
#[allow(dead_code)]
#[cfg_attr(target_os = "linux", derive(serde::Serialize, serde::Deserialize))]
#[derive(Clone, Debug)]
pub enum AccessEvent {
    /// A read occurred on this memory range.
    Read(AccessRange),
    /// A write may have occurred on this memory range.
    /// Some instructions *may* write memory without *always* doing that,
    /// so this can be an over-approximation.
    /// The range info, however, is reliable if the access did happen.
    /// If the second field is `true`, the access definitely happened.
    Write(AccessRange, bool),
}
48
49impl AccessEvent {
50    fn get_range(&self) -> AccessRange {
51        match self {
52            AccessEvent::Read(access_range) => access_range.clone(),
53            AccessEvent::Write(access_range, _) => access_range.clone(),
54        }
55    }
56}
57
/// The memory touched by a given access.
#[allow(dead_code)]
#[cfg_attr(target_os = "linux", derive(serde::Serialize, serde::Deserialize))]
#[derive(Clone, Debug)]
pub struct AccessRange {
    /// The base address in memory where an access occurred.
    pub addr: usize,
    /// The number of bytes affected from the base.
    pub size: usize,
}
68
69impl AccessRange {
70    fn end(&self) -> usize {
71        self.addr.strict_add(self.size)
72    }
73}
74
impl<'tcx> EvalContextExtPriv<'tcx> for crate::MiriInterpCx<'tcx> {}
trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
    /// Call native host function and return the output as an immediate.
    ///
    /// The call itself happens inside `trace::Supervisor::do_ffi`, which (where
    /// tracing is supported) also records the memory accesses performed by the
    /// native code and returns them as the `Option<MemEvents>` half of the result.
    fn call_native_with_args<'a>(
        &mut self,
        link_name: Symbol,
        dest: &MPlaceTy<'tcx>,
        ptr: CodePtr,
        libffi_args: Vec<libffi::high::Arg<'a>>,
    ) -> InterpResult<'tcx, (crate::ImmTy<'tcx>, Option<MemEvents>)> {
        let this = self.eval_context_mut();
        // The tracing supervisor needs access to Miri's allocator; it only
        // exists on Linux, where tracing is supported.
        #[cfg(target_os = "linux")]
        let alloc = this.machine.allocator.as_ref().unwrap();
        #[cfg(not(target_os = "linux"))]
        // Placeholder value.
        let alloc = ();

        trace::Supervisor::do_ffi(alloc, || {
            // Call the function (`ptr`) with arguments `libffi_args`, and obtain the return value
            // as the specified primitive integer type.
            let scalar = match dest.layout.ty.kind() {
                // ints
                ty::Int(IntTy::I8) => {
                    // Unsafe because of the call to native code.
                    // Because this is calling a C function it is not necessarily sound,
                    // but there is no way around this and we've checked as much as we can.
                    let x = unsafe { ffi::call::<i8>(ptr, libffi_args.as_slice()) };
                    Scalar::from_i8(x)
                }
                ty::Int(IntTy::I16) => {
                    let x = unsafe { ffi::call::<i16>(ptr, libffi_args.as_slice()) };
                    Scalar::from_i16(x)
                }
                ty::Int(IntTy::I32) => {
                    let x = unsafe { ffi::call::<i32>(ptr, libffi_args.as_slice()) };
                    Scalar::from_i32(x)
                }
                ty::Int(IntTy::I64) => {
                    let x = unsafe { ffi::call::<i64>(ptr, libffi_args.as_slice()) };
                    Scalar::from_i64(x)
                }
                ty::Int(IntTy::Isize) => {
                    let x = unsafe { ffi::call::<isize>(ptr, libffi_args.as_slice()) };
                    // Convert the host `isize` into the target's pointer-sized integer.
                    Scalar::from_target_isize(x.try_into().unwrap(), this)
                }
                // uints
                ty::Uint(UintTy::U8) => {
                    let x = unsafe { ffi::call::<u8>(ptr, libffi_args.as_slice()) };
                    Scalar::from_u8(x)
                }
                ty::Uint(UintTy::U16) => {
                    let x = unsafe { ffi::call::<u16>(ptr, libffi_args.as_slice()) };
                    Scalar::from_u16(x)
                }
                ty::Uint(UintTy::U32) => {
                    let x = unsafe { ffi::call::<u32>(ptr, libffi_args.as_slice()) };
                    Scalar::from_u32(x)
                }
                ty::Uint(UintTy::U64) => {
                    let x = unsafe { ffi::call::<u64>(ptr, libffi_args.as_slice()) };
                    Scalar::from_u64(x)
                }
                ty::Uint(UintTy::Usize) => {
                    let x = unsafe { ffi::call::<usize>(ptr, libffi_args.as_slice()) };
                    // Convert the host `usize` into the target's pointer-sized integer.
                    Scalar::from_target_usize(x.try_into().unwrap(), this)
                }
                // Functions with no declared return type (i.e., the default return)
                // have the output_type `Tuple([])`.
                ty::Tuple(t_list) if (*t_list).deref().is_empty() => {
                    unsafe { ffi::call::<()>(ptr, libffi_args.as_slice()) };
                    return interp_ok(ImmTy::uninit(dest.layout));
                }
                ty::RawPtr(..) => {
                    let x = unsafe { ffi::call::<*const ()>(ptr, libffi_args.as_slice()) };
                    // Native code gives us no provenance information, so use
                    // wildcard provenance for the returned pointer.
                    let ptr = Pointer::new(Provenance::Wildcard, Size::from_bytes(x.addr()));
                    Scalar::from_pointer(ptr, this)
                }
                _ =>
                    return Err(err_unsup_format!(
                        "unsupported return type for native call: {:?}",
                        link_name
                    ))
                    .into(),
            };
            interp_ok(ImmTy::from_scalar(scalar, dest.layout))
        })
    }

    /// Get the pointer to the function of the specified name in the shared object file,
    /// if it exists. The function must be in one of the shared object files specified:
    /// we do *not* return pointers to functions in dependencies of libraries.
    fn get_func_ptr_explicitly_from_lib(&mut self, link_name: Symbol) -> Option<CodePtr> {
        let this = self.eval_context_mut();
        // Try getting the function from one of the shared libraries.
        for (lib, lib_path) in &this.machine.native_lib {
            let Ok(func): Result<libloading::Symbol<'_, unsafe extern "C" fn()>, _> =
                (unsafe { lib.get(link_name.as_str().as_bytes()) })
            else {
                continue;
            };
            #[expect(clippy::as_conversions)] // fn-ptr to raw-ptr cast needs `as`.
            let fn_ptr = *func.deref() as *mut std::ffi::c_void;

            // FIXME: this is a hack!
            // The `libloading` crate will automatically load system libraries like `libc`.
            // On linux `libloading` is based on `dlsym`: https://docs.rs/libloading/0.7.3/src/libloading/os/unix/mod.rs.html#202
            // and `dlsym`(https://linux.die.net/man/3/dlsym) looks through the dependency tree of the
            // library if it can't find the symbol in the library itself.
            // So, in order to check if the function was actually found in the specified
            // `machine.external_so_lib` we need to check its `dli_fname` and compare it to
            // the specified SO file path.
            // This code is a reimplementation of the mechanism for getting `dli_fname` in `libloading`,
            // from: https://docs.rs/libloading/0.7.3/src/libloading/os/unix/mod.rs.html#411
            // using the `libc` crate where this interface is public.
            let mut info = std::mem::MaybeUninit::<libc::Dl_info>::zeroed();
            unsafe {
                let res = libc::dladdr(fn_ptr, info.as_mut_ptr());
                assert!(res != 0, "failed to load info about function we already loaded");
                let info = info.assume_init();
                // On Cygwin, `dli_fname` is an array rather than a pointer,
                // hence the `as_ptr` call.
                #[cfg(target_os = "cygwin")]
                let fname_ptr = info.dli_fname.as_ptr();
                #[cfg(not(target_os = "cygwin"))]
                let fname_ptr = info.dli_fname;
                assert!(!fname_ptr.is_null());
                if std::ffi::CStr::from_ptr(fname_ptr).to_str().unwrap()
                    != lib_path.to_str().unwrap()
                {
                    // The function is not actually in this .so, check the next one.
                    continue;
                }
            }

            // Return a pointer to the function.
            return Some(CodePtr(fn_ptr));
        }
        None
    }

    /// Applies the `events` to Miri's internal state. The event vector must be
    /// ordered sequentially by when the accesses happened, and the sizes are
    /// assumed to be exact.
    ///
    /// Reads expose the provenance of any pointers in the accessed bytes;
    /// writes mark the accessed bytes as (re-)initialised — for uncertain
    /// writes, only in mutable allocations. Throws UB if an access does not
    /// land in an exposed allocation.
    fn tracing_apply_accesses(&mut self, events: MemEvents) -> InterpResult<'tcx> {
        let this = self.eval_context_mut();

        for evt in events.acc_events {
            let evt_rg = evt.get_range();
            // LLVM at least permits vectorising accesses to adjacent allocations,
            // so we cannot assume 1 access = 1 allocation. :(
            let mut rg = evt_rg.addr..evt_rg.end();
            while let Some(curr) = rg.next() {
                let Some(alloc_id) = this.alloc_id_from_addr(
                    curr.to_u64(),
                    rg.len().try_into().unwrap(),
                    /* only_exposed_allocations */ true,
                ) else {
                    throw_ub_format!("Foreign code did an out-of-bounds access!")
                };
                let alloc = this.get_alloc_raw(alloc_id)?;
                // The logical and physical address of the allocation coincide, so we can use
                // this instead of `addr_from_alloc_id`.
                let alloc_addr = alloc.get_bytes_unchecked_raw().addr();

                // Determine the range inside the allocation that this access covers. This range is
                // in terms of offsets from the start of `alloc`. The start of the overlap range
                // will be `curr`; the end will be the minimum of the end of the allocation and the
                // end of the access' range.
                let overlap = curr.strict_sub(alloc_addr)
                    ..std::cmp::min(alloc.len(), rg.end.strict_sub(alloc_addr));
                // Skip forward however many bytes of the access are contained in the current
                // allocation, subtracting 1 since the overlap range includes the current addr
                // that was already popped off of the range.
                rg.advance_by(overlap.len().strict_sub(1)).unwrap();

                match evt {
                    AccessEvent::Read(_) => {
                        // FIXME: ProvenanceMap should have something like get_range().
                        let p_map = alloc.provenance();
                        for idx in overlap {
                            // If a provenance was read by the foreign code, expose it.
                            if let Some(prov) = p_map.get(Size::from_bytes(idx), this) {
                                this.expose_provenance(prov)?;
                            }
                        }
                    }
                    AccessEvent::Write(_, certain) => {
                        // Sometimes we aren't certain if a write happened, in which case we
                        // only initialise that data if the allocation is mutable.
                        if certain || alloc.mutability.is_mut() {
                            let (alloc, cx) = this.get_alloc_raw_mut(alloc_id)?;
                            alloc.process_native_write(
                                &cx.tcx,
                                Some(AllocRange {
                                    start: Size::from_bytes(overlap.start),
                                    size: Size::from_bytes(overlap.len()),
                                }),
                            )
                        }
                    }
                }
            }
        }

        interp_ok(())
    }
}
280
impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
    /// Call the native host function, with supplied arguments.
    /// Needs to convert all the arguments from their Miri representations to
    /// a native form (through `libffi` call).
    /// Then, convert the return value from the native form into something that
    /// can be stored in Miri's internal memory.
    ///
    /// Returns `true` if the function was found in a native library and called;
    /// `false` means the function is not exported by any of the loaded shared
    /// objects and the caller should fall back to the shims.
    fn call_native_fn(
        &mut self,
        link_name: Symbol,
        dest: &MPlaceTy<'tcx>,
        args: &[OpTy<'tcx>],
    ) -> InterpResult<'tcx, bool> {
        let this = self.eval_context_mut();
        // Get the pointer to the function in the shared object file if it exists.
        let code_ptr = match this.get_func_ptr_explicitly_from_lib(link_name) {
            Some(ptr) => ptr,
            None => {
                // Shared object file does not export this function -- try the shims next.
                return interp_ok(false);
            }
        };

        // Do we have ptrace?
        let tracing = trace::Supervisor::is_enabled();

        // Get the function arguments, and convert them to `libffi`-compatible form.
        let mut libffi_args = Vec::<CArg>::with_capacity(args.len());
        for arg in args.iter() {
            if !matches!(arg.layout.backend_repr, BackendRepr::Scalar(_)) {
                throw_unsup_format!("only scalar argument types are supported for native calls")
            }
            let imm = this.read_immediate(arg)?;
            libffi_args.push(imm_to_carg(&imm, this)?);
            // If we are passing a pointer, expose its provenance. Below, all exposed memory
            // (previously exposed and new exposed) will then be properly prepared.
            if matches!(arg.layout.ty.kind(), ty::RawPtr(..)) {
                let ptr = imm.to_scalar().to_pointer(this)?;
                let Some(prov) = ptr.provenance else {
                    // Pointer without provenance may not access any memory anyway, skip.
                    continue;
                };
                // The first time this happens, print a warning.
                if !this.machine.native_call_mem_warned.replace(true) {
                    // Newly set, so first time we get here.
                    this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem { tracing });
                }

                this.expose_provenance(prov)?;
            }
        }
        // Convert arguments to `libffi::high::Arg` type.
        // (`libffi_args` must stay alive for these borrows for the duration of the call.)
        let libffi_args = libffi_args
            .iter()
            .map(|arg| arg.arg_downcast())
            .collect::<Vec<libffi::high::Arg<'_>>>();

        // Prepare all exposed memory (both previously exposed, and just newly exposed since a
        // pointer was passed as argument). Uninitialised memory is left as-is, but any data
        // exposed this way is garbage anyway.
        this.visit_reachable_allocs(this.exposed_allocs(), |this, alloc_id, info| {
            // If there is no data behind this pointer, skip this.
            if !matches!(info.kind, AllocKind::LiveData) {
                return interp_ok(());
            }
            // It's okay to get raw access, what we do does not correspond to any actual
            // AM operation, it just approximates the state to account for the native call.
            let alloc = this.get_alloc_raw(alloc_id)?;
            // Also expose the provenance of the interpreter-level allocation, so it can
            // be read by FFI. The `black_box` is defensive programming as LLVM likes
            // to (incorrectly) optimize away ptr2int casts whose result is unused.
            std::hint::black_box(alloc.get_bytes_unchecked_raw().expose_provenance());

            if !tracing {
                // Expose all provenances in this allocation, since the native code can do $whatever.
                // Can be skipped when tracing; in that case we'll expose just the actually-read parts later.
                for prov in alloc.provenance().provenances() {
                    this.expose_provenance(prov)?;
                }
            }

            // Prepare for possible write from native code if mutable.
            if info.mutbl.is_mut() {
                let (alloc, cx) = this.get_alloc_raw_mut(alloc_id)?;
                // These writes could initialize everything and wreak havoc with the pointers.
                // We can skip that when tracing; in that case we'll later do that only for the memory that got actually written.
                if !tracing {
                    alloc.process_native_write(&cx.tcx, None);
                }
                // Also expose *mutable* provenance for the interpreter-level allocation.
                std::hint::black_box(alloc.get_bytes_unchecked_raw_mut().expose_provenance());
            }

            interp_ok(())
        })?;

        // Call the function and store output, depending on return type in the function signature.
        let (ret, maybe_memevents) =
            this.call_native_with_args(link_name, dest, code_ptr, libffi_args)?;

        if tracing {
            this.tracing_apply_accesses(maybe_memevents.unwrap())?;
        }

        this.write_immediate(*ret, dest)?;
        interp_ok(true)
    }
}
389
#[derive(Debug, Clone)]
/// Enum of supported arguments to external C functions.
// We introduce this enum instead of just calling `ffi::arg` and storing a list
// of `libffi::high::Arg` directly, because `libffi::high::Arg` just wraps a reference
// to the value it represents: https://docs.rs/libffi/latest/libffi/high/call/struct.Arg.html
// and we need to store a copy of the value, and pass a reference to this copy to C instead.
enum CArg {
    /// 8-bit signed integer.
    Int8(i8),
    /// 16-bit signed integer.
    Int16(i16),
    /// 32-bit signed integer.
    Int32(i32),
    /// 64-bit signed integer.
    Int64(i64),
    /// isize (host pointer-sized signed integer).
    ISize(isize),
    /// 8-bit unsigned integer.
    UInt8(u8),
    /// 16-bit unsigned integer.
    UInt16(u16),
    /// 32-bit unsigned integer.
    UInt32(u32),
    /// 64-bit unsigned integer.
    UInt64(u64),
    /// usize (host pointer-sized unsigned integer).
    USize(usize),
    /// Raw pointer, stored as C's `void*`.
    RawPtr(*mut std::ffi::c_void),
}
420
421impl<'a> CArg {
422    /// Convert a `CArg` to a `libffi` argument type.
423    fn arg_downcast(&'a self) -> libffi::high::Arg<'a> {
424        match self {
425            CArg::Int8(i) => ffi::arg(i),
426            CArg::Int16(i) => ffi::arg(i),
427            CArg::Int32(i) => ffi::arg(i),
428            CArg::Int64(i) => ffi::arg(i),
429            CArg::ISize(i) => ffi::arg(i),
430            CArg::UInt8(i) => ffi::arg(i),
431            CArg::UInt16(i) => ffi::arg(i),
432            CArg::UInt32(i) => ffi::arg(i),
433            CArg::UInt64(i) => ffi::arg(i),
434            CArg::USize(i) => ffi::arg(i),
435            CArg::RawPtr(i) => ffi::arg(i),
436        }
437    }
438}
439
/// Extract the scalar value from the result of reading a scalar from the machine,
/// and convert it to a `CArg`.
///
/// `isize`/`usize` values are read at the *target*'s pointer width and converted
/// to the host's; the `try_into().unwrap()` calls panic if the target value does
/// not fit into the host's pointer-sized integers.
fn imm_to_carg<'tcx>(v: &ImmTy<'tcx>, cx: &impl HasDataLayout) -> InterpResult<'tcx, CArg> {
    interp_ok(match v.layout.ty.kind() {
        // If the primitive provided can be converted to a type matching the type pattern
        // then create a `CArg` of this primitive value with the corresponding `CArg` constructor.
        // the ints
        ty::Int(IntTy::I8) => CArg::Int8(v.to_scalar().to_i8()?),
        ty::Int(IntTy::I16) => CArg::Int16(v.to_scalar().to_i16()?),
        ty::Int(IntTy::I32) => CArg::Int32(v.to_scalar().to_i32()?),
        ty::Int(IntTy::I64) => CArg::Int64(v.to_scalar().to_i64()?),
        ty::Int(IntTy::Isize) =>
            CArg::ISize(v.to_scalar().to_target_isize(cx)?.try_into().unwrap()),
        // the uints
        ty::Uint(UintTy::U8) => CArg::UInt8(v.to_scalar().to_u8()?),
        ty::Uint(UintTy::U16) => CArg::UInt16(v.to_scalar().to_u16()?),
        ty::Uint(UintTy::U32) => CArg::UInt32(v.to_scalar().to_u32()?),
        ty::Uint(UintTy::U64) => CArg::UInt64(v.to_scalar().to_u64()?),
        ty::Uint(UintTy::Usize) =>
            CArg::USize(v.to_scalar().to_target_usize(cx)?.try_into().unwrap()),
        ty::RawPtr(..) => {
            let s = v.to_scalar().to_pointer(cx)?.addr();
            // This relies on the `expose_provenance` in the `visit_reachable_allocs` callback
            // above.
            CArg::RawPtr(std::ptr::with_exposed_provenance_mut(s.bytes_usize()))
        }
        _ => throw_unsup_format!("unsupported argument type for native call: {}", v.layout.ty),
    })
}