// miri/shims/native_lib/mod.rs
1//! Implements calling functions from a native library.
2
3use std::ops::Deref;
4
5use libffi::high::call as ffi;
6use libffi::low::CodePtr;
7use rustc_abi::{BackendRepr, HasDataLayout, Size};
8use rustc_middle::mir::interpret::Pointer;
9use rustc_middle::ty::{self as ty, IntTy, UintTy};
10use rustc_span::Symbol;
11
12#[cfg_attr(
13 not(all(
14 target_os = "linux",
15 target_env = "gnu",
16 any(target_arch = "x86", target_arch = "x86_64")
17 )),
18 path = "trace/stub.rs"
19)]
20pub mod trace;
21
22use crate::*;
23
/// The final results of an FFI trace, containing every relevant event detected
/// by the tracer.
#[allow(dead_code)]
#[cfg_attr(target_os = "linux", derive(serde::Serialize, serde::Deserialize))]
#[derive(Debug)]
pub struct MemEvents {
    /// A list of memory accesses that occurred, in the order they occurred in.
    pub acc_events: Vec<AccessEvent>,
}
33
/// A single memory access observed by the FFI tracer.
#[allow(dead_code)]
#[cfg_attr(target_os = "linux", derive(serde::Serialize, serde::Deserialize))]
#[derive(Clone, Debug)]
pub enum AccessEvent {
    /// A read occurred on this memory range.
    Read(AccessRange),
    /// A write may have occurred on this memory range.
    /// Some instructions *may* write memory without *always* doing that,
    /// so this can be an over-approximation.
    /// The range info, however, is reliable if the access did happen.
    /// If the second field is true, the access definitely happened.
    Write(AccessRange, bool),
}
48
49impl AccessEvent {
50 fn get_range(&self) -> AccessRange {
51 match self {
52 AccessEvent::Read(access_range) => access_range.clone(),
53 AccessEvent::Write(access_range, _) => access_range.clone(),
54 }
55 }
56}
57
/// The memory touched by a given access, as a half-open range `[addr, addr + size)`.
#[allow(dead_code)]
#[cfg_attr(target_os = "linux", derive(serde::Serialize, serde::Deserialize))]
#[derive(Clone, Debug)]
pub struct AccessRange {
    /// The base address in memory where an access occurred.
    pub addr: usize,
    /// The number of bytes affected from the base.
    pub size: usize,
}
68
69impl AccessRange {
70 fn end(&self) -> usize {
71 self.addr.strict_add(self.size)
72 }
73}
74
75impl<'tcx> EvalContextExtPriv<'tcx> for crate::MiriInterpCx<'tcx> {}
76trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
77 /// Call native host function and return the output as an immediate.
78 fn call_native_with_args<'a>(
79 &mut self,
80 link_name: Symbol,
81 dest: &MPlaceTy<'tcx>,
82 ptr: CodePtr,
83 libffi_args: Vec<libffi::high::Arg<'a>>,
84 ) -> InterpResult<'tcx, (crate::ImmTy<'tcx>, Option<MemEvents>)> {
85 let this = self.eval_context_mut();
86 #[cfg(target_os = "linux")]
87 let alloc = this.machine.allocator.as_ref().unwrap();
88 #[cfg(not(target_os = "linux"))]
89 // Placeholder value.
90 let alloc = ();
91
92 trace::Supervisor::do_ffi(alloc, || {
93 // Call the function (`ptr`) with arguments `libffi_args`, and obtain the return value
94 // as the specified primitive integer type
95 let scalar = match dest.layout.ty.kind() {
96 // ints
97 ty::Int(IntTy::I8) => {
98 // Unsafe because of the call to native code.
99 // Because this is calling a C function it is not necessarily sound,
100 // but there is no way around this and we've checked as much as we can.
101 let x = unsafe { ffi::call::<i8>(ptr, libffi_args.as_slice()) };
102 Scalar::from_i8(x)
103 }
104 ty::Int(IntTy::I16) => {
105 let x = unsafe { ffi::call::<i16>(ptr, libffi_args.as_slice()) };
106 Scalar::from_i16(x)
107 }
108 ty::Int(IntTy::I32) => {
109 let x = unsafe { ffi::call::<i32>(ptr, libffi_args.as_slice()) };
110 Scalar::from_i32(x)
111 }
112 ty::Int(IntTy::I64) => {
113 let x = unsafe { ffi::call::<i64>(ptr, libffi_args.as_slice()) };
114 Scalar::from_i64(x)
115 }
116 ty::Int(IntTy::Isize) => {
117 let x = unsafe { ffi::call::<isize>(ptr, libffi_args.as_slice()) };
118 Scalar::from_target_isize(x.try_into().unwrap(), this)
119 }
120 // uints
121 ty::Uint(UintTy::U8) => {
122 let x = unsafe { ffi::call::<u8>(ptr, libffi_args.as_slice()) };
123 Scalar::from_u8(x)
124 }
125 ty::Uint(UintTy::U16) => {
126 let x = unsafe { ffi::call::<u16>(ptr, libffi_args.as_slice()) };
127 Scalar::from_u16(x)
128 }
129 ty::Uint(UintTy::U32) => {
130 let x = unsafe { ffi::call::<u32>(ptr, libffi_args.as_slice()) };
131 Scalar::from_u32(x)
132 }
133 ty::Uint(UintTy::U64) => {
134 let x = unsafe { ffi::call::<u64>(ptr, libffi_args.as_slice()) };
135 Scalar::from_u64(x)
136 }
137 ty::Uint(UintTy::Usize) => {
138 let x = unsafe { ffi::call::<usize>(ptr, libffi_args.as_slice()) };
139 Scalar::from_target_usize(x.try_into().unwrap(), this)
140 }
141 // Functions with no declared return type (i.e., the default return)
142 // have the output_type `Tuple([])`.
143 ty::Tuple(t_list) if (*t_list).deref().is_empty() => {
144 unsafe { ffi::call::<()>(ptr, libffi_args.as_slice()) };
145 return interp_ok(ImmTy::uninit(dest.layout));
146 }
147 ty::RawPtr(..) => {
148 let x = unsafe { ffi::call::<*const ()>(ptr, libffi_args.as_slice()) };
149 let ptr = Pointer::new(Provenance::Wildcard, Size::from_bytes(x.addr()));
150 Scalar::from_pointer(ptr, this)
151 }
152 _ =>
153 return Err(err_unsup_format!(
154 "unsupported return type for native call: {:?}",
155 link_name
156 ))
157 .into(),
158 };
159 interp_ok(ImmTy::from_scalar(scalar, dest.layout))
160 })
161 }
162
    /// Get the pointer to the function of the specified name in the shared object file,
    /// if it exists. The function must be in one of the shared object files specified:
    /// we do *not* return pointers to functions in dependencies of libraries.
    fn get_func_ptr_explicitly_from_lib(&mut self, link_name: Symbol) -> Option<CodePtr> {
        let this = self.eval_context_mut();
        // Try getting the function from one of the shared libraries.
        for (lib, lib_path) in &this.machine.native_lib {
            // The declared signature `unsafe extern "C" fn()` is only a placeholder;
            // we never call through this binding, we only take its address below.
            let Ok(func): Result<libloading::Symbol<'_, unsafe extern "C" fn()>, _> =
                (unsafe { lib.get(link_name.as_str().as_bytes()) })
            else {
                // Symbol not found in this library; try the next one.
                continue;
            };
            #[expect(clippy::as_conversions)] // fn-ptr to raw-ptr cast needs `as`.
            let fn_ptr = *func.deref() as *mut std::ffi::c_void;

            // FIXME: this is a hack!
            // The `libloading` crate will automatically load system libraries like `libc`.
            // On linux `libloading` is based on `dlsym`: https://docs.rs/libloading/0.7.3/src/libloading/os/unix/mod.rs.html#202
            // and `dlsym`(https://linux.die.net/man/3/dlsym) looks through the dependency tree of the
            // library if it can't find the symbol in the library itself.
            // So, in order to check if the function was actually found in the specified
            // `machine.external_so_lib` we need to check its `dli_fname` and compare it to
            // the specified SO file path.
            // This code is a reimplementation of the mechanism for getting `dli_fname` in `libloading`,
            // from: https://docs.rs/libloading/0.7.3/src/libloading/os/unix/mod.rs.html#411
            // using the `libc` crate where this interface is public.
            let mut info = std::mem::MaybeUninit::<libc::Dl_info>::zeroed();
            // SAFETY: `dladdr` fills in the provided `Dl_info`; we only read it after
            // checking the non-zero return value that signals success.
            unsafe {
                let res = libc::dladdr(fn_ptr, info.as_mut_ptr());
                assert!(res != 0, "failed to load info about function we already loaded");
                let info = info.assume_init();
                // On cygwin, `dli_fname` needs `.as_ptr()` (it is not itself a pointer).
                #[cfg(target_os = "cygwin")]
                let fname_ptr = info.dli_fname.as_ptr();
                #[cfg(not(target_os = "cygwin"))]
                let fname_ptr = info.dli_fname;
                assert!(!fname_ptr.is_null());
                if std::ffi::CStr::from_ptr(fname_ptr).to_str().unwrap()
                    != lib_path.to_str().unwrap()
                {
                    // The function is not actually in this .so, check the next one.
                    continue;
                }
            }

            // Return a pointer to the function.
            return Some(CodePtr(fn_ptr));
        }
        None
    }
212
    /// Applies the `events` to Miri's internal state. The event vector must be
    /// ordered sequentially by when the accesses happened, and the sizes are
    /// assumed to be exact.
    ///
    /// Reads expose any provenance stored in the accessed bytes; writes mark
    /// the accessed bytes as initialized (and provenance-less).
    fn tracing_apply_accesses(&mut self, events: MemEvents) -> InterpResult<'tcx> {
        let this = self.eval_context_mut();

        for evt in events.acc_events {
            let evt_rg = evt.get_range();
            // LLVM at least permits vectorising accesses to adjacent allocations,
            // so we cannot assume 1 access = 1 allocation. :(
            let mut rg = evt_rg.addr..evt_rg.end();
            while let Some(curr) = rg.next() {
                // Find the (exposed) allocation containing `curr`. Note that `curr`
                // was already popped off `rg`, so `rg.len()` here is the number of
                // access bytes *after* `curr`.
                let Some(alloc_id) = this.alloc_id_from_addr(
                    curr.to_u64(),
                    rg.len().try_into().unwrap(),
                    /* only_exposed_allocations */ true,
                ) else {
                    throw_ub_format!("Foreign code did an out-of-bounds access!")
                };
                let alloc = this.get_alloc_raw(alloc_id)?;
                // The logical and physical address of the allocation coincide, so we can use
                // this instead of `addr_from_alloc_id`.
                let alloc_addr = alloc.get_bytes_unchecked_raw().addr();

                // Determine the range inside the allocation that this access covers. This range is
                // in terms of offsets from the start of `alloc`. The start of the overlap range
                // will be `curr`; the end will be the minimum of the end of the allocation and the
                // end of the access' range.
                let overlap = curr.strict_sub(alloc_addr)
                    ..std::cmp::min(alloc.len(), rg.end.strict_sub(alloc_addr));
                // Skip forward however many bytes of the access are contained in the current
                // allocation, subtracting 1 since the overlap range includes the current addr
                // that was already popped off of the range.
                rg.advance_by(overlap.len().strict_sub(1)).unwrap();

                match evt {
                    AccessEvent::Read(_) => {
                        // FIXME: ProvenanceMap should have something like get_range().
                        let p_map = alloc.provenance();
                        for idx in overlap {
                            // If a provenance was read by the foreign code, expose it.
                            if let Some(prov) = p_map.get(Size::from_bytes(idx), this) {
                                this.expose_provenance(prov)?;
                            }
                        }
                    }
                    AccessEvent::Write(_, certain) => {
                        // Sometimes we aren't certain if a write happened, in which case we
                        // only initialise that data if the allocation is mutable.
                        if certain || alloc.mutability.is_mut() {
                            let (alloc, cx) = this.get_alloc_raw_mut(alloc_id)?;
                            alloc.process_native_write(
                                &cx.tcx,
                                Some(AllocRange {
                                    start: Size::from_bytes(overlap.start),
                                    size: Size::from_bytes(overlap.len()),
                                }),
                            )
                        }
                    }
                }
            }
        }

        interp_ok(())
    }
279}
280
281impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
282pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
    /// Call the native host function, with supplied arguments.
    /// Needs to convert all the arguments from their Miri representations to
    /// a native form (through `libffi` call).
    /// Then, convert the return value from the native form into something that
    /// can be stored in Miri's internal memory.
    ///
    /// Returns `true` if the call was performed, or `false` if the symbol was
    /// not found in any user-specified library (so the caller should fall back
    /// to Miri's shims).
    fn call_native_fn(
        &mut self,
        link_name: Symbol,
        dest: &MPlaceTy<'tcx>,
        args: &[OpTy<'tcx>],
    ) -> InterpResult<'tcx, bool> {
        let this = self.eval_context_mut();
        // Get the pointer to the function in the shared object file if it exists.
        let code_ptr = match this.get_func_ptr_explicitly_from_lib(link_name) {
            Some(ptr) => ptr,
            None => {
                // Shared object file does not export this function -- try the shims next.
                return interp_ok(false);
            }
        };

        // Do we have ptrace?
        let tracing = trace::Supervisor::is_enabled();

        // Get the function arguments, and convert them to `libffi`-compatible form.
        let mut libffi_args = Vec::<CArg>::with_capacity(args.len());
        for arg in args.iter() {
            if !matches!(arg.layout.backend_repr, BackendRepr::Scalar(_)) {
                throw_unsup_format!("only scalar argument types are supported for native calls")
            }
            let imm = this.read_immediate(arg)?;
            libffi_args.push(imm_to_carg(&imm, this)?);
            // If we are passing a pointer, expose its provenance. Below, all exposed memory
            // (previously exposed and new exposed) will then be properly prepared.
            if matches!(arg.layout.ty.kind(), ty::RawPtr(..)) {
                let ptr = imm.to_scalar().to_pointer(this)?;
                let Some(prov) = ptr.provenance else {
                    // Pointer without provenance may not access any memory anyway, skip.
                    continue;
                };
                // The first time this happens, print a warning.
                if !this.machine.native_call_mem_warned.replace(true) {
                    // Newly set, so first time we get here.
                    this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem { tracing });
                }

                this.expose_provenance(prov)?;
            }
        }
        // Convert arguments to `libffi::high::Arg` type.
        let libffi_args = libffi_args
            .iter()
            .map(|arg| arg.arg_downcast())
            .collect::<Vec<libffi::high::Arg<'_>>>();

        // Prepare all exposed memory (both previously exposed, and just newly exposed since a
        // pointer was passed as argument). Uninitialised memory is left as-is, but any data
        // exposed this way is garbage anyway.
        this.visit_reachable_allocs(this.exposed_allocs(), |this, alloc_id, info| {
            // If there is no data behind this pointer, skip this.
            if !matches!(info.kind, AllocKind::LiveData) {
                return interp_ok(());
            }
            // It's okay to get raw access, what we do does not correspond to any actual
            // AM operation, it just approximates the state to account for the native call.
            let alloc = this.get_alloc_raw(alloc_id)?;
            // Also expose the provenance of the interpreter-level allocation, so it can
            // be read by FFI. The `black_box` is defensive programming as LLVM likes
            // to (incorrectly) optimize away ptr2int casts whose result is unused.
            std::hint::black_box(alloc.get_bytes_unchecked_raw().expose_provenance());

            if !tracing {
                // Expose all provenances in this allocation, since the native code can do $whatever.
                // Can be skipped when tracing; in that case we'll expose just the actually-read parts later.
                for prov in alloc.provenance().provenances() {
                    this.expose_provenance(prov)?;
                }
            }

            // Prepare for possible write from native code if mutable.
            if info.mutbl.is_mut() {
                let (alloc, cx) = this.get_alloc_raw_mut(alloc_id)?;
                // These writes could initialize everything and wreak havoc with the pointers.
                // We can skip that when tracing; in that case we'll later do that only for the memory that got actually written.
                if !tracing {
                    alloc.process_native_write(&cx.tcx, None);
                }
                // Also expose *mutable* provenance for the interpreter-level allocation.
                std::hint::black_box(alloc.get_bytes_unchecked_raw_mut().expose_provenance());
            }

            interp_ok(())
        })?;

        // Call the function and store output, depending on return type in the function signature.
        let (ret, maybe_memevents) =
            this.call_native_with_args(link_name, dest, code_ptr, libffi_args)?;

        if tracing {
            // When tracing is enabled, the supervisor must have produced events;
            // apply them to Miri's state.
            this.tracing_apply_accesses(maybe_memevents.unwrap())?;
        }

        this.write_immediate(*ret, dest)?;
        interp_ok(true)
    }
388}
389
#[derive(Debug, Clone)]
/// Enum of supported arguments to external C functions.
// We introduce this enum instead of just calling `ffi::arg` and storing a list
// of `libffi::high::Arg` directly, because the `libffi::high::Arg` just wraps a reference
// to the value it represents: https://docs.rs/libffi/latest/libffi/high/call/struct.Arg.html
// and we need to store a copy of the value, and pass a reference to this copy to C instead.
// (`arg_downcast` below produces that borrowing `Arg` from the stored copy.)
enum CArg {
    /// 8-bit signed integer.
    Int8(i8),
    /// 16-bit signed integer.
    Int16(i16),
    /// 32-bit signed integer.
    Int32(i32),
    /// 64-bit signed integer.
    Int64(i64),
    /// isize.
    ISize(isize),
    /// 8-bit unsigned integer.
    UInt8(u8),
    /// 16-bit unsigned integer.
    UInt16(u16),
    /// 32-bit unsigned integer.
    UInt32(u32),
    /// 64-bit unsigned integer.
    UInt64(u64),
    /// usize.
    USize(usize),
    /// Raw pointer, stored as C's `void*`.
    RawPtr(*mut std::ffi::c_void),
}
420
421impl<'a> CArg {
422 /// Convert a `CArg` to a `libffi` argument type.
423 fn arg_downcast(&'a self) -> libffi::high::Arg<'a> {
424 match self {
425 CArg::Int8(i) => ffi::arg(i),
426 CArg::Int16(i) => ffi::arg(i),
427 CArg::Int32(i) => ffi::arg(i),
428 CArg::Int64(i) => ffi::arg(i),
429 CArg::ISize(i) => ffi::arg(i),
430 CArg::UInt8(i) => ffi::arg(i),
431 CArg::UInt16(i) => ffi::arg(i),
432 CArg::UInt32(i) => ffi::arg(i),
433 CArg::UInt64(i) => ffi::arg(i),
434 CArg::USize(i) => ffi::arg(i),
435 CArg::RawPtr(i) => ffi::arg(i),
436 }
437 }
438}
439
440/// Extract the scalar value from the result of reading a scalar from the machine,
441/// and convert it to a `CArg`.
442fn imm_to_carg<'tcx>(v: &ImmTy<'tcx>, cx: &impl HasDataLayout) -> InterpResult<'tcx, CArg> {
443 interp_ok(match v.layout.ty.kind() {
444 // If the primitive provided can be converted to a type matching the type pattern
445 // then create a `CArg` of this primitive value with the corresponding `CArg` constructor.
446 // the ints
447 ty::Int(IntTy::I8) => CArg::Int8(v.to_scalar().to_i8()?),
448 ty::Int(IntTy::I16) => CArg::Int16(v.to_scalar().to_i16()?),
449 ty::Int(IntTy::I32) => CArg::Int32(v.to_scalar().to_i32()?),
450 ty::Int(IntTy::I64) => CArg::Int64(v.to_scalar().to_i64()?),
451 ty::Int(IntTy::Isize) =>
452 CArg::ISize(v.to_scalar().to_target_isize(cx)?.try_into().unwrap()),
453 // the uints
454 ty::Uint(UintTy::U8) => CArg::UInt8(v.to_scalar().to_u8()?),
455 ty::Uint(UintTy::U16) => CArg::UInt16(v.to_scalar().to_u16()?),
456 ty::Uint(UintTy::U32) => CArg::UInt32(v.to_scalar().to_u32()?),
457 ty::Uint(UintTy::U64) => CArg::UInt64(v.to_scalar().to_u64()?),
458 ty::Uint(UintTy::Usize) =>
459 CArg::USize(v.to_scalar().to_target_usize(cx)?.try_into().unwrap()),
460 ty::RawPtr(..) => {
461 let s = v.to_scalar().to_pointer(cx)?.addr();
462 // This relies on the `expose_provenance` in the `visit_reachable_allocs` callback
463 // above.
464 CArg::RawPtr(std::ptr::with_exposed_provenance_mut(s.bytes_usize()))
465 }
466 _ => throw_unsup_format!("unsupported argument type for native call: {}", v.layout.ty),
467 })
468}