miri/shims/
os_str.rs

1use std::borrow::Cow;
2use std::ffi::{OsStr, OsString};
3#[cfg(unix)]
4use std::os::unix::ffi::{OsStrExt, OsStringExt};
5#[cfg(windows)]
6use std::os::windows::ffi::{OsStrExt, OsStringExt};
7use std::path::{Path, PathBuf};
8
9use rustc_middle::ty::Ty;
10
11use crate::*;
12
/// Represent how path separator conversion should be done.
///
/// The direction is always expressed relative to the machine Miri runs on (the host) and the
/// platform being interpreted (the target).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PathConversion {
    /// Convert a path following host conventions into one following target conventions.
    HostToTarget,
    /// Convert a path following target conventions into one following host conventions.
    TargetToHost,
}
18
19#[cfg(unix)]
20pub fn bytes_to_os_str<'tcx>(bytes: &[u8]) -> InterpResult<'tcx, &OsStr> {
21    interp_ok(OsStr::from_bytes(bytes))
22}
/// Views `bytes` as an `OsStr` (non-Unix-host variant).
///
/// The bytes must be valid UTF-8; otherwise an "unsupported" interpreter error is returned.
#[cfg(not(unix))]
pub fn bytes_to_os_str<'tcx>(bytes: &[u8]) -> InterpResult<'tcx, &OsStr> {
    // `bytes` is untrusted, so `from_encoded_bytes_unchecked` is not an option here; go through
    // a checked UTF-8 conversion instead.
    match std::str::from_utf8(bytes) {
        Ok(s) => interp_ok(OsStr::new(s)),
        Err(_) => Err(err_unsup_format!("{:?} is not a valid utf-8 string", bytes)).into(),
    }
}
30
// The impl body is intentionally empty: every method of `EvalContextExt` in this file comes
// with a default body, so the interpreter context picks them all up from the trait.
impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
32pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
33    /// Helper function to read an OsString from a null-terminated sequence of bytes, which is what
34    /// the Unix APIs usually handle.
35    fn read_os_str_from_c_str<'a>(&'a self, ptr: Pointer) -> InterpResult<'tcx, &'a OsStr>
36    where
37        'tcx: 'a,
38    {
39        let this = self.eval_context_ref();
40        let bytes = this.read_c_str(ptr)?;
41        bytes_to_os_str(bytes)
42    }
43
44    /// Helper function to read an OsString from a 0x0000-terminated sequence of u16,
45    /// which is what the Windows APIs usually handle.
46    fn read_os_str_from_wide_str<'a>(&'a self, ptr: Pointer) -> InterpResult<'tcx, OsString>
47    where
48        'tcx: 'a,
49    {
50        #[cfg(windows)]
51        pub fn u16vec_to_osstring<'tcx>(u16_vec: Vec<u16>) -> InterpResult<'tcx, OsString> {
52            interp_ok(OsString::from_wide(&u16_vec[..]))
53        }
54        #[cfg(not(windows))]
55        pub fn u16vec_to_osstring<'tcx>(u16_vec: Vec<u16>) -> InterpResult<'tcx, OsString> {
56            let s = String::from_utf16(&u16_vec[..])
57                .map_err(|_| err_unsup_format!("{:?} is not a valid utf-16 string", u16_vec))?;
58            interp_ok(s.into())
59        }
60
61        let u16_vec = self.eval_context_ref().read_wide_str(ptr)?;
62        u16vec_to_osstring(u16_vec)
63    }
64
65    /// Helper function to write an OsStr as a null-terminated sequence of bytes, which is what the
66    /// Unix APIs usually handle. Returns `(success, full_len)`, where length includes the null
67    /// terminator. On failure, nothing is written.
68    fn write_os_str_to_c_str(
69        &mut self,
70        os_str: &OsStr,
71        ptr: Pointer,
72        size: u64,
73    ) -> InterpResult<'tcx, (bool, u64)> {
74        let bytes = os_str.as_encoded_bytes();
75        self.eval_context_mut().write_c_str(bytes, ptr, size)
76    }
77
78    /// Internal helper to share code between `write_os_str_to_wide_str` and
79    /// `write_os_str_to_wide_str_truncated`.
80    fn write_os_str_to_wide_str_helper(
81        &mut self,
82        os_str: &OsStr,
83        ptr: Pointer,
84        size: u64,
85        truncate: bool,
86    ) -> InterpResult<'tcx, (bool, u64)> {
87        #[cfg(windows)]
88        fn os_str_to_u16vec<'tcx>(os_str: &OsStr) -> InterpResult<'tcx, Vec<u16>> {
89            interp_ok(os_str.encode_wide().collect())
90        }
91        #[cfg(not(windows))]
92        fn os_str_to_u16vec<'tcx>(os_str: &OsStr) -> InterpResult<'tcx, Vec<u16>> {
93            // On non-Windows platforms the best we can do to transform Vec<u16> from/to OS strings is to do the
94            // intermediate transformation into strings. Which invalidates non-utf8 paths that are actually
95            // valid.
96            os_str
97                .to_str()
98                .map(|s| s.encode_utf16().collect())
99                .ok_or_else(|| err_unsup_format!("{:?} is not a valid utf-8 string", os_str))
100                .into()
101        }
102
103        let u16_vec = os_str_to_u16vec(os_str)?;
104        let (written, size_needed) = self.eval_context_mut().write_wide_str(&u16_vec, ptr, size)?;
105        if truncate && !written && size > 0 {
106            // Write the truncated part that fits.
107            let truncated_data = &u16_vec[..size.saturating_sub(1).try_into().unwrap()];
108            let (written, written_len) =
109                self.eval_context_mut().write_wide_str(truncated_data, ptr, size)?;
110            assert!(written && written_len == size);
111        }
112        interp_ok((written, size_needed))
113    }
114
115    /// Helper function to write an OsStr as a 0x0000-terminated u16-sequence, which is what the
116    /// Windows APIs usually handle. Returns `(success, full_len)`, where length is measured
117    /// in units of `u16` and includes the null terminator. On failure, nothing is written.
118    fn write_os_str_to_wide_str(
119        &mut self,
120        os_str: &OsStr,
121        ptr: Pointer,
122        size: u64,
123    ) -> InterpResult<'tcx, (bool, u64)> {
124        self.write_os_str_to_wide_str_helper(os_str, ptr, size, /*truncate*/ false)
125    }
126
127    /// Like `write_os_str_to_wide_str`, but on failure as much as possible is written into
128    /// the buffer (always with a null terminator).
129    fn write_os_str_to_wide_str_truncated(
130        &mut self,
131        os_str: &OsStr,
132        ptr: Pointer,
133        size: u64,
134    ) -> InterpResult<'tcx, (bool, u64)> {
135        self.write_os_str_to_wide_str_helper(os_str, ptr, size, /*truncate*/ true)
136    }
137
138    /// Allocate enough memory to store the given `OsStr` as a null-terminated sequence of bytes.
139    fn alloc_os_str_as_c_str(
140        &mut self,
141        os_str: &OsStr,
142        memkind: MemoryKind,
143    ) -> InterpResult<'tcx, Pointer> {
144        let size = u64::try_from(os_str.len()).unwrap().strict_add(1); // Make space for `0` terminator.
145        let this = self.eval_context_mut();
146
147        let arg_type = Ty::new_array(this.tcx.tcx, this.tcx.types.u8, size);
148        let arg_place = this.allocate(this.layout_of(arg_type).unwrap(), memkind)?;
149        let (written, _) = self.write_os_str_to_c_str(os_str, arg_place.ptr(), size).unwrap();
150        assert!(written);
151        interp_ok(arg_place.ptr())
152    }
153
154    /// Allocate enough memory to store the given `OsStr` as a null-terminated sequence of `u16`.
155    fn alloc_os_str_as_wide_str(
156        &mut self,
157        os_str: &OsStr,
158        memkind: MemoryKind,
159    ) -> InterpResult<'tcx, Pointer> {
160        let size = u64::try_from(os_str.len()).unwrap().strict_add(1); // Make space for `0x0000` terminator.
161        let this = self.eval_context_mut();
162
163        let arg_type = Ty::new_array(this.tcx.tcx, this.tcx.types.u16, size);
164        let arg_place = this.allocate(this.layout_of(arg_type).unwrap(), memkind)?;
165        let (written, _) = self.write_os_str_to_wide_str(os_str, arg_place.ptr(), size).unwrap();
166        assert!(written);
167        interp_ok(arg_place.ptr())
168    }
169
170    /// Read a null-terminated sequence of bytes, and perform path separator conversion if needed.
171    fn read_path_from_c_str<'a>(&'a self, ptr: Pointer) -> InterpResult<'tcx, Cow<'a, Path>>
172    where
173        'tcx: 'a,
174    {
175        let this = self.eval_context_ref();
176        let os_str = this.read_os_str_from_c_str(ptr)?;
177
178        interp_ok(match this.convert_path(Cow::Borrowed(os_str), PathConversion::TargetToHost) {
179            Cow::Borrowed(x) => Cow::Borrowed(Path::new(x)),
180            Cow::Owned(y) => Cow::Owned(PathBuf::from(y)),
181        })
182    }
183
184    /// Read a null-terminated sequence of `u16`s, and perform path separator conversion if needed.
185    fn read_path_from_wide_str(&self, ptr: Pointer) -> InterpResult<'tcx, PathBuf> {
186        let this = self.eval_context_ref();
187        let os_str = this.read_os_str_from_wide_str(ptr)?;
188
189        interp_ok(
190            this.convert_path(Cow::Owned(os_str), PathConversion::TargetToHost).into_owned().into(),
191        )
192    }
193
194    /// Write a Path to the machine memory (as a null-terminated sequence of bytes),
195    /// adjusting path separators if needed.
196    fn write_path_to_c_str(
197        &mut self,
198        path: &Path,
199        ptr: Pointer,
200        size: u64,
201    ) -> InterpResult<'tcx, (bool, u64)> {
202        let this = self.eval_context_mut();
203        let os_str =
204            this.convert_path(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
205        this.write_os_str_to_c_str(&os_str, ptr, size)
206    }
207
208    /// Write a Path to the machine memory (as a null-terminated sequence of `u16`s),
209    /// adjusting path separators if needed.
210    fn write_path_to_wide_str(
211        &mut self,
212        path: &Path,
213        ptr: Pointer,
214        size: u64,
215    ) -> InterpResult<'tcx, (bool, u64)> {
216        let this = self.eval_context_mut();
217        let os_str =
218            this.convert_path(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
219        this.write_os_str_to_wide_str(&os_str, ptr, size)
220    }
221
222    /// Write a Path to the machine memory (as a null-terminated sequence of `u16`s),
223    /// adjusting path separators if needed.
224    fn write_path_to_wide_str_truncated(
225        &mut self,
226        path: &Path,
227        ptr: Pointer,
228        size: u64,
229    ) -> InterpResult<'tcx, (bool, u64)> {
230        let this = self.eval_context_mut();
231        let os_str =
232            this.convert_path(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
233        this.write_os_str_to_wide_str_truncated(&os_str, ptr, size)
234    }
235
236    /// Allocate enough memory to store a Path as a null-terminated sequence of bytes,
237    /// adjusting path separators if needed.
238    fn alloc_path_as_c_str(
239        &mut self,
240        path: &Path,
241        memkind: MemoryKind,
242    ) -> InterpResult<'tcx, Pointer> {
243        let this = self.eval_context_mut();
244        let os_str =
245            this.convert_path(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
246        this.alloc_os_str_as_c_str(&os_str, memkind)
247    }
248
249    /// Allocate enough memory to store a Path as a null-terminated sequence of `u16`s,
250    /// adjusting path separators if needed.
251    fn alloc_path_as_wide_str(
252        &mut self,
253        path: &Path,
254        memkind: MemoryKind,
255    ) -> InterpResult<'tcx, Pointer> {
256        let this = self.eval_context_mut();
257        let os_str =
258            this.convert_path(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
259        this.alloc_os_str_as_wide_str(&os_str, memkind)
260    }
261
262    fn convert_path<'a>(
263        &self,
264        os_str: Cow<'a, OsStr>,
265        direction: PathConversion,
266    ) -> Cow<'a, OsStr> {
267        let this = self.eval_context_ref();
268        let target_os = &this.tcx.sess.target.os;
269
270        /// Adjust a Windows path to Unix conventions such that it un-does everything that
271        /// `unix_to_windows` did, and such that if the Windows input path was absolute, then the
272        /// Unix output path is absolute.
273        fn windows_to_unix<T>(path: &mut Vec<T>)
274        where
275            T: From<u8> + Copy + Eq,
276        {
277            let sep = T::from(b'/');
278            // Make sure all path separators are `/`.
279            for c in path.iter_mut() {
280                if *c == b'\\'.into() {
281                    *c = sep;
282                }
283            }
284            // If this starts with `//?/`, it was probably produced by `unix_to_windows`` and we
285            // remove the `//?` that got added to get the Unix path back out.
286            if path.get(0..4) == Some(&[sep, sep, b'?'.into(), sep]) {
287                // Remove first 3 characters. It still starts with `/` so it is absolute on Unix.
288                path.splice(0..3, std::iter::empty());
289            }
290            // If it starts with a drive letter (`X:/`), convert it to an absolute Unix path.
291            else if path.get(1..3) == Some(&[b':'.into(), sep]) {
292                // We add a `/` at the beginning, to store the absolute Windows
293                // path in something that looks like an absolute Unix path.
294                path.insert(0, sep);
295            }
296        }
297
298        /// Adjust a Unix path to Windows conventions such that it un-does everything that
299        /// `windows_to_unix` did, and such that if the Unix input path was absolute, then the
300        /// Windows output path is absolute.
301        fn unix_to_windows<T>(path: &mut Vec<T>)
302        where
303            T: From<u8> + Copy + Eq,
304        {
305            let sep = T::from(b'\\');
306            // Make sure all path separators are `\`.
307            for c in path.iter_mut() {
308                if *c == b'/'.into() {
309                    *c = sep;
310                }
311            }
312            // If the path is `\X:\`, the leading separator was probably added by `windows_to_unix`
313            // and we should get rid of it again.
314            if path.get(2..4) == Some(&[b':'.into(), sep]) && path[0] == sep {
315                // The new path is still absolute on Windows.
316                path.remove(0);
317            }
318            // If this starts withs a `\` but not a `\\`, then this was absolute on Unix but is
319            // relative on Windows (relative to "the root of the current directory", e.g. the
320            // drive letter).
321            else if path.first() == Some(&sep) && path.get(1) != Some(&sep) {
322                // We add `\\?` so it starts with `\\?\` which is some magic path on Windows
323                // that *is* considered absolute. This way we store the absolute Unix path
324                // in something that looks like an absolute Windows path.
325                path.splice(0..0, [sep, sep, b'?'.into()]);
326            }
327        }
328
329        // Below we assume that everything non-Windows works like Unix, at least
330        // when it comes to file system path conventions.
331        #[cfg(windows)]
332        return if target_os == "windows" {
333            // Windows-on-Windows, all fine.
334            os_str
335        } else {
336            // Unix target, Windows host.
337            let mut path: Vec<u16> = os_str.encode_wide().collect();
338            match direction {
339                PathConversion::HostToTarget => {
340                    windows_to_unix(&mut path);
341                }
342                PathConversion::TargetToHost => {
343                    unix_to_windows(&mut path);
344                }
345            }
346            Cow::Owned(OsString::from_wide(&path))
347        };
348        #[cfg(unix)]
349        return if target_os == "windows" {
350            // Windows target, Unix host.
351            let mut path: Vec<u8> = os_str.into_owned().into_encoded_bytes();
352            match direction {
353                PathConversion::HostToTarget => {
354                    unix_to_windows(&mut path);
355                }
356                PathConversion::TargetToHost => {
357                    windows_to_unix(&mut path);
358                }
359            }
360            Cow::Owned(OsString::from_vec(path))
361        } else {
362            // Unix-on-Unix, all is fine.
363            os_str
364        };
365    }
366}