rustc_middle/mir/interpret/allocation/
provenance_map.rs

1//! Store the provenance for each byte in the range, with a more efficient
2//! representation for the common case where PTR_SIZE consecutive bytes have the same provenance.
3
4use std::cmp;
5use std::ops::{Range, RangeBounds};
6
7use rustc_abi::{HasDataLayout, Size};
8use rustc_data_structures::sorted_map::SortedMap;
9use rustc_macros::HashStable;
10use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
11use tracing::trace;
12
13use super::{AllocRange, CtfeProvenance, Provenance, alloc_range};
14use crate::mir::interpret::{AllocError, AllocResult};
15
16/// A pointer fragment represents one byte of a pointer.
17/// If the bytes are re-assembled in their original order, the pointer can be used again.
18/// Wildcard provenance is allowed to have index 0 everywhere.
19#[derive(Clone, PartialEq, Eq, Hash, Debug)]
20#[derive(HashStable)]
21pub struct PointerFrag<Prov> {
22    /// The position of this fragment inside the pointer (in `0..8`).
23    pub idx: u8,
24    /// The provenance of the pointer this is a fragment of.
25    pub prov: Prov,
26    /// The raw bytes of the pointer this is a fragment of.
27    /// This is taken as a direct subslice of the raw allocation data, so we don't have to worry
28    /// about endianness. If the pointer size is less than 8, only the first N bytes of this are
29    /// ever non-zero.
30    pub bytes: [u8; 8],
31}
32
33/// Stores the provenance information of pointers stored in memory.
34#[derive(Clone, PartialEq, Eq, Hash, Debug)]
35#[derive(HashStable)]
36pub struct ProvenanceMap<Prov = CtfeProvenance> {
37    /// `Provenance` in this map applies from the given offset for an entire pointer-size worth of
38    /// bytes. Two entries in this map are always at least a pointer size apart.
39    ptrs: SortedMap<Size, Prov>,
40    /// This stores byte-sized provenance fragments.
41    bytes: Option<Box<SortedMap<Size, PointerFrag<Prov>>>>,
42}
43
44// These impls are generic over `Prov` since `CtfeProvenance` is only decodable/encodable
45// for some particular `D`/`S`.
46impl<D: Decoder, Prov: Provenance + Decodable<D>> Decodable<D> for ProvenanceMap<Prov> {
47    fn decode(d: &mut D) -> Self {
48        // `bytes` is not in the serialized format
49        Self { ptrs: Decodable::decode(d), bytes: None }
50    }
51}
52impl<S: Encoder, Prov: Provenance + Encodable<S>> Encodable<S> for ProvenanceMap<Prov> {
53    fn encode(&self, s: &mut S) {
54        let Self { ptrs, bytes } = self;
55        assert!(bytes.is_none()); // interning refuses allocations with pointer fragments
56        ptrs.encode(s)
57    }
58}
59
60impl<Prov> ProvenanceMap<Prov> {
61    pub fn new() -> Self {
62        ProvenanceMap { ptrs: SortedMap::new(), bytes: None }
63    }
64
65    /// The caller must guarantee that the given provenance list is already sorted
66    /// by offset and contain no duplicates.
67    pub fn from_presorted_ptrs(r: Vec<(Size, Prov)>) -> Self {
68        ProvenanceMap { ptrs: SortedMap::from_presorted_elements(r), bytes: None }
69    }
70}
71
72impl ProvenanceMap {
73    /// Give access to the ptr-sized provenances (which can also be thought of as relocations, and
74    /// indeed that is how codegen treats them).
75    ///
76    /// Only use on interned allocations, as other allocations may have per-byte provenance!
77    #[inline]
78    pub fn ptrs(&self) -> &SortedMap<Size, CtfeProvenance> {
79        assert!(self.bytes.is_none(), "`ptrs()` called on non-interned allocation");
80        &self.ptrs
81    }
82}
83
84impl<Prov: Provenance> ProvenanceMap<Prov> {
85    fn adjusted_range_ptrs(range: AllocRange, cx: &impl HasDataLayout) -> Range<Size> {
86        // We have to go back `pointer_size - 1` bytes, as that one would still overlap with
87        // the beginning of this range.
88        let adjusted_start = Size::from_bytes(
89            range.start.bytes().saturating_sub(cx.data_layout().pointer_size().bytes() - 1),
90        );
91        adjusted_start..range.end()
92    }
93
94    /// Returns all ptr-sized provenance in the given range.
95    /// If the range has length 0, returns provenance that crosses the edge between `start-1` and
96    /// `start`.
97    fn range_ptrs_get(&self, range: AllocRange, cx: &impl HasDataLayout) -> &[(Size, Prov)] {
98        self.ptrs.range(Self::adjusted_range_ptrs(range, cx))
99    }
100
101    /// `pm.range_ptrs_is_empty(r, cx)` == `pm.range_ptrs_get(r, cx).is_empty()`, but is faster.
102    fn range_ptrs_is_empty(&self, range: AllocRange, cx: &impl HasDataLayout) -> bool {
103        self.ptrs.range_is_empty(Self::adjusted_range_ptrs(range, cx))
104    }
105
106    /// Check if there is ptr-sized provenance at the given index.
107    /// Does not mean anything for bytewise provenance! But can be useful as an optimization.
108    pub fn get_ptr(&self, offset: Size) -> Option<Prov> {
109        self.ptrs.get(&offset).copied()
110    }
111
112    /// Returns all byte-wise provenance in the given range.
113    fn range_bytes_get(&self, range: AllocRange) -> &[(Size, PointerFrag<Prov>)] {
114        if let Some(bytes) = self.bytes.as_ref() {
115            bytes.range(range.start..range.end())
116        } else {
117            &[]
118        }
119    }
120
121    /// Same as `range_bytes_get(range).is_empty()`, but faster.
122    fn range_bytes_is_empty(&self, range: AllocRange) -> bool {
123        self.bytes.as_ref().is_none_or(|bytes| bytes.range_is_empty(range.start..range.end()))
124    }
125
126    /// Get the provenance of a single byte. Must only be called if there is no
127    /// pointer-sized provenance here.
128    pub fn get_byte(&self, offset: Size, cx: &impl HasDataLayout) -> Option<&PointerFrag<Prov>> {
129        debug_assert!(self.range_ptrs_is_empty(alloc_range(offset, Size::from_bytes(1)), cx));
130        self.bytes.as_ref().and_then(|b| b.get(&offset))
131    }
132
133    /// Gets the provenances of all bytes (including from pointers) in a range.
134    pub fn get_range(
135        &self,
136        range: AllocRange,
137        cx: &impl HasDataLayout,
138    ) -> impl Iterator<Item = (AllocRange, Prov)> {
139        let ptr_size = cx.data_layout().pointer_size();
140        let ptr_provs = self
141            .range_ptrs_get(range, cx)
142            .iter()
143            .map(move |(offset, p)| (alloc_range(*offset, ptr_size), *p));
144        let byte_provs = self
145            .range_bytes_get(range)
146            .iter()
147            .map(move |(offset, frag)| (alloc_range(*offset, Size::from_bytes(1)), frag.prov));
148        ptr_provs.chain(byte_provs)
149    }
150
151    /// Attempt to merge per-byte provenance back into ptr chunks, if the right fragments
152    /// sit next to each other. Return `false` if that is not possible due to partial pointers.
153    pub fn merge_bytes(&mut self, cx: &impl HasDataLayout) -> bool {
154        let Some(bytes) = self.bytes.as_deref_mut() else {
155            return true;
156        };
157        let ptr_size = cx.data_layout().pointer_size();
158        while let Some((offset, first_frag)) = bytes.iter().next() {
159            let offset = *offset;
160            // Check if this fragment starts a pointer.
161            let range = offset..offset + ptr_size;
162            let frags = bytes.range(range.clone());
163            if frags.len() != ptr_size.bytes_usize() {
164                // We can't merge this one, no point in trying to merge the rest.
165                return false;
166            }
167            for (idx, (_offset, frag)) in frags.iter().enumerate() {
168                if !(frag.prov == first_frag.prov
169                    && frag.bytes == first_frag.bytes
170                    && frag.idx == idx as u8)
171                {
172                    return false;
173                }
174            }
175            // Looks like a pointer! Move it over to the ptr provenance map.
176            self.ptrs.insert(offset, first_frag.prov);
177            bytes.remove_range(range);
178        }
179        // We managed to convert everything into whole pointers.
180        self.bytes = None;
181        true
182    }
183
184    /// Try to read a pointer from the given location, possibly by loading from many per-byte
185    /// provenances.
186    pub fn read_ptr(&self, offset: Size, cx: &impl HasDataLayout) -> AllocResult<Option<Prov>> {
187        // If there is pointer-sized provenance exactly here, we can just return that.
188        if let Some(prov) = self.get_ptr(offset) {
189            return Ok(Some(prov));
190        }
191        // The other easy case is total absence of provenance, that also always works.
192        let range = alloc_range(offset, cx.data_layout().pointer_size());
193        let no_ptrs = self.range_ptrs_is_empty(range, cx);
194        if no_ptrs && self.range_bytes_is_empty(range) {
195            return Ok(None);
196        }
197        // If we get here, we have to check whether we can merge per-byte provenance.
198        let prov = 'prov: {
199            // If there is any ptr-sized provenance overlapping with this range,
200            // this is definitely mixing multiple pointers and we can bail.
201            if !no_ptrs {
202                break 'prov None;
203            }
204            // Scan all fragments, and ensure their indices, provenance, and bytes match.
205            // However, we have to ignore wildcard fragments for this (this is needed for Miri's
206            // native-lib mode). Therefore, we will only know the expected provenance and bytes
207            // once we find the first non-wildcard fragment.
208            let mut expected = None;
209            for idx in Size::ZERO..range.size {
210                // Ensure there is provenance here.
211                let Some(frag) = self.get_byte(offset + idx, cx) else {
212                    break 'prov None;
213                };
214                // If this is wildcard provenance, ignore this fragment.
215                if Some(frag.prov) == Prov::WILDCARD {
216                    continue;
217                }
218                // For non-wildcard fragments, the index must match.
219                if u64::from(frag.idx) != idx.bytes() {
220                    break 'prov None;
221                }
222                // If there are expectations registered, check them.
223                // If not, record this fragment as setting the expectations.
224                match expected {
225                    Some(expected) => {
226                        if (frag.prov, frag.bytes) != expected {
227                            break 'prov None;
228                        }
229                    }
230                    None => {
231                        expected = Some((frag.prov, frag.bytes));
232                    }
233                }
234            }
235            // The final provenance is the expected one we found along the way, or wildcard if
236            // we didn't find any.
237            Some(expected.map(|(prov, _addr)| prov).or_else(|| Prov::WILDCARD).unwrap())
238        };
239        if prov.is_none() && !Prov::OFFSET_IS_ADDR {
240            // There are some bytes with provenance here but overall the provenance does not add up.
241            // We need `OFFSET_IS_ADDR` to fall back to no-provenance here; without that option, we
242            // must error.
243            return Err(AllocError::ReadPartialPointer(offset));
244        }
245        Ok(prov)
246    }
247
248    /// Returns whether this allocation has provenance overlapping with the given range.
249    ///
250    /// Note: this function exists to allow `range_get_provenance` to be private, in order to somewhat
251    /// limit access to provenance outside of the `Allocation` abstraction.
252    ///
253    pub fn range_empty(&self, range: AllocRange, cx: &impl HasDataLayout) -> bool {
254        self.range_ptrs_is_empty(range, cx) && self.range_bytes_is_empty(range)
255    }
256
257    /// Yields all the provenances stored in this map.
258    pub fn provenances(&self) -> impl Iterator<Item = Prov> {
259        let bytes = self.bytes.iter().flat_map(|b| b.values().map(|frag| frag.prov));
260        self.ptrs.values().copied().chain(bytes)
261    }
262
263    pub fn insert_ptr(&mut self, offset: Size, prov: Prov, cx: &impl HasDataLayout) {
264        debug_assert!(self.range_empty(alloc_range(offset, cx.data_layout().pointer_size()), cx));
265        self.ptrs.insert(offset, prov);
266    }
267
268    /// Returns an iterator that yields the fragments of this pointer whose absolute positions are
269    /// inside `pos_range`.
270    fn ptr_fragments(
271        pos_range: impl RangeBounds<Size>,
272        ptr_pos: Size,
273        prov: Prov,
274        data_bytes: &[u8],
275        ptr_size: Size,
276    ) -> impl Iterator<Item = (Size, PointerFrag<Prov>)> {
277        if pos_range.is_empty() {
278            return either::Left(std::iter::empty());
279        }
280        // Read ptr_size many bytes starting at ptr_pos.
281        let mut bytes = [0u8; 8];
282        (&mut bytes[..ptr_size.bytes_usize()])
283            .copy_from_slice(&data_bytes[ptr_pos.bytes_usize()..][..ptr_size.bytes_usize()]);
284        // Yield the fragments of this pointer.
285        either::Right(
286            (ptr_pos..ptr_pos + ptr_size).filter(move |pos| pos_range.contains(pos)).map(
287                move |pos| (pos, PointerFrag { idx: (pos - ptr_pos).bytes() as u8, bytes, prov }),
288            ),
289        )
290    }
291
292    /// Removes all provenance inside the given range.
293    #[allow(irrefutable_let_patterns)] // these actually make the code more clear
294    pub fn clear(&mut self, range: AllocRange, data_bytes: &[u8], cx: &impl HasDataLayout) {
295        if range.size == Size::ZERO {
296            return;
297        }
298
299        let start = range.start;
300        let end = range.end();
301        // Clear the bytewise part -- this is easy.
302        if let Some(bytes) = self.bytes.as_mut() {
303            bytes.remove_range(start..end);
304        }
305
306        // Find all provenance overlapping the given range.
307        let ptrs_range = Self::adjusted_range_ptrs(range, cx);
308        if self.ptrs.range_is_empty(ptrs_range.clone()) {
309            // No provenance in this range, we are done. This is the common case.
310            return;
311        }
312        let pointer_size = cx.data_layout().pointer_size();
313
314        // This redoes some of the work of `range_is_empty`, but this path is much
315        // colder than the early return above, so it's worth it.
316        let ptrs = self.ptrs.range(ptrs_range.clone());
317
318        // We need to handle clearing the provenance from parts of a pointer.
319        if let &(first, prov) = ptrs.first().unwrap()
320            && first < start
321        {
322            // Insert the remaining part in the bytewise provenance.
323            let bytes = self.bytes.get_or_insert_with(Box::default);
324            for (pos, frag) in Self::ptr_fragments(..start, first, prov, data_bytes, pointer_size) {
325                bytes.insert(pos, frag);
326            }
327        }
328        if let &(last, prov) = ptrs.last().unwrap()
329            && last + pointer_size > end
330        {
331            // Insert the remaining part in the bytewise provenance.
332            let bytes = self.bytes.get_or_insert_with(Box::default);
333            for (pos, frag) in Self::ptr_fragments(end.., last, prov, data_bytes, pointer_size) {
334                bytes.insert(pos, frag);
335            }
336        }
337
338        // Forget all the provenance.
339        // Since provenance do not overlap, we know that removing until `last` (exclusive) is fine,
340        // i.e., this will not remove any other provenance just after the ones we care about.
341        self.ptrs.remove_range(ptrs_range);
342    }
343
344    /// Overwrites all provenance in the given range with wildcard provenance.
345    /// Pointers partially overwritten will have their provenances preserved
346    /// bytewise on their remaining bytes.
347    ///
348    /// Provided for usage in Miri and panics otherwise.
349    pub fn write_wildcards(
350        &mut self,
351        cx: &impl HasDataLayout,
352        data_bytes: &[u8],
353        range: AllocRange,
354    ) {
355        let wildcard = Prov::WILDCARD.unwrap();
356
357        // Clear existing provenance in this range.
358        self.clear(range, data_bytes, cx);
359
360        // Make everything in the range wildcards.
361        let bytes = self.bytes.get_or_insert_with(Box::default);
362        for offset in range.start..range.end() {
363            // The fragment index and bytes do not matter for wildcard provenance.
364            bytes.insert(
365                offset,
366                PointerFrag { prov: wildcard, idx: Default::default(), bytes: Default::default() },
367            );
368        }
369    }
370}
371
372/// A partial, owned list of provenance to transfer into another allocation.
373///
374/// Offsets are relative to the beginning of the copied range.
375pub struct ProvenanceCopy<Prov> {
376    ptrs: Box<[(Size, Prov)]>,
377    bytes: Box<[(Size, PointerFrag<Prov>)]>,
378}
379
380impl<Prov: Provenance> ProvenanceMap<Prov> {
381    pub fn prepare_copy(
382        &self,
383        range: AllocRange,
384        data_bytes: &[u8],
385        cx: &impl HasDataLayout,
386    ) -> ProvenanceCopy<Prov> {
387        let shift_offset = move |offset| offset - range.start;
388        let ptr_size = cx.data_layout().pointer_size();
389
390        // # Pointer-sized provenances
391        // Get the provenances that are entirely within this range.
392        // (Different from `range_get_ptrs` which asks if they overlap the range.)
393        // Only makes sense if we are copying at least one pointer worth of bytes.
394        let mut ptrs_box: Box<[_]> = Box::new([]);
395        if range.size >= ptr_size {
396            let adjusted_end = Size::from_bytes(range.end().bytes() - (ptr_size.bytes() - 1));
397            let ptrs = self.ptrs.range(range.start..adjusted_end);
398            ptrs_box = ptrs.iter().map(|&(offset, reloc)| (shift_offset(offset), reloc)).collect();
399        };
400
401        // # Byte-sized provenances
402        // This includes the existing bytewise provenance in the range, and ptr provenance
403        // that overlaps with the begin/end of the range.
404        let mut bytes_box: Box<[_]> = Box::new([]);
405        let begin_overlap = self.range_ptrs_get(alloc_range(range.start, Size::ZERO), cx).first();
406        let end_overlap = self.range_ptrs_get(alloc_range(range.end(), Size::ZERO), cx).first();
407        // We only need to go here if there is some overlap or some bytewise provenance.
408        if begin_overlap.is_some() || end_overlap.is_some() || self.bytes.is_some() {
409            let mut bytes: Vec<(Size, PointerFrag<Prov>)> = Vec::new();
410            // First, if there is a part of a pointer at the start, add that.
411            if let Some(&(pos, prov)) = begin_overlap {
412                // For really small copies, make sure we don't run off the end of the range.
413                let end = cmp::min(pos + ptr_size, range.end());
414                for (pos, frag) in
415                    Self::ptr_fragments(range.start..end, pos, prov, data_bytes, ptr_size)
416                {
417                    bytes.push((shift_offset(pos), frag));
418                }
419            } else {
420                trace!("no start overlapping entry");
421            }
422
423            // Then the main part, bytewise provenance from `self.bytes`.
424            bytes.extend(
425                self.range_bytes_get(range)
426                    .iter()
427                    .map(|(offset, frag)| (shift_offset(*offset), frag.clone())),
428            );
429
430            // And finally possibly parts of a pointer at the end.
431            // We only have to go here if this is actually different than the begin_overlap.
432            if let Some(&(pos, prov)) = end_overlap
433                && begin_overlap.is_none_or(|(begin, _)| *begin != pos)
434            {
435                // If this was a really small copy, we'd have handled this in begin_overlap.
436                assert!(pos >= range.start);
437                for (pos, frag) in
438                    Self::ptr_fragments(pos..range.end(), pos, prov, data_bytes, ptr_size)
439                {
440                    let pos = shift_offset(pos);
441                    // The last entry, if it exists, has a lower offset than us, so we
442                    // can add it at the end and remain sorted.
443                    debug_assert!(bytes.last().is_none_or(|bytes_entry| bytes_entry.0 < pos));
444                    bytes.push((pos, frag));
445                }
446            } else {
447                trace!("no end overlapping entry");
448            }
449            trace!("byte provenances: {bytes:?}");
450
451            // And again a buffer for the new list on the target side.
452            bytes_box = bytes.into_boxed_slice();
453        }
454
455        ProvenanceCopy { ptrs: ptrs_box, bytes: bytes_box }
456    }
457
458    /// Applies a provenance copy.
459    /// The affected range, as defined in the parameters to `prepare_copy` is expected
460    /// to be clear of provenance.
461    pub fn apply_copy(&mut self, copy: ProvenanceCopy<Prov>, range: AllocRange, repeat: u64) {
462        let shift_offset = |idx: u64, offset: Size| offset + range.start + idx * range.size;
463        if !copy.ptrs.is_empty() {
464            // We want to call `insert_presorted` only once so that, if possible, the entries
465            // after the range we insert are moved back only once.
466            let chunk_len = copy.ptrs.len() as u64;
467            self.ptrs.insert_presorted((0..chunk_len * repeat).map(|i| {
468                let chunk = i / chunk_len;
469                let (offset, prov) = copy.ptrs[(i % chunk_len) as usize];
470                (shift_offset(chunk, offset), prov)
471            }));
472        }
473        if !copy.bytes.is_empty() {
474            let chunk_len = copy.bytes.len() as u64;
475            self.bytes.get_or_insert_with(Box::default).insert_presorted(
476                (0..chunk_len * repeat).map(|i| {
477                    let chunk = i / chunk_len;
478                    let (offset, frag) = &copy.bytes[(i % chunk_len) as usize];
479                    (shift_offset(chunk, *offset), frag.clone())
480                }),
481            );
482        }
483    }
484}