rustc_transmute/layout/
dfa.rs

1use std::fmt;
2use std::iter::Peekable;
3use std::sync::atomic::{AtomicU32, Ordering};
4
5use super::{Byte, Reference, Region, Tree, Type, Uninhabited};
6use crate::{Map, Set};
7
8#[derive(PartialEq)]
9#[cfg_attr(test, derive(Clone))]
10pub(crate) struct Dfa<R, T>
11where
12    R: Region,
13    T: Type,
14{
15    pub(crate) transitions: Map<State, Transitions<R, T>>,
16    pub(crate) start: State,
17    pub(crate) accept: State,
18}
19
20#[derive(PartialEq, Clone, Debug)]
21pub(crate) struct Transitions<R, T>
22where
23    R: Region,
24    T: Type,
25{
26    byte_transitions: EdgeSet<State>,
27    ref_transitions: Map<Reference<R, T>, State>,
28}
29
30impl<R, T> Default for Transitions<R, T>
31where
32    R: Region,
33    T: Type,
34{
35    fn default() -> Self {
36        Self { byte_transitions: EdgeSet::empty(), ref_transitions: Map::default() }
37    }
38}
39
40/// The states in a [`Dfa`] represent byte offsets.
41#[derive(Hash, Eq, PartialEq, PartialOrd, Ord, Copy, Clone)]
42pub(crate) struct State(pub(crate) u32);
43
44impl State {
45    pub(crate) fn new() -> Self {
46        static COUNTER: AtomicU32 = AtomicU32::new(0);
47        Self(COUNTER.fetch_add(1, Ordering::SeqCst))
48    }
49}
50
51impl fmt::Debug for State {
52    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
53        write!(f, "S_{}", self.0)
54    }
55}
56
57impl<R, T> Dfa<R, T>
58where
59    R: Region,
60    T: Type,
61{
62    #[cfg(test)]
63    pub(crate) fn bool() -> Self {
64        Self::from_transitions(|accept| Transitions {
65            byte_transitions: EdgeSet::new(Byte::new(0x00..=0x01), accept),
66            ref_transitions: Map::default(),
67        })
68    }
69
70    pub(crate) fn unit() -> Self {
71        let transitions: Map<State, Transitions<R, T>> = Map::default();
72        let start = State::new();
73        let accept = start;
74
75        Self { transitions, start, accept }
76    }
77
78    pub(crate) fn from_byte(byte: Byte) -> Self {
79        Self::from_transitions(|accept| Transitions {
80            byte_transitions: EdgeSet::new(byte, accept),
81            ref_transitions: Map::default(),
82        })
83    }
84
85    pub(crate) fn from_ref(r: Reference<R, T>) -> Self {
86        Self::from_transitions(|accept| Transitions {
87            byte_transitions: EdgeSet::empty(),
88            ref_transitions: [(r, accept)].into_iter().collect(),
89        })
90    }
91
92    fn from_transitions(f: impl FnOnce(State) -> Transitions<R, T>) -> Self {
93        let start = State::new();
94        let accept = State::new();
95
96        Self { transitions: [(start, f(accept))].into_iter().collect(), start, accept }
97    }
98
99    pub(crate) fn from_tree(tree: Tree<!, R, T>) -> Result<Self, Uninhabited> {
100        Ok(match tree {
101            Tree::Byte(b) => Self::from_byte(b),
102            Tree::Ref(r) => Self::from_ref(r),
103            Tree::Alt(alts) => {
104                // Convert and filter the inhabited alternatives.
105                let mut alts = alts.into_iter().map(Self::from_tree).filter_map(Result::ok);
106                // If there are no alternatives, return `Uninhabited`.
107                let dfa = alts.next().ok_or(Uninhabited)?;
108                // Combine the remaining alternatives with `dfa`.
109                alts.fold(dfa, |dfa, alt| dfa.union(alt, State::new))
110            }
111            Tree::Seq(elts) => {
112                let mut dfa = Self::unit();
113                for elt in elts.into_iter().map(Self::from_tree) {
114                    dfa = dfa.concat(elt?);
115                }
116                dfa
117            }
118        })
119    }
120
121    /// Concatenate two `Dfa`s.
122    pub(crate) fn concat(self, other: Self) -> Self {
123        if self.start == self.accept {
124            return other;
125        } else if other.start == other.accept {
126            return self;
127        }
128
129        let start = self.start;
130        let accept = other.accept;
131
132        let mut transitions: Map<State, Transitions<R, T>> = self.transitions;
133
134        for (source, transition) in other.transitions {
135            let fix_state = |state| if state == other.start { self.accept } else { state };
136            let byte_transitions = transition.byte_transitions.map_states(&fix_state);
137            let ref_transitions = transition
138                .ref_transitions
139                .into_iter()
140                .map(|(r, state)| (r, fix_state(state)))
141                .collect();
142
143            let old = transitions
144                .insert(fix_state(source), Transitions { byte_transitions, ref_transitions });
145            assert!(old.is_none());
146        }
147
148        Self { transitions, start, accept }
149    }
150
151    /// Compute the union of two `Dfa`s.
152    pub(crate) fn union(self, other: Self, mut new_state: impl FnMut() -> State) -> Self {
153        // We implement `union` by lazily initializing a set of states
154        // corresponding to the product of states in `self` and `other`, and
155        // then add transitions between these states that correspond to where
156        // they exist between `self` and `other`.
157
158        let a = self;
159        let b = other;
160
161        let accept = new_state();
162
163        let mut mapping: Map<(Option<State>, Option<State>), State> = Map::default();
164
165        let mut mapped = |(a_state, b_state)| {
166            if Some(a.accept) == a_state || Some(b.accept) == b_state {
167                // If either `a_state` or `b_state` are accepting, map to a
168                // common `accept` state.
169                accept
170            } else {
171                *mapping.entry((a_state, b_state)).or_insert_with(&mut new_state)
172            }
173        };
174
175        let start = mapped((Some(a.start), Some(b.start)));
176        let mut transitions: Map<State, Transitions<R, T>> = Map::default();
177        let empty_transitions = Transitions::default();
178
179        struct WorkQueue {
180            queue: Vec<(Option<State>, Option<State>)>,
181            // Track all entries ever enqueued to avoid duplicating work. This
182            // gives us a guarantee that a given (a_state, b_state) pair will
183            // only ever be visited once.
184            enqueued: Set<(Option<State>, Option<State>)>,
185        }
186        impl WorkQueue {
187            fn enqueue(&mut self, a_state: Option<State>, b_state: Option<State>) {
188                if self.enqueued.insert((a_state, b_state)) {
189                    self.queue.push((a_state, b_state));
190                }
191            }
192        }
193        let mut queue = WorkQueue { queue: Vec::new(), enqueued: Set::default() };
194        queue.enqueue(Some(a.start), Some(b.start));
195
196        while let Some((a_src, b_src)) = queue.queue.pop() {
197            let src = mapped((a_src, b_src));
198            if src == accept {
199                // While it's possible to have a DFA whose accept state has
200                // out-edges, these do not affect the semantics of the DFA, and
201                // so there's no point in processing them. Continuing here also
202                // has the advantage of guaranteeing that we only ever process a
203                // given node in the output DFA once. In particular, with the
204                // exception of the accept state, we ensure that we only push a
205                // given node to the `queue` once. This allows the following
206                // code to assume that we're processing a node we've never
207                // processed before, which means we never need to merge two edge
208                // sets - we only ever need to construct a new edge set from
209                // whole cloth.
210                continue;
211            }
212
213            let a_transitions =
214                a_src.and_then(|a_src| a.transitions.get(&a_src)).unwrap_or(&empty_transitions);
215            let b_transitions =
216                b_src.and_then(|b_src| b.transitions.get(&b_src)).unwrap_or(&empty_transitions);
217
218            let byte_transitions = a_transitions.byte_transitions.union(
219                &b_transitions.byte_transitions,
220                |a_dst, b_dst| {
221                    assert!(a_dst.is_some() || b_dst.is_some());
222
223                    queue.enqueue(a_dst, b_dst);
224                    mapped((a_dst, b_dst))
225                },
226            );
227
228            let ref_transitions =
229                a_transitions.ref_transitions.keys().chain(b_transitions.ref_transitions.keys());
230
231            let ref_transitions = ref_transitions
232                .map(|ref_transition| {
233                    let a_dst = a_transitions.ref_transitions.get(ref_transition).copied();
234                    let b_dst = b_transitions.ref_transitions.get(ref_transition).copied();
235
236                    assert!(a_dst.is_some() || b_dst.is_some());
237
238                    queue.enqueue(a_dst, b_dst);
239                    (*ref_transition, mapped((a_dst, b_dst)))
240                })
241                .collect();
242
243            let old = transitions.insert(src, Transitions { byte_transitions, ref_transitions });
244            // See `if src == accept { ... }` above. The comment there explains
245            // why this assert is valid.
246            assert_eq!(old, None);
247        }
248
249        Self { transitions, start, accept }
250    }
251
252    pub(crate) fn get_uninit_edge_dst(&self, state: State) -> Option<State> {
253        let transitions = self.transitions.get(&state)?;
254        transitions.byte_transitions.get_uninit_edge_dst()
255    }
256
257    pub(crate) fn bytes_from(&self, start: State) -> impl Iterator<Item = (Byte, State)> {
258        self.transitions
259            .get(&start)
260            .into_iter()
261            .flat_map(|transitions| transitions.byte_transitions.iter())
262    }
263
264    pub(crate) fn refs_from(&self, start: State) -> impl Iterator<Item = (Reference<R, T>, State)> {
265        self.transitions
266            .get(&start)
267            .into_iter()
268            .flat_map(|transitions| transitions.ref_transitions.iter())
269            .map(|(r, s)| (*r, *s))
270    }
271
272    #[cfg(test)]
273    pub(crate) fn from_edges<B: Clone + Into<Byte>>(
274        start: u32,
275        accept: u32,
276        edges: &[(u32, B, u32)],
277    ) -> Self {
278        let start = State(start);
279        let accept = State(accept);
280        let mut transitions: Map<State, Vec<(Byte, State)>> = Map::default();
281
282        for &(src, ref edge, dst) in edges.iter() {
283            transitions.entry(State(src)).or_default().push((edge.clone().into(), State(dst)));
284        }
285
286        let transitions = transitions
287            .into_iter()
288            .map(|(src, edges)| {
289                (
290                    src,
291                    Transitions {
292                        byte_transitions: EdgeSet::from_edges(edges),
293                        ref_transitions: Map::default(),
294                    },
295                )
296            })
297            .collect();
298
299        Self { start, accept, transitions }
300    }
301}
302
303/// Serialize the DFA using the Graphviz DOT format.
304impl<R, T> fmt::Debug for Dfa<R, T>
305where
306    R: Region,
307    T: Type,
308{
309    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
310        writeln!(f, "digraph {{")?;
311        writeln!(f, "    {:?} [shape = doublecircle]", self.start)?;
312        writeln!(f, "    {:?} [shape = doublecircle]", self.accept)?;
313
314        for (src, transitions) in self.transitions.iter() {
315            for (t, dst) in transitions.byte_transitions.iter() {
316                writeln!(f, "    {src:?} -> {dst:?} [label=\"{t:?}\"]")?;
317            }
318
319            for (t, dst) in transitions.ref_transitions.iter() {
320                writeln!(f, "    {src:?} -> {dst:?} [label=\"{t:?}\"]")?;
321            }
322        }
323
324        writeln!(f, "}}")
325    }
326}
327
328use edge_set::EdgeSet;
329mod edge_set {
330    use smallvec::SmallVec;
331
332    use super::*;
333
334    /// The set of outbound byte edges associated with a DFA node.
335    #[derive(Eq, PartialEq, Clone, Debug)]
336    pub(super) struct EdgeSet<S = State> {
337        // A sequence of byte edges with contiguous byte values and a common
338        // destination is stored as a single run.
339        //
340        // Runs are non-empty, non-overlapping, and stored in ascending order.
341        runs: SmallVec<[(Byte, S); 1]>,
342    }
343
344    impl<S> EdgeSet<S> {
345        pub(crate) fn new(range: Byte, dst: S) -> Self {
346            let mut this = Self { runs: SmallVec::new() };
347            if !range.is_empty() {
348                this.runs.push((range, dst));
349            }
350            this
351        }
352
353        pub(crate) fn empty() -> Self {
354            Self { runs: SmallVec::new() }
355        }
356
357        #[cfg(test)]
358        pub(crate) fn from_edges(mut edges: Vec<(Byte, S)>) -> Self
359        where
360            S: Ord,
361        {
362            edges.sort();
363            Self { runs: edges.into() }
364        }
365
366        pub(crate) fn iter(&self) -> impl Iterator<Item = (Byte, S)>
367        where
368            S: Copy,
369        {
370            self.runs.iter().copied()
371        }
372
373        pub(crate) fn get_uninit_edge_dst(&self) -> Option<S>
374        where
375            S: Copy,
376        {
377            // Uninit is ordered last.
378            let &(range, dst) = self.runs.last()?;
379            if range.contains_uninit() { Some(dst) } else { None }
380        }
381
382        pub(crate) fn map_states<SS>(self, mut f: impl FnMut(S) -> SS) -> EdgeSet<SS> {
383            EdgeSet {
384                // NOTE: It appears as through `<Vec<_> as
385                // IntoIterator>::IntoIter` and `std::iter::Map` both implement
386                // `TrustedLen`, which in turn means that this `.collect()`
387                // allocates the correct number of elements once up-front [1].
388                //
389                // [1] https://doc.rust-lang.org/1.85.0/src/alloc/vec/spec_from_iter_nested.rs.html#47
390                runs: self.runs.into_iter().map(|(b, s)| (b, f(s))).collect(),
391            }
392        }
393
394        /// Unions two edge sets together.
395        ///
396        /// If `u = a.union(b)`, then for each byte value, `u` will have an edge
397        /// with that byte value and with the destination `join(Some(_), None)`,
398        /// `join(None, Some(_))`, or `join(Some(_), Some(_))` depending on whether `a`,
399        /// `b`, or both have an edge with that byte value.
400        ///
401        /// If neither `a` nor `b` have an edge with a particular byte value,
402        /// then no edge with that value will be present in `u`.
403        pub(crate) fn union(
404            &self,
405            other: &Self,
406            mut join: impl FnMut(Option<S>, Option<S>) -> S,
407        ) -> EdgeSet<S>
408        where
409            S: Copy + Eq,
410        {
411            let mut runs: SmallVec<[(Byte, S); 1]> = SmallVec::new();
412            let xs = self.runs.iter().copied();
413            let ys = other.runs.iter().copied();
414            for (range, (x, y)) in union(xs, ys) {
415                let state = join(x, y);
416                match runs.last_mut() {
417                    // Merge contiguous runs with a common destination.
418                    Some(&mut (ref mut last_range, ref mut last_state))
419                        if last_range.end == range.start && *last_state == state =>
420                    {
421                        last_range.end = range.end
422                    }
423                    _ => runs.push((range, state)),
424                }
425            }
426            EdgeSet { runs }
427        }
428    }
429}
430
431/// Merges two sorted sequences into one sorted sequence.
432pub(crate) fn union<S: Copy, X: Iterator<Item = (Byte, S)>, Y: Iterator<Item = (Byte, S)>>(
433    xs: X,
434    ys: Y,
435) -> UnionIter<X, Y> {
436    UnionIter { xs: xs.peekable(), ys: ys.peekable() }
437}
438
439pub(crate) struct UnionIter<X: Iterator, Y: Iterator> {
440    xs: Peekable<X>,
441    ys: Peekable<Y>,
442}
443
444// FIXME(jswrenn) we'd likely benefit from specializing try_fold here.
445impl<S: Copy, X: Iterator<Item = (Byte, S)>, Y: Iterator<Item = (Byte, S)>> Iterator
446    for UnionIter<X, Y>
447{
448    type Item = (Byte, (Option<S>, Option<S>));
449
450    fn next(&mut self) -> Option<Self::Item> {
451        use std::cmp::{self, Ordering};
452
453        let ret;
454        match (self.xs.peek_mut(), self.ys.peek_mut()) {
455            (None, None) => {
456                ret = None;
457            }
458            (Some(x), None) => {
459                ret = Some((x.0, (Some(x.1), None)));
460                self.xs.next();
461            }
462            (None, Some(y)) => {
463                ret = Some((y.0, (None, Some(y.1))));
464                self.ys.next();
465            }
466            (Some(x), Some(y)) => {
467                let start;
468                let end;
469                let dst;
470                match x.0.start.cmp(&y.0.start) {
471                    Ordering::Less => {
472                        start = x.0.start;
473                        end = cmp::min(x.0.end, y.0.start);
474                        dst = (Some(x.1), None);
475                    }
476                    Ordering::Greater => {
477                        start = y.0.start;
478                        end = cmp::min(x.0.start, y.0.end);
479                        dst = (None, Some(y.1));
480                    }
481                    Ordering::Equal => {
482                        start = x.0.start;
483                        end = cmp::min(x.0.end, y.0.end);
484                        dst = (Some(x.1), Some(y.1));
485                    }
486                }
487                ret = Some((Byte { start, end }, dst));
488                if start == x.0.start {
489                    x.0.start = end;
490                }
491                if start == y.0.start {
492                    y.0.start = end;
493                }
494                if x.0.is_empty() {
495                    self.xs.next();
496                }
497                if y.0.is_empty() {
498                    self.ys.next();
499                }
500            }
501        }
502        ret
503    }
504}