1use std::fmt;
2use std::iter::Peekable;
3use std::sync::atomic::{AtomicU32, Ordering};
4
5use super::{Byte, Reference, Region, Tree, Type, Uninhabited};
6use crate::{Map, Set};
7
8#[derive(PartialEq)]
9#[cfg_attr(test, derive(Clone))]
10pub(crate) struct Dfa<R, T>
11where
12 R: Region,
13 T: Type,
14{
15 pub(crate) transitions: Map<State, Transitions<R, T>>,
16 pub(crate) start: State,
17 pub(crate) accept: State,
18}
19
20#[derive(PartialEq, Clone, Debug)]
21pub(crate) struct Transitions<R, T>
22where
23 R: Region,
24 T: Type,
25{
26 byte_transitions: EdgeSet<State>,
27 ref_transitions: Map<Reference<R, T>, State>,
28}
29
30impl<R, T> Default for Transitions<R, T>
31where
32 R: Region,
33 T: Type,
34{
35 fn default() -> Self {
36 Self { byte_transitions: EdgeSet::empty(), ref_transitions: Map::default() }
37 }
38}
39
40#[derive(Hash, Eq, PartialEq, PartialOrd, Ord, Copy, Clone)]
42pub(crate) struct State(pub(crate) u32);
43
44impl State {
45 pub(crate) fn new() -> Self {
46 static COUNTER: AtomicU32 = AtomicU32::new(0);
47 Self(COUNTER.fetch_add(1, Ordering::SeqCst))
48 }
49}
50
51impl fmt::Debug for State {
52 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
53 write!(f, "S_{}", self.0)
54 }
55}
56
57impl<R, T> Dfa<R, T>
58where
59 R: Region,
60 T: Type,
61{
62 #[cfg(test)]
63 pub(crate) fn bool() -> Self {
64 Self::from_transitions(|accept| Transitions {
65 byte_transitions: EdgeSet::new(Byte::new(0x00..=0x01), accept),
66 ref_transitions: Map::default(),
67 })
68 }
69
70 pub(crate) fn unit() -> Self {
71 let transitions: Map<State, Transitions<R, T>> = Map::default();
72 let start = State::new();
73 let accept = start;
74
75 Self { transitions, start, accept }
76 }
77
78 pub(crate) fn from_byte(byte: Byte) -> Self {
79 Self::from_transitions(|accept| Transitions {
80 byte_transitions: EdgeSet::new(byte, accept),
81 ref_transitions: Map::default(),
82 })
83 }
84
85 pub(crate) fn from_ref(r: Reference<R, T>) -> Self {
86 Self::from_transitions(|accept| Transitions {
87 byte_transitions: EdgeSet::empty(),
88 ref_transitions: [(r, accept)].into_iter().collect(),
89 })
90 }
91
92 fn from_transitions(f: impl FnOnce(State) -> Transitions<R, T>) -> Self {
93 let start = State::new();
94 let accept = State::new();
95
96 Self { transitions: [(start, f(accept))].into_iter().collect(), start, accept }
97 }
98
99 pub(crate) fn from_tree(tree: Tree<!, R, T>) -> Result<Self, Uninhabited> {
100 Ok(match tree {
101 Tree::Byte(b) => Self::from_byte(b),
102 Tree::Ref(r) => Self::from_ref(r),
103 Tree::Alt(alts) => {
104 let mut alts = alts.into_iter().map(Self::from_tree).filter_map(Result::ok);
106 let dfa = alts.next().ok_or(Uninhabited)?;
108 alts.fold(dfa, |dfa, alt| dfa.union(alt, State::new))
110 }
111 Tree::Seq(elts) => {
112 let mut dfa = Self::unit();
113 for elt in elts.into_iter().map(Self::from_tree) {
114 dfa = dfa.concat(elt?);
115 }
116 dfa
117 }
118 })
119 }
120
121 pub(crate) fn concat(self, other: Self) -> Self {
123 if self.start == self.accept {
124 return other;
125 } else if other.start == other.accept {
126 return self;
127 }
128
129 let start = self.start;
130 let accept = other.accept;
131
132 let mut transitions: Map<State, Transitions<R, T>> = self.transitions;
133
134 for (source, transition) in other.transitions {
135 let fix_state = |state| if state == other.start { self.accept } else { state };
136 let byte_transitions = transition.byte_transitions.map_states(&fix_state);
137 let ref_transitions = transition
138 .ref_transitions
139 .into_iter()
140 .map(|(r, state)| (r, fix_state(state)))
141 .collect();
142
143 let old = transitions
144 .insert(fix_state(source), Transitions { byte_transitions, ref_transitions });
145 assert!(old.is_none());
146 }
147
148 Self { transitions, start, accept }
149 }
150
151 pub(crate) fn union(self, other: Self, mut new_state: impl FnMut() -> State) -> Self {
153 let a = self;
159 let b = other;
160
161 let accept = new_state();
162
163 let mut mapping: Map<(Option<State>, Option<State>), State> = Map::default();
164
165 let mut mapped = |(a_state, b_state)| {
166 if Some(a.accept) == a_state || Some(b.accept) == b_state {
167 accept
170 } else {
171 *mapping.entry((a_state, b_state)).or_insert_with(&mut new_state)
172 }
173 };
174
175 let start = mapped((Some(a.start), Some(b.start)));
176 let mut transitions: Map<State, Transitions<R, T>> = Map::default();
177 let empty_transitions = Transitions::default();
178
179 struct WorkQueue {
180 queue: Vec<(Option<State>, Option<State>)>,
181 enqueued: Set<(Option<State>, Option<State>)>,
185 }
186 impl WorkQueue {
187 fn enqueue(&mut self, a_state: Option<State>, b_state: Option<State>) {
188 if self.enqueued.insert((a_state, b_state)) {
189 self.queue.push((a_state, b_state));
190 }
191 }
192 }
193 let mut queue = WorkQueue { queue: Vec::new(), enqueued: Set::default() };
194 queue.enqueue(Some(a.start), Some(b.start));
195
196 while let Some((a_src, b_src)) = queue.queue.pop() {
197 let src = mapped((a_src, b_src));
198 if src == accept {
199 continue;
211 }
212
213 let a_transitions =
214 a_src.and_then(|a_src| a.transitions.get(&a_src)).unwrap_or(&empty_transitions);
215 let b_transitions =
216 b_src.and_then(|b_src| b.transitions.get(&b_src)).unwrap_or(&empty_transitions);
217
218 let byte_transitions = a_transitions.byte_transitions.union(
219 &b_transitions.byte_transitions,
220 |a_dst, b_dst| {
221 assert!(a_dst.is_some() || b_dst.is_some());
222
223 queue.enqueue(a_dst, b_dst);
224 mapped((a_dst, b_dst))
225 },
226 );
227
228 let ref_transitions =
229 a_transitions.ref_transitions.keys().chain(b_transitions.ref_transitions.keys());
230
231 let ref_transitions = ref_transitions
232 .map(|ref_transition| {
233 let a_dst = a_transitions.ref_transitions.get(ref_transition).copied();
234 let b_dst = b_transitions.ref_transitions.get(ref_transition).copied();
235
236 assert!(a_dst.is_some() || b_dst.is_some());
237
238 queue.enqueue(a_dst, b_dst);
239 (*ref_transition, mapped((a_dst, b_dst)))
240 })
241 .collect();
242
243 let old = transitions.insert(src, Transitions { byte_transitions, ref_transitions });
244 assert_eq!(old, None);
247 }
248
249 Self { transitions, start, accept }
250 }
251
252 pub(crate) fn get_uninit_edge_dst(&self, state: State) -> Option<State> {
253 let transitions = self.transitions.get(&state)?;
254 transitions.byte_transitions.get_uninit_edge_dst()
255 }
256
257 pub(crate) fn bytes_from(&self, start: State) -> impl Iterator<Item = (Byte, State)> {
258 self.transitions
259 .get(&start)
260 .into_iter()
261 .flat_map(|transitions| transitions.byte_transitions.iter())
262 }
263
264 pub(crate) fn refs_from(&self, start: State) -> impl Iterator<Item = (Reference<R, T>, State)> {
265 self.transitions
266 .get(&start)
267 .into_iter()
268 .flat_map(|transitions| transitions.ref_transitions.iter())
269 .map(|(r, s)| (*r, *s))
270 }
271
272 #[cfg(test)]
273 pub(crate) fn from_edges<B: Clone + Into<Byte>>(
274 start: u32,
275 accept: u32,
276 edges: &[(u32, B, u32)],
277 ) -> Self {
278 let start = State(start);
279 let accept = State(accept);
280 let mut transitions: Map<State, Vec<(Byte, State)>> = Map::default();
281
282 for &(src, ref edge, dst) in edges.iter() {
283 transitions.entry(State(src)).or_default().push((edge.clone().into(), State(dst)));
284 }
285
286 let transitions = transitions
287 .into_iter()
288 .map(|(src, edges)| {
289 (
290 src,
291 Transitions {
292 byte_transitions: EdgeSet::from_edges(edges),
293 ref_transitions: Map::default(),
294 },
295 )
296 })
297 .collect();
298
299 Self { start, accept, transitions }
300 }
301}
302
303impl<R, T> fmt::Debug for Dfa<R, T>
305where
306 R: Region,
307 T: Type,
308{
309 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
310 writeln!(f, "digraph {{")?;
311 writeln!(f, " {:?} [shape = doublecircle]", self.start)?;
312 writeln!(f, " {:?} [shape = doublecircle]", self.accept)?;
313
314 for (src, transitions) in self.transitions.iter() {
315 for (t, dst) in transitions.byte_transitions.iter() {
316 writeln!(f, " {src:?} -> {dst:?} [label=\"{t:?}\"]")?;
317 }
318
319 for (t, dst) in transitions.ref_transitions.iter() {
320 writeln!(f, " {src:?} -> {dst:?} [label=\"{t:?}\"]")?;
321 }
322 }
323
324 writeln!(f, "}}")
325 }
326}
327
328use edge_set::EdgeSet;
329mod edge_set {
330 use smallvec::SmallVec;
331
332 use super::*;
333
334 #[derive(Eq, PartialEq, Clone, Debug)]
336 pub(super) struct EdgeSet<S = State> {
337 runs: SmallVec<[(Byte, S); 1]>,
342 }
343
344 impl<S> EdgeSet<S> {
345 pub(crate) fn new(range: Byte, dst: S) -> Self {
346 let mut this = Self { runs: SmallVec::new() };
347 if !range.is_empty() {
348 this.runs.push((range, dst));
349 }
350 this
351 }
352
353 pub(crate) fn empty() -> Self {
354 Self { runs: SmallVec::new() }
355 }
356
357 #[cfg(test)]
358 pub(crate) fn from_edges(mut edges: Vec<(Byte, S)>) -> Self
359 where
360 S: Ord,
361 {
362 edges.sort();
363 Self { runs: edges.into() }
364 }
365
366 pub(crate) fn iter(&self) -> impl Iterator<Item = (Byte, S)>
367 where
368 S: Copy,
369 {
370 self.runs.iter().copied()
371 }
372
373 pub(crate) fn get_uninit_edge_dst(&self) -> Option<S>
374 where
375 S: Copy,
376 {
377 let &(range, dst) = self.runs.last()?;
379 if range.contains_uninit() { Some(dst) } else { None }
380 }
381
382 pub(crate) fn map_states<SS>(self, mut f: impl FnMut(S) -> SS) -> EdgeSet<SS> {
383 EdgeSet {
384 runs: self.runs.into_iter().map(|(b, s)| (b, f(s))).collect(),
391 }
392 }
393
394 pub(crate) fn union(
404 &self,
405 other: &Self,
406 mut join: impl FnMut(Option<S>, Option<S>) -> S,
407 ) -> EdgeSet<S>
408 where
409 S: Copy + Eq,
410 {
411 let mut runs: SmallVec<[(Byte, S); 1]> = SmallVec::new();
412 let xs = self.runs.iter().copied();
413 let ys = other.runs.iter().copied();
414 for (range, (x, y)) in union(xs, ys) {
415 let state = join(x, y);
416 match runs.last_mut() {
417 Some(&mut (ref mut last_range, ref mut last_state))
419 if last_range.end == range.start && *last_state == state =>
420 {
421 last_range.end = range.end
422 }
423 _ => runs.push((range, state)),
424 }
425 }
426 EdgeSet { runs }
427 }
428 }
429}
430
431pub(crate) fn union<S: Copy, X: Iterator<Item = (Byte, S)>, Y: Iterator<Item = (Byte, S)>>(
433 xs: X,
434 ys: Y,
435) -> UnionIter<X, Y> {
436 UnionIter { xs: xs.peekable(), ys: ys.peekable() }
437}
438
439pub(crate) struct UnionIter<X: Iterator, Y: Iterator> {
440 xs: Peekable<X>,
441 ys: Peekable<Y>,
442}
443
444impl<S: Copy, X: Iterator<Item = (Byte, S)>, Y: Iterator<Item = (Byte, S)>> Iterator
446 for UnionIter<X, Y>
447{
448 type Item = (Byte, (Option<S>, Option<S>));
449
450 fn next(&mut self) -> Option<Self::Item> {
451 use std::cmp::{self, Ordering};
452
453 let ret;
454 match (self.xs.peek_mut(), self.ys.peek_mut()) {
455 (None, None) => {
456 ret = None;
457 }
458 (Some(x), None) => {
459 ret = Some((x.0, (Some(x.1), None)));
460 self.xs.next();
461 }
462 (None, Some(y)) => {
463 ret = Some((y.0, (None, Some(y.1))));
464 self.ys.next();
465 }
466 (Some(x), Some(y)) => {
467 let start;
468 let end;
469 let dst;
470 match x.0.start.cmp(&y.0.start) {
471 Ordering::Less => {
472 start = x.0.start;
473 end = cmp::min(x.0.end, y.0.start);
474 dst = (Some(x.1), None);
475 }
476 Ordering::Greater => {
477 start = y.0.start;
478 end = cmp::min(x.0.start, y.0.end);
479 dst = (None, Some(y.1));
480 }
481 Ordering::Equal => {
482 start = x.0.start;
483 end = cmp::min(x.0.end, y.0.end);
484 dst = (Some(x.1), Some(y.1));
485 }
486 }
487 ret = Some((Byte { start, end }, dst));
488 if start == x.0.start {
489 x.0.start = end;
490 }
491 if start == y.0.start {
492 y.0.start = end;
493 }
494 if x.0.is_empty() {
495 self.xs.next();
496 }
497 if y.0.is_empty() {
498 self.ys.next();
499 }
500 }
501 }
502 ret
503 }
504}