charon_lib/transform/control_flow/ullbc_to_llbc.rs
1//! ULLBC to LLBC
2//!
3//! We reconstruct the control-flow in the Unstructured LLBC.
4//!
5//! The reconstruction algorithm is not written to be efficient (its complexity
6//! is probably very bad), but it was not written to be: this is still an early
7//! stage and we want the algorithm to generate the best reconstruction as
8//! possible. We still need to test the algorithm on more interesting examples,
9//! and will consider making it more efficient once it is a bit mature and well
10//! tested.
11//! Also note that we more importantly focus on making the algorithm sound: the
12//! reconstructed program must always be equivalent to the original MIR program,
13//! and the fact that the reconstruction preserves this property must be obvious.
14//!
15//! Finally, we conjecture the execution time shouldn't be too much a problem:
16//! we don't expect the translation mechanism to be applied on super huge functions
17//! (which will be difficult to formally analyze), and the MIR graphs are actually
18//! not that big because statements are grouped into code blocks (a block is made
19//! of a list of statements, followed by a terminator - branchings and jumps can
20//! only be performed by terminators -, meaning that MIR graphs don't have that
21//! many nodes and edges).
22
23use itertools::Itertools;
24use petgraph::algo::dijkstra;
25use petgraph::algo::dominators::{Dominators, simple_fast};
26use petgraph::graphmap::DiGraphMap;
27use petgraph::visit::{Dfs, DfsPostOrder, GraphBase, IntoNeighbors, Visitable, Walker};
28use std::collections::{BTreeSet, HashMap, HashSet};
29use std::mem;
30
31use crate::common::ensure_sufficient_stack;
32use crate::errors::sanity_check;
33use crate::ids::IndexVec;
34use crate::llbc_ast as tgt;
35use crate::meta::{Span, combine_span};
36use crate::transform::TransformCtx;
37use crate::transform::ctx::TransformPass;
38use crate::ullbc_ast::{self as src, BlockId};
39use crate::{ast::*, register_error};
40
41/// Arbitrary-precision numbers
42type BigUint = fraction::DynaInt<u64, fraction::BigUint>;
43type BigRational = fraction::Ratio<BigUint>;
44
45/// Control-Flow Graph
46type Cfg = DiGraphMap<src::BlockId, ()>;
47
48/// Information precomputed about a function's CFG.
49#[derive(Debug)]
50struct CfgInfo {
51 /// The CFG
52 pub cfg: Cfg,
53 /// The CFG where all the backward edges have been removed. Aka "forward CFG".
54 pub fwd_cfg: Cfg,
55 /// We consider the destination of the backward edges to be loop entries and
56 /// store them here.
57 pub loop_entries: HashSet<src::BlockId>,
58 /// The blocks whose terminators are a switch are stored here.
59 pub switch_blocks: HashSet<src::BlockId>,
60 /// Tree of which nodes dominates which other nodes.
61 #[expect(unused)]
62 pub dominator_tree: Dominators<BlockId>,
63 /// Computed data about each block.
64 pub block_data: IndexVec<BlockId, BlockData>,
65}
66
67#[derive(Debug)]
68struct BlockData {
69 pub id: BlockId,
70 /// Order in a reverse postorder numbering. `None` if the block is unreachable.
71 pub reverse_postorder: Option<u32>,
72 /// Node from where we can only reach error nodes (panic, etc.)
73 pub only_reach_error: bool,
74 /// List of reachable nodes, with the length of shortest path to them. Includes the current
75 /// node.
76 pub shortest_paths: hashbrown::HashMap<BlockId, usize>,
77 /// Let's say we put a quantity of water equal to 1 on the block, and the water flows downards.
78 /// Whenever there is a branching, the quantity of water gets equally divided between the
79 /// branches. When the control flows join, we put the water back together. The set below
80 /// computes the amount of water received by each descendant of the node.
81 ///
82 /// TODO: there must be a known algorithm which computes this, right?...
83 /// This is exactly this problems:
84 /// <https://stackoverflow.com/questions/78221666/algorithm-for-total-flow-through-weighted-directed-acyclic-graph>
85 /// TODO: the way I compute this is not efficient.
86 pub flow: IndexVec<BlockId, BigRational>,
87}
88
89#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
90struct BlockWithRank<T> {
91 /// A "rank" quantity that we use to order the nodes.
92 /// By placing the `rank` field before the `id`, we make sure that
93 /// we use the rank first in the lexicographic order.
94 rank: T,
95 id: src::BlockId,
96}
97type OrdBlockId = BlockWithRank<u32>;
98
99/// Error indicating that the control-flow graph is not reducible. The contained block id is a
100/// block involved in an irreducible subgraph.
101struct Irreducible(BlockId);
102
103impl CfgInfo {
104 /// Build the CFGs (the "regular" CFG and the CFG without backward edges) and
105 /// compute some information like the loop entries and the switch blocks.
106 fn build(body: &src::BodyContents) -> Result<Self, Irreducible> {
107 let start_block = BlockId::ZERO;
108
109 // Build the node graph (we ignore unwind paths for now).
110 let mut cfg = Cfg::new();
111 for (block_id, block) in body.iter_indexed() {
112 cfg.add_node(block_id);
113 for tgt in block.targets_ignoring_unwind() {
114 cfg.add_edge(block_id, tgt, ());
115 }
116 }
117
118 let empty_flow = body.map_ref(|_| BigRational::new(0u64.into(), 1u64.into()));
119 let mut block_data: IndexVec<BlockId, BlockData> =
120 body.map_ref_indexed(|id, _| BlockData {
121 id,
122 // Default values will stay for unreachable nodes, which are irrelevant.
123 reverse_postorder: None,
124 only_reach_error: false,
125 shortest_paths: Default::default(),
126 flow: empty_flow.clone(),
127 });
128
129 // Compute the dominator tree.
130 let dominator_tree = simple_fast(&cfg, start_block);
131
132 // Compute reverse postorder numbering.
133 for (i, block_id) in DfsPostOrder::new(&cfg, start_block).iter(&cfg).enumerate() {
134 let rev_post_id = body.len() - i;
135 assert!(rev_post_id <= u32::MAX as usize);
136 block_data[block_id].reverse_postorder = Some(rev_post_id as u32);
137 }
138
139 // Compute the forward graph (without backward edges).
140 let mut fwd_cfg = Cfg::new();
141 let mut loop_entries = HashSet::new();
142 let mut switch_blocks = HashSet::new();
143 for src in cfg.nodes() {
144 if block_data[src].reverse_postorder.is_none() {
145 // Unreachable block
146 continue;
147 }
148 fwd_cfg.add_node(src);
149 if body[src].terminator.kind.is_switch() {
150 switch_blocks.insert(src);
151 }
152 for tgt in cfg.neighbors(src) {
153 // Check if the edge is a backward edge.
154 if block_data[src].reverse_postorder >= block_data[tgt].reverse_postorder {
155 // This is a backward edge
156 loop_entries.insert(tgt);
157 // A cfg is reducible iff the target of every back edge dominates the
158 // edge's source.
159 if !dominator_tree.dominators(src).unwrap().contains(&tgt) {
160 return Err(Irreducible(src));
161 }
162 } else {
163 fwd_cfg.add_edge(src, tgt, ());
164 }
165 }
166 }
167
168 for block_id in DfsPostOrder::new(&fwd_cfg, start_block).iter(&fwd_cfg) {
169 let block = &body[block_id];
170 let targets = cfg.neighbors(block_id).collect_vec();
171 let fwd_targets = fwd_cfg.neighbors(block_id).collect_vec();
172
173 // Compute the nodes that can only reach error nodes.
174 // The node can only reach error nodes if:
175 // - it is an error node;
176 // - or it has neighbors and they all lead to errors.
177 // Note that if there is a backward edge, `only_reach_error` cannot contain this
178 // node yet. In other words, this does not consider infinite loops as reaching an
179 // error node.
180 if block.terminator.is_error()
181 || (!targets.is_empty()
182 && targets.iter().all(|&tgt| block_data[tgt].only_reach_error))
183 {
184 block_data[block_id].only_reach_error = true;
185 }
186
187 // Compute the flows between each pair of nodes.
188 let mut flow: IndexVec<src::BlockId, BigRational> =
189 mem::take(&mut block_data[block_id].flow);
190 if !fwd_targets.is_empty() {
191 // We need to divide the initial flow equally between the children
192 let factor = BigRational::new(1u64.into(), fwd_targets.len().into());
193
194 // For each child, multiply the flows of its own children by the ratio,
195 // and add.
196 for &child_id in &fwd_targets {
197 // First, add the child itself
198 flow[child_id] += factor.clone();
199
200 // Then add its successors
201 let child = &block_data[child_id];
202 for grandchild in child.reachable_excluding_self() {
203 // Flow from `child` to `grandchild`
204 let child_flow = child.flow[grandchild].clone();
205 flow[grandchild] += factor.clone() * child_flow;
206 }
207 }
208 }
209 block_data[block_id].flow = flow;
210
211 // Compute shortest paths to all reachable nodes in the forward graph.
212 block_data[block_id].shortest_paths = dijkstra(&fwd_cfg, block_id, None, |_| 1usize);
213 }
214
215 Ok(CfgInfo {
216 cfg,
217 fwd_cfg,
218 loop_entries,
219 switch_blocks,
220 dominator_tree,
221 block_data,
222 })
223 }
224
225 fn block_data(&self, block_id: BlockId) -> &BlockData {
226 &self.block_data[block_id]
227 }
228 // fn can_reach(&self, src: BlockId, tgt: BlockId) -> bool {
229 // self.block_data[src].shortest_paths.contains_key(&tgt)
230 // }
231 fn topo_rank(&self, block_id: BlockId) -> u32 {
232 self.block_data[block_id].reverse_postorder.unwrap()
233 }
234 /// Create an [OrdBlockId] from a block id and a rank given by a map giving
235 /// a sort (topological in our use cases) over the graph.
236 fn make_ord_block_id(&self, block_id: BlockId) -> OrdBlockId {
237 OrdBlockId {
238 id: block_id,
239 rank: self.topo_rank(block_id),
240 }
241 }
242 fn is_backward_edge(&self, src: BlockId, tgt: BlockId) -> bool {
243 self.block_data[src].reverse_postorder >= self.block_data[tgt].reverse_postorder
244 && self.cfg.contains_edge(src, tgt)
245 }
246}
247
248impl BlockData {
249 fn shortest_paths_including_self(&self) -> impl Iterator<Item = (BlockId, usize)> {
250 self.shortest_paths.iter().map(|(bid, d)| (*bid, *d))
251 }
252 fn shortest_paths_excluding_self(&self) -> impl Iterator<Item = (BlockId, usize)> {
253 self.shortest_paths_including_self()
254 .filter(move |&(bid, _)| bid != self.id)
255 }
256 #[expect(unused)]
257 fn reachable_including_self(&self) -> impl Iterator<Item = BlockId> {
258 self.shortest_paths_including_self().map(|(bid, _)| bid)
259 }
260 fn reachable_excluding_self(&self) -> impl Iterator<Item = BlockId> {
261 self.shortest_paths_excluding_self().map(|(bid, _)| bid)
262 }
263}
264
265#[derive(Debug, Clone)]
266struct LoopExitCandidateInfo {
267 /// The occurrences going to this exit.
268 /// For every occurrence, we register the distance between the loop entry
269 /// and the node from which the edge going to the exit starts.
270 /// If later we have to choose between candidates with the same number
271 /// of occurrences, we choose the one with the smallest distances.
272 ///
273 /// Note that it *may* happen that we have several exit candidates referenced
274 /// more than once for one loop. This comes from the fact that whenever we
275 /// reach a node from which the loop entry is not reachable (without using a
276 /// backward edge leading to an outer loop entry), we register this node
277 /// as well as all its children as exit candidates.
278 /// Consider the following example:
279 /// ```text
280 /// while i < max {
281 /// if cond {
282 /// break;
283 /// }
284 /// s += i;
285 /// i += 1
286 /// }
287 /// // All the below nodes are exit candidates (each of them is referenced twice)
288 /// s += 1;
289 /// return s;
290 /// ```
291 pub occurrences: Vec<usize>,
292}
293
294struct FilteredLoopParents {
295 remaining_parents: Vec<(src::BlockId, usize)>,
296 removed_parents: Vec<(src::BlockId, usize)>,
297}
298
299#[derive(Debug, Default, Clone)]
300struct ExitInfo {
301 is_loop_entry: bool,
302 is_switch_entry: bool,
303 /// The loop exit
304 loop_exit: Option<src::BlockId>,
305 /// Some loop exits actually belong to outer switches. We still need
306 /// to track them in the loop exits, in order to know when we should
307 /// insert a break. However, we need to make sure we don't add the
308 /// corresponding block in a sequence, after having translated the
309 /// loop, like so:
310 /// ```text
311 /// loop {
312 /// loop_body
313 /// };
314 /// exit_blocks // OK if the exit "belongs" to the loop
315 /// ```
316 ///
317 /// In case the exit doesn't belong to the loop:
318 /// ```text
319 /// if b {
320 /// loop {
321 /// loop_body
322 /// } // no exit blocks after the loop
323 /// }
324 /// else {
325 /// ...
326 /// };
327 /// exit_blocks // the exit blocks are here
328 /// ```
329 owned_loop_exit: Option<src::BlockId>,
330 /// The switch exit.
331 /// Note that the switch exits are always owned.
332 owned_switch_exit: Option<src::BlockId>,
333}
334
335/// The exits of a graph
336#[derive(Debug, Clone)]
337struct ExitsInfo {
338 exits: IndexVec<BlockId, ExitInfo>,
339}
340
341impl ExitsInfo {
342 /// Check if a loop entry is reachable from a node, in a graph where we remove
343 /// the backward edges going directly to the loop entry.
344 ///
345 /// If the loop entry is not reachable, it means that:
346 /// - the loop entry is not reachable at all
347 /// - or it is only reachable through an outer loop
348 ///
349 /// The starting node should be a (transitive) child of the loop entry.
350 /// We use this to find candidates for loop exits.
351 fn loop_entry_is_reachable_from_inner(
352 cfg: &CfgInfo,
353 loop_entry: src::BlockId,
354 block_id: src::BlockId,
355 ) -> bool {
356 // It is reachable in the complete graph. Check if it is reachable by not
357 // going through backward edges which go to outer loops. In practice, we
358 // just need to forbid the use of any backward edges at the exception of
359 // those which go directly to the current loop's entry. This means that we
360 // ignore backward edges to outer loops of course, but also backward edges
361 // to inner loops because we shouldn't need to follow those (there should be
362 // more direct paths).
363 Dfs::new(&cfg.fwd_cfg, block_id)
364 .iter(&cfg.fwd_cfg)
365 .any(|bid| cfg.is_backward_edge(bid, loop_entry))
366 }
367
368 fn filter_loop_parents(
369 cfg: &CfgInfo,
370 parent_loops: &Vec<(src::BlockId, usize)>,
371 block_id: src::BlockId,
372 ) -> Option<FilteredLoopParents> {
373 let mut eliminate: usize = 0;
374 for (i, (loop_id, _)) in parent_loops.iter().enumerate().rev() {
375 if Self::loop_entry_is_reachable_from_inner(cfg, *loop_id, block_id) {
376 eliminate = i + 1;
377 break;
378 }
379 }
380
381 // Split the vector of parents
382 let (remaining_parents, removed_parents) = parent_loops.split_at(eliminate);
383 if !removed_parents.is_empty() {
384 let (mut remaining_parents, removed_parents) =
385 (remaining_parents.to_vec(), removed_parents.to_vec());
386
387 // Update the distance to the last loop - we just increment the distance
388 // by 1, because from the point of view of the parent loop, we just exited
389 // a block and go to the next sequence of instructions.
390 if !remaining_parents.is_empty() {
391 remaining_parents.last_mut().unwrap().1 += 1;
392 }
393
394 Some(FilteredLoopParents {
395 remaining_parents,
396 removed_parents,
397 })
398 } else {
399 None
400 }
401 }
402
403 /// Auxiliary helper
404 ///
405 /// Check if it is possible to reach the exit of an outer switch from `start_bid` without going
406 /// through the `exit_candidate`. We use the forward graph.
407 fn can_reach_outer_exit(
408 cfg: &CfgInfo,
409 outer_exits: &HashSet<src::BlockId>,
410 start_bid: src::BlockId,
411 exit_candidate: src::BlockId,
412 ) -> bool {
413 /// Graph that is identical to `Cfg` except that a chosen node is considered to have no neighbors.
414 struct GraphWithoutEdgesFrom<'a> {
415 graph: &'a Cfg,
416 special_node: BlockId,
417 }
418 impl GraphBase for GraphWithoutEdgesFrom<'_> {
419 type EdgeId = <Cfg as GraphBase>::EdgeId;
420 type NodeId = <Cfg as GraphBase>::NodeId;
421 }
422 impl IntoNeighbors for &GraphWithoutEdgesFrom<'_> {
423 type Neighbors = impl Iterator<Item = Self::NodeId>;
424 fn neighbors(self, a: Self::NodeId) -> Self::Neighbors {
425 if a == self.special_node {
426 None
427 } else {
428 Some(self.graph.neighbors(a))
429 }
430 .into_iter()
431 .flatten()
432 }
433 }
434 impl Visitable for GraphWithoutEdgesFrom<'_> {
435 type Map = <Cfg as Visitable>::Map;
436 fn visit_map(self: &Self) -> Self::Map {
437 self.graph.visit_map()
438 }
439 fn reset_map(self: &Self, map: &mut Self::Map) {
440 self.graph.reset_map(map);
441 }
442 }
443
444 // Do a DFS over the forward graph where we pretend that the exit candidate has no outgoing
445 // edges. If we reach an outer exit candidate in that graph then the exit candidate does not
446 // dominate the outer exit candidates in the forward graph starting from `start_bid`.
447 let graph = GraphWithoutEdgesFrom {
448 graph: &cfg.fwd_cfg,
449 special_node: exit_candidate,
450 };
451 Dfs::new(&graph, start_bid)
452 .iter(&graph)
453 .any(|bid| outer_exits.contains(&bid))
454 }
455 /// Register a node and its children as exit candidates for a list of
456 /// parent loops.
457 fn register_children_as_loop_exit_candidates(
458 cfg: &CfgInfo,
459 loop_exits: &mut HashMap<src::BlockId, SeqHashMap<src::BlockId, LoopExitCandidateInfo>>,
460 removed_parent_loops: &Vec<(src::BlockId, usize)>,
461 block_id: src::BlockId,
462 ) {
463 let mut base_dist = 0;
464 // For every parent loop, in reverse order (we go from last to first in
465 // order to correctly compute the distances)
466 for (loop_id, loop_dist) in removed_parent_loops.iter().rev() {
467 // Update the distance to the loop entry
468 base_dist += *loop_dist;
469
470 // Retrieve the candidates
471 let candidates = loop_exits.get_mut(loop_id).unwrap();
472
473 // Update them
474 for (bid, dist) in cfg.block_data(block_id).shortest_paths_including_self() {
475 let distance = base_dist + dist;
476 match candidates.get_mut(&bid) {
477 None => {
478 candidates.insert(
479 bid,
480 LoopExitCandidateInfo {
481 occurrences: vec![distance],
482 },
483 );
484 }
485 Some(c) => {
486 c.occurrences.push(distance);
487 }
488 }
489 }
490 }
491 }
492
493 /// Compute the loop exit candidates.
494 ///
495 /// There may be several candidates with the same "optimality" (same number of
496 /// occurrences, etc.), in which case we choose the first one which was registered
497 /// (the order in which we explore the graph is deterministic): this is why we
498 /// store the candidates in a linked hash map.
499 fn compute_loop_exit_candidates(
500 cfg: &CfgInfo,
501 explored: &mut HashSet<src::BlockId>,
502 ordered_loops: &mut Vec<src::BlockId>,
503 loop_exits: &mut HashMap<src::BlockId, SeqHashMap<src::BlockId, LoopExitCandidateInfo>>,
504 // List of parent loops, with the distance to the entry of the loop (the distance
505 // is the distance between the current node and the loop entry for the last parent,
506 // and the distance between the parents for the others).
507 mut parent_loops: Vec<(src::BlockId, usize)>,
508 block_id: src::BlockId,
509 ) {
510 if explored.contains(&block_id) {
511 return;
512 }
513 explored.insert(block_id);
514
515 // Check if we enter a loop - add the corresponding node if necessary
516 if cfg.loop_entries.contains(&block_id) {
517 parent_loops.push((block_id, 1));
518 ordered_loops.push(block_id);
519 } else {
520 // Increase the distance with the parent loop
521 if !parent_loops.is_empty() {
522 parent_loops.last_mut().unwrap().1 += 1;
523 }
524 };
525
526 // Retrieve the children - note that we ignore the back edges
527 for child in cfg.fwd_cfg.neighbors(block_id) {
528 // If the parent loop entry is not reachable from the child without going
529 // through a backward edge which goes directly to the loop entry, consider
530 // this node a potential exit.
531 ensure_sufficient_stack(|| {
532 let new_parent_loops = match Self::filter_loop_parents(cfg, &parent_loops, child) {
533 None => parent_loops.clone(),
534 Some(fparent_loops) => {
535 // We filtered some parent loops: it means this child and its
536 // children are loop exit candidates for all those loops: we must
537 // thus register them.
538 // Note that we register the child *and* its children: the reason
539 // is that we might do something *then* actually jump to the exit.
540 // For instance, the following block of code:
541 // ```
542 // if cond { break; } else { ... }
543 // ```
544 //
545 // Gets translated in MIR to something like this:
546 // ```
547 // bb1: {
548 // if cond -> bb2 else -> bb3; // bb2 is not the real exit
549 // }
550 //
551 // bb2: {
552 // goto bb4; // bb4 is the real exit
553 // }
554 // ```
555 Self::register_children_as_loop_exit_candidates(
556 cfg,
557 loop_exits,
558 &fparent_loops.removed_parents,
559 child,
560 );
561 fparent_loops.remaining_parents
562 }
563 };
564 // Explore, with the filtered parents
565 Self::compute_loop_exit_candidates(
566 cfg,
567 explored,
568 ordered_loops,
569 loop_exits,
570 new_parent_loops,
571 child,
572 );
573 })
574 }
575 }
576
577 /// See [`compute_loop_switch_exits`] for
578 /// explanations about what "exits" are.
579 ///
580 /// The following function computes the loop exits. It acts as follows.
581 ///
582 /// We keep track of a stack of the loops in which we entered.
583 /// It is very easy to check when we enter a loop: loop entries are destinations
584 /// of backward edges, which can be spotted with a simple graph exploration (see
585 /// [`build_cfg_partial_info_edges`].
586 /// The criteria to consider whether we exit a loop is the following:
587 /// - we exit a loop if we go to a block from which we can't reach the loop
588 /// entry at all
589 /// - or if we can reach the loop entry, but must use a backward edge which goes
590 /// to an outer loop
591 ///
592 /// It is better explained on the following example:
593 /// ```text
594 /// 'outer while i < max {
595 /// 'inner while j < max {
596 /// j += 1;
597 /// }
598 /// // (i)
599 /// i += 1;
600 /// }
601 /// ```
602 /// If we enter the inner loop then go to (i) from the inner loop, we consider
603 /// that we exited the outer loop because:
604 /// - we can reach the entry of the inner loop from (i) (by finishing then
605 /// starting again an iteration of the outer loop)
606 /// - but doing this requires taking a backward edge which goes to the outer loop
607 ///
608 /// Whenever we exit a loop, we save the block we went to as an exit candidate
609 /// for this loop. Note that there may by many exit candidates. For instance,
610 /// in the below example:
611 /// ```text
612 /// while ... {
613 /// ...
614 /// if ... {
615 /// // We can't reach the loop entry from here: this is an exit
616 /// // candidate
617 /// return;
618 /// }
619 /// }
620 /// // This is another exit candidate - and this is the one we want to use
621 /// // as the "real" exit...
622 /// ...
623 /// ```
624 ///
625 /// Also note that it may happen that we go several times to the same exit (if
626 /// we use breaks for instance): we record the number of times an exit candidate
627 /// is used.
628 ///
629 /// Once we listed all the exit candidates, we find the "best" one for every
630 /// loop, starting with the outer loops. We start with outer loops because
631 /// inner loops might use breaks to exit to the exit of outer loops: if we
632 /// start with the inner loops, the exit which is "natural" for the outer loop
633 /// might end up being used for one of the inner loops...
634 ///
635 /// The best exit is the following one:
636 /// - it is the one which is used the most times (note that there can be
637 /// several candidates which are referenced strictly more than once: see the
638 /// comment below)
639 /// - if several exits have the same number of occurrences, we choose the one
640 /// for which we goto the "earliest" (earliest meaning that the goto is close to
641 /// the loop entry node in the AST). The reason is that all the loops should
642 /// have an outer if ... then ... else ... which executes the loop body or goes
643 /// to the exit (note that this is not necessarily the first
644 /// if ... then ... else ... we find: loop conditions can be arbitrary
645 /// expressions, containing branchings).
646 ///
647 /// # Several candidates for a loop exit:
648 /// =====================================
649 /// There used to be a sanity check to ensure there are no two different
650 /// candidates with exactly the same number of occurrences and distance from
651 /// the entry of the loop, if the number of occurrences is > 1.
652 ///
653 /// We removed it because it does happen, for instance here (the match
654 /// introduces an `unreachable` node, and it has the same number of
655 /// occurrences and the same distance to the loop entry as the `panic`
656 /// node):
657 ///
658 /// ```text
659 /// pub fn list_nth_mut_loop_pair<'a, T>(
660 /// mut ls: &'a mut List<T>,
661 /// mut i: u32,
662 /// ) -> &'a mut T {
663 /// loop {
664 /// match ls {
665 /// List::Nil => {
666 /// panic!() // <-- best candidate
667 /// }
668 /// List::Cons(x, tl) => {
669 /// if i == 0 {
670 /// return x;
671 /// } else {
672 /// ls = tl;
673 /// i -= 1;
674 /// }
675 /// }
676 /// _ => {
677 /// // Note that Rustc always introduces an unreachable branch after
678 /// // desugaring matches.
679 /// unreachable!(), // <-- best candidate
680 /// }
681 /// }
682 /// }
683 /// }
684 /// ```
685 ///
686 /// When this happens we choose an exit candidate whose edges don't necessarily
687 /// lead to an error (above there are none, so we don't choose any exits). Note
688 /// that this last condition is important to prevent loops from being unnecessarily
689 /// nested:
690 ///
691 /// ```text
692 /// pub fn nested_loops_enum(step_out: usize, step_in: usize) -> usize {
693 /// let mut s = 0;
694 ///
695 /// for _ in 0..128 { // We don't want this loop to be nested with the loops below
696 /// s += 1;
697 /// }
698 ///
699 /// for _ in 0..(step_out) {
700 /// for _ in 0..(step_in) {
701 /// s += 1;
702 /// }
703 /// }
704 ///
705 /// s
706 /// }
707 /// ```
708 fn compute_loop_exits(
709 ctx: &mut TransformCtx,
710 body: &src::ExprBody,
711 cfg: &CfgInfo,
712 ) -> HashMap<src::BlockId, Option<src::BlockId>> {
713 let mut explored = HashSet::new();
714 let mut ordered_loops = Vec::new();
715 let mut loop_exits = HashMap::new();
716
717 // Initialize the loop exits candidates
718 for loop_id in &cfg.loop_entries {
719 loop_exits.insert(*loop_id, SeqHashMap::new());
720 }
721
722 // Compute the candidates
723 Self::compute_loop_exit_candidates(
724 cfg,
725 &mut explored,
726 &mut ordered_loops,
727 &mut loop_exits,
728 Vec::new(),
729 src::BlockId::ZERO,
730 );
731
732 {
733 // Debugging
734 let candidates: Vec<String> = loop_exits
735 .iter()
736 .map(|(loop_id, candidates)| format!("{loop_id} -> {candidates:?}"))
737 .collect();
738 trace!("Loop exit candidates:\n{}", candidates.join("\n"));
739 }
740
741 // Choose one candidate among the potential candidates.
742 let mut exits: HashSet<src::BlockId> = HashSet::new();
743 let mut chosen_loop_exits: HashMap<src::BlockId, Option<src::BlockId>> = HashMap::new();
744 // For every loop
745 for loop_id in ordered_loops {
746 // Check the candidates.
747 // Ignore the candidates which have already been chosen as exits for other
748 // loops (which should be outer loops).
749 // We choose the exit with:
750 // - the most occurrences
751 // - the least total distance (if there are several possibilities)
752 // - doesn't necessarily lead to an error (panic, unreachable)
753
754 // First:
755 // - filter the candidates
756 // - compute the number of occurrences
757 // - compute the sum of distances
758 // TODO: we could simply order by using a lexicographic order
759 let loop_exits = loop_exits
760 .get(&loop_id)
761 .unwrap()
762 .iter()
763 // If candidate already selected for another loop: ignore
764 .filter(|(candidate_id, _)| !exits.contains(candidate_id))
765 .map(|(candidate_id, candidate_info)| {
766 let num_occurrences = candidate_info.occurrences.len();
767 let dist_sum = candidate_info.occurrences.iter().sum();
768 (*candidate_id, num_occurrences, dist_sum)
769 })
770 .collect_vec();
771
772 trace!(
773 "Loop {}: possible exits:\n{}",
774 loop_id,
775 loop_exits
776 .iter()
777 .map(|(bid, occs, dsum)| format!(
778 "{bid} -> {{ occurrences: {occs}, dist_sum: {dsum} }}",
779 ))
780 .collect::<Vec<String>>()
781 .join("\n")
782 );
783
784 // Second: actually select the proper candidate.
785
786 // We find the one with the highest occurrence and the smallest distance
787 // from the entry of the loop (note that we take care of listing the exit
788 // candidates in a deterministic order).
789 let mut best_exit: Option<src::BlockId> = None;
790 let mut best_occurrences = 0;
791 let mut best_dist_sum = std::usize::MAX;
792 for (candidate_id, occurrences, dist_sum) in &loop_exits {
793 if (*occurrences > best_occurrences)
794 || (*occurrences == best_occurrences && *dist_sum < best_dist_sum)
795 {
796 best_exit = Some(*candidate_id);
797 best_occurrences = *occurrences;
798 best_dist_sum = *dist_sum;
799 }
800 }
801
802 let possible_candidates: Vec<_> = loop_exits
803 .iter()
804 .filter_map(|(bid, occs, dsum)| {
805 if *occs == best_occurrences && *dsum == best_dist_sum {
806 Some(*bid)
807 } else {
808 None
809 }
810 })
811 .collect();
812 let num_possible_candidates = loop_exits.len();
813
814 // If there is exactly one one best candidate, it is easy.
815 // Otherwise we need to split further.
816 if num_possible_candidates > 1 {
817 trace!("Best candidates: {:?}", possible_candidates);
818 // TODO: if we use a lexicographic order we can merge this with the code
819 // above.
820 // Remove the candidates which only lead to errors (panic or unreachable).
821 let candidates: Vec<_> = possible_candidates
822 .iter()
823 .filter(|&&bid| !cfg.block_data[bid].only_reach_error)
824 .collect();
825 // If there is exactly one candidate we select it
826 if candidates.len() == 1 {
827 let exit_id = *candidates[0];
828 exits.insert(exit_id);
829 trace!("Loop {loop_id}: selected the best exit candidate {exit_id}");
830 chosen_loop_exits.insert(loop_id, Some(exit_id));
831 } else {
832 // Otherwise we do not select any exit.
833 // We don't want to select any exit if we are in the below situation
834 // (all paths lead to errors). We added a sanity check below to
835 // catch the situations where there are several exits which don't
836 // lead to errors.
837 //
838 // Example:
839 // ========
840 // ```
841 // loop {
842 // match ls {
843 // List::Nil => {
844 // panic!() // <-- best candidate
845 // }
846 // List::Cons(x, tl) => {
847 // if i == 0 {
848 // return x;
849 // } else {
850 // ls = tl;
851 // i -= 1;
852 // }
853 // }
854 // _ => {
855 // unreachable!(); // <-- best candidate (Rustc introduces an `unreachable` case)
856 // }
857 // }
858 // }
859 // ```
860 //
861 // Adding this sanity check so that we can see when there are
862 // several candidates.
863 let span = body.body[loop_id].terminator.span; // Taking *a* span from the block
864 sanity_check!(ctx, span, candidates.is_empty());
865 trace!(
866 "Loop {loop_id}: did not select an exit candidate because they all lead to panics"
867 );
868 chosen_loop_exits.insert(loop_id, None);
869 }
870 } else {
871 // Register the exit, if there is one
872 if let Some(exit_id) = best_exit {
873 exits.insert(exit_id);
874 }
875 chosen_loop_exits.insert(loop_id, best_exit);
876 }
877 }
878
879 // Return the chosen exits
880 trace!("Chosen loop exits: {:?}", chosen_loop_exits);
881 chosen_loop_exits
882 }
883
884 /// See [`compute_loop_switch_exits`] for
885 /// explanations about what "exits" are.
886 ///
887 /// In order to compute the switch exits, we simply recursively compute a
888 /// topologically ordered set of "filtered successors" as follows (note
889 /// that we work in the CFG *without* back edges):
890 /// - for a block which doesn't branch (only one successor), the filtered
891 /// successors is the set of reachable nodes.
892 /// - for a block which branches, we compute the nodes reachable from all
893 /// the children, and find the "best" intersection between those.
894 /// Note that we find the "best" intersection (a pair of branches which
895 /// maximize the intersection of filtered successors) because some branches
896 /// might never join the control-flow of the other branches, if they contain
897 /// a `break`, `return`, `panic`, etc., like here:
898 /// ```text
899 /// if b { x = 3; } { return; }
900 /// y += x;
901 /// ...
902 /// ```
903 /// Note that with nested switches, the branches of the inner switches might
904 /// goto the exits of the outer switches: for this reason, we give precedence
905 /// to the outer switches.
906 fn compute_switch_exits(cfg: &CfgInfo) -> HashMap<src::BlockId, Option<src::BlockId>> {
907 // Compute the successors info map, starting at the root node
908 trace!(
909 "- cfg.cfg:\n{:?}\n- cfg.cfg_no_be:\n{:?}\n- cfg.switch_blocks:\n{:?}",
910 cfg.cfg, cfg.fwd_cfg, cfg.switch_blocks
911 );
912
913 // We need to give precedence to the outer switches: we thus iterate
914 // over the switch blocks in topological order.
915 let mut sorted_switch_blocks: BTreeSet<OrdBlockId> = BTreeSet::new();
916 for bid in cfg.switch_blocks.iter() {
917 sorted_switch_blocks.insert(cfg.make_ord_block_id(*bid));
918 }
919 trace!("sorted_switch_blocks: {:?}", sorted_switch_blocks);
920
921 // Debugging: print all the successors
922 {
923 trace!("Successors info:\n{}\n", {
924 let mut out = vec![];
925 for (bid, info) in cfg.block_data.iter_indexed() {
926 out.push(format!("{} -> {{flow: {:?}}}", bid, &info.flow).to_string());
927 }
928 out.join("\n")
929 });
930 }
931
932 // For every node which is a switch, retrieve the exit.
933 // As the set of intersection of successors is topologically sorted, the
934 // exit should be the first node in the set (if the set is non empty).
935 // Also, we need to explore the nodes in topological order, to give
936 // precedence to the outer switches.
937 let mut exits_set = HashSet::new();
938 let mut exits = HashMap::new();
939 for bid in sorted_switch_blocks {
940 trace!("Finding exit candidate for: {bid:?}");
941 let bid = bid.id;
942 let block_data = &cfg.block_data[bid];
943 // Find the best successor: this is the node with the highest flow, and the
944 // highest reverse topological rank.
945 //
946 // Remark: in order to rank the nodes, we also use the negation of the
947 // rank given by the topological order. The last elements of the set
948 // have the highest flow, that is they are the nodes to which the maximum
949 // number of paths converge. If several nodes have the same flow, we want
950 // to take the highest one in the hierarchy: hence the use of the inverse
951 // of the topological rank.
952 //
953 // Ex.:
954 // ```text
955 // A -- we start here
956 // |
957 // |---------------------------------------
958 // | | | |
959 // B:(0.25,-1) C:(0.25,-2) D:(0.25,-3) E:(0.25,-4)
960 // | | |
961 // |--------------------------
962 // |
963 // F:(0.75,-5)
964 // |
965 // |
966 // G:(0.75,-6)
967 // ```
968 // The "best" node (with the highest (flow, rank) in the graph above is F.
969 let switch_exit: Option<BlockWithRank<(BigRational, isize)>> = block_data
970 .reachable_excluding_self()
971 .map(|id| {
972 let flow = block_data.flow[id].clone();
973 let rank = -isize::try_from(cfg.topo_rank(id)).unwrap();
974 BlockWithRank {
975 rank: (flow, rank),
976 id,
977 }
978 })
979 .max();
980 let exit = if let Some(exit) = switch_exit {
981 // We have an exit candidate: check that it was not already
982 // taken by an external switch
983 trace!("{bid:?} has an exit candidate: {exit:?}");
984 if exits_set.contains(&exit.id) {
985 trace!(
986 "Ignoring the exit candidate because already taken by an external switch"
987 );
988 None
989 } else {
990 // It was not taken by an external switch.
991 //
992 // We must check that we can't reach the exit of an external
993 // switch from one of the branches, without going through the
994 // exit candidate.
995 // We do this by simply checking that we can't reach any exits
996 // (and use the fact that we explore the switch by using a
997 // topological order to not discard valid exit candidates).
998 //
999 // The reason is that it can lead to code like the following:
1000 // ```
1001 // if ... { // if #1
1002 // if ... { // if #2
1003 // ...
1004 // // here, we have a `goto b1`, where b1 is the exit
1005 // // of if #2: we thus stop translating the blocks.
1006 // }
1007 // else {
1008 // ...
1009 // // here, we have a `goto b2`, where b2 is the exit
1010 // // of if #1: we thus stop translating the blocks.
1011 // }
1012 // // We insert code for the block b1 here (which is the exit of
1013 // // the exit of if #2). However, this block should only
1014 // // be executed in the branch "then" of the if #2, not in
1015 // // the branch "else".
1016 // ...
1017 // }
1018 // else {
1019 // ...
1020 // }
1021 // ```
1022
1023 // First: we do a quick check (does the set of all successors
1024 // intersect the set of exits for outer blocks?). If yes, we do
1025 // a more precise analysis: we check if we can reach the exit
1026 // *without going through* the exit candidate.
1027 let can_reach_exit = block_data
1028 .reachable_excluding_self()
1029 .any(|bid| exits_set.contains(&bid));
1030 if !can_reach_exit || !Self::can_reach_outer_exit(cfg, &exits_set, bid, exit.id)
1031 {
1032 trace!("Keeping the exit candidate");
1033 // No intersection: ok
1034 exits_set.insert(exit.id);
1035 Some(exit.id)
1036 } else {
1037 trace!(
1038 "Ignoring the exit candidate because of an intersection with external switches"
1039 );
1040 None
1041 }
1042 }
1043 } else {
1044 trace!("{bid:?} has no successors");
1045 None
1046 };
1047
1048 exits.insert(bid, exit);
1049 }
1050
1051 exits
1052 }
1053
1054 /// Compute the exits for the loops and the switches (switch on integer and
1055 /// if ... then ... else ...). We need to do this because control-flow in MIR
1056 /// is destructured: we have gotos everywhere.
1057 ///
1058 /// Let's consider the following piece of code:
1059 /// ```text
1060 /// if cond1 { ... } else { ... };
1061 /// if cond2 { ... } else { ... };
1062 /// ```
1063 /// Once converted to MIR, the control-flow is destructured, which means we
1064 /// have gotos everywhere. When reconstructing the control-flow, we have
1065 /// to be careful about the point where we should join the two branches of
1066 /// the first if.
1067 /// For instance, if we don't notice they should be joined at some point (i.e,
1068 /// whatever the branch we take, there is a moment when we go to the exact
1069 /// same place, just before the second if), we might generate code like
1070 /// this, with some duplicata:
1071 /// ```text
1072 /// if cond1 { ...; if cond2 { ... } else { ...} }
1073 /// else { ...; if cond2 { ... } else { ...} }
1074 /// ```
1075 ///
1076 /// Such a reconstructed program is valid, but it is definitely non-optimal:
1077 /// it is very different from the original program (making it less clean and
1078 /// clear), more bloated, and might involve duplicating the proof effort.
1079 ///
1080 /// For this reason, we need to find the "exit" of the first loop, which is
1081 /// the point where the two branches join. Note that this can be a bit tricky,
1082 /// because there may be more than two branches (if we do `switch(x) { ... }`),
1083 /// and some of them might not join (if they contain a `break`, `panic`,
1084 /// `return`, etc.).
1085 ///
1086 /// Finally, some similar issues arise for loops. For instance, let's consider
1087 /// the following piece of code:
1088 /// ```text
1089 /// while cond1 {
1090 /// e1;
1091 /// if cond2 {
1092 /// break;
1093 /// }
1094 /// e2;
1095 /// }
1096 /// e3;
1097 /// return;
1098 /// ```
1099 ///
1100 /// Note that in MIR, the loop gets desugared to an if ... then ... else ....
1101 /// From the MIR, We want to generate something like this:
1102 /// ```text
1103 /// loop {
1104 /// if cond1 {
1105 /// e1;
1106 /// if cond2 {
1107 /// break;
1108 /// }
1109 /// e2;
1110 /// continue;
1111 /// }
1112 /// else {
1113 /// break;
1114 /// }
1115 /// };
1116 /// e3;
1117 /// return;
1118 /// ```
1119 ///
1120 /// But if we don't pay attention, we might end up with that, once again with
1121 /// duplications:
1122 /// ```text
1123 /// loop {
1124 /// if cond1 {
1125 /// e1;
1126 /// if cond2 {
1127 /// e3;
1128 /// return;
1129 /// }
1130 /// e2;
1131 /// continue;
1132 /// }
1133 /// else {
1134 /// e3;
1135 /// return;
1136 /// }
1137 /// }
1138 /// ```
1139 /// We thus have to notice that if the loop condition is false, we goto the same
1140 /// block as when following the goto introduced by the break inside the loop, and
1141 /// this block is dubbed the "loop exit".
1142 ///
1143 /// The following function thus computes the "exits" for loops and switches, which
1144 /// are basically the points where control-flow joins.
1145 fn compute(ctx: &mut TransformCtx, body: &src::ExprBody, cfg_info: &CfgInfo) -> Self {
1146 // Compute the loop exits
1147 let loop_exits = Self::compute_loop_exits(ctx, body, cfg_info);
1148 trace!("loop_exits:\n{:?}", loop_exits);
1149
1150 // Compute the switch exits
1151 let switch_exits = Self::compute_switch_exits(cfg_info);
1152 trace!("switch_exits:\n{:?}", switch_exits);
1153
1154 // Compute the exit info
1155 let mut exits_info = ExitsInfo {
1156 exits: body.body.map_ref(|_| Default::default()),
1157 };
1158
1159 // We need to give precedence to the outer switches and loops: we thus iterate
1160 // over the blocks in topological order.
1161 let mut sorted_blocks: BTreeSet<OrdBlockId> = BTreeSet::new();
1162 for bid in cfg_info
1163 .loop_entries
1164 .iter()
1165 .chain(cfg_info.switch_blocks.iter())
1166 {
1167 sorted_blocks.insert(cfg_info.make_ord_block_id(*bid));
1168 }
1169
1170 // Keep track of the exits which were already attributed
1171 let mut all_exits = HashSet::new();
1172
1173 // Put all this together
1174 for bid in sorted_blocks {
1175 let bid = bid.id;
1176 // Check if loop or switch block
1177 let exit_info = &mut exits_info.exits[bid];
1178 if cfg_info.loop_entries.contains(&bid) {
1179 exit_info.is_loop_entry = true;
1180 // This is a loop.
1181 //
1182 // For loops, we always register the exit (if there is one).
1183 // However, the exit may be owned by an outer switch (note
1184 // that we already took care of spreading the exits between
1185 // the inner/outer loops)
1186 let exit_id = loop_exits.get(&bid).unwrap();
1187 exit_info.loop_exit = *exit_id;
1188 // Check if we "own" the exit
1189 let exit_id = exit_id.filter(|exit_id| !all_exits.contains(exit_id));
1190 if let Some(exit_id) = exit_id {
1191 all_exits.insert(exit_id);
1192 }
1193 exit_info.owned_loop_exit = exit_id;
1194 } else {
1195 exit_info.is_switch_entry = true;
1196 // For switches: check that the exit was not already given to a
1197 // loop
1198 let exit_id = switch_exits.get(&bid).unwrap();
1199 // Check if we "own" the exit
1200 let exit_id = exit_id.filter(|exit_id| !all_exits.contains(exit_id));
1201 if let Some(exit_id) = exit_id {
1202 all_exits.insert(exit_id);
1203 }
1204 exit_info.owned_switch_exit = exit_id;
1205 }
1206 }
1207
1208 exits_info
1209 }
1210}
1211
1212enum GotoKind {
1213 Break(usize),
1214 Continue(usize),
1215 ExitBlock,
1216 Goto,
1217}
1218
1219struct ReconstructCtx<'a> {
1220 body: &'a src::ExprBody,
1221 exits_info: ExitsInfo,
1222 explored: HashSet<src::BlockId>,
1223 parent_loops: Vec<src::BlockId>,
1224 switch_exit_blocks: HashSet<src::BlockId>,
1225}
1226
1227impl<'a> ReconstructCtx<'a> {
1228 fn build(ctx: &mut TransformCtx, src_body: &'a src::ExprBody) -> Result<Self, Irreducible> {
1229 // Explore the function body to create the control-flow graph without backward
1230 // edges, and identify the loop entries (which are destinations of backward edges).
1231 let cfg_info = CfgInfo::build(&src_body.body)?;
1232
1233 // Find the exit block for all the loops and switches, if such an exit point exists.
1234 let exits_info = ExitsInfo::compute(ctx, src_body, &cfg_info);
1235
1236 // Translate the body by reconstructing the loops and the
1237 // conditional branchings.
1238 // Note that we shouldn't get `None`.
1239 Ok(ReconstructCtx {
1240 body: src_body,
1241 exits_info: exits_info,
1242 explored: HashSet::new(),
1243 parent_loops: Vec::new(),
1244 switch_exit_blocks: HashSet::new(),
1245 })
1246 }
1247
1248 fn get_goto_kind(&self, next_block_id: src::BlockId) -> GotoKind {
1249 // First explore the parent loops in revert order
1250 for (i, &loop_id) in self.parent_loops.iter().rev().enumerate() {
1251 // If we goto a loop entry node: this is a 'continue'
1252 if next_block_id == loop_id {
1253 return GotoKind::Continue(i);
1254 } else {
1255 // If we goto a loop exit node: this is a 'break'
1256 if let Some(exit_id) = self.exits_info.exits[loop_id].loop_exit {
1257 if next_block_id == exit_id {
1258 return GotoKind::Break(i);
1259 }
1260 }
1261 }
1262 }
1263
1264 // Check if the goto exits the current block
1265 if self.switch_exit_blocks.contains(&next_block_id) {
1266 return GotoKind::ExitBlock;
1267 }
1268
1269 // Default
1270 GotoKind::Goto
1271 }
1272
1273 /// `parent_span`: we need some span data for the new statement.
1274 /// We use the one for the parent terminator.
1275 fn translate_child_block(&mut self, parent_span: Span, child_id: src::BlockId) -> tgt::Block {
1276 // Check if this is a backward call
1277 match self.get_goto_kind(child_id) {
1278 GotoKind::Break(index) => {
1279 let st = tgt::StatementKind::Break(index);
1280 tgt::Statement::new(parent_span, st).into_block()
1281 }
1282 GotoKind::Continue(index) => {
1283 let st = tgt::StatementKind::Continue(index);
1284 tgt::Statement::new(parent_span, st).into_block()
1285 }
1286 // If we are going to an exit block we simply ignore the goto
1287 GotoKind::ExitBlock => {
1288 let st = tgt::StatementKind::Nop;
1289 tgt::Statement::new(parent_span, st).into_block()
1290 }
1291 GotoKind::Goto => {
1292 // "Standard" goto: just recursively translate
1293 ensure_sufficient_stack(|| self.translate_block(child_id))
1294 }
1295 }
1296 }
1297
1298 fn translate_statement(&self, st: &src::Statement) -> Option<tgt::Statement> {
1299 let src_span = st.span;
1300 let st = match st.kind.clone() {
1301 src::StatementKind::Assign(place, rvalue) => tgt::StatementKind::Assign(place, rvalue),
1302 src::StatementKind::SetDiscriminant(place, variant_id) => {
1303 tgt::StatementKind::SetDiscriminant(place, variant_id)
1304 }
1305 src::StatementKind::CopyNonOverlapping(copy) => {
1306 tgt::StatementKind::CopyNonOverlapping(copy)
1307 }
1308 src::StatementKind::StorageLive(var_id) => tgt::StatementKind::StorageLive(var_id),
1309 src::StatementKind::StorageDead(var_id) => tgt::StatementKind::StorageDead(var_id),
1310 src::StatementKind::Deinit(place) => tgt::StatementKind::Deinit(place),
1311 src::StatementKind::Assert(assert) => tgt::StatementKind::Assert(assert),
1312 src::StatementKind::Nop => tgt::StatementKind::Nop,
1313 };
1314 Some(tgt::Statement::new(src_span, st))
1315 }
1316
1317 fn translate_terminator(&mut self, terminator: &src::Terminator) -> tgt::Block {
1318 let src_span = terminator.span;
1319
1320 match &terminator.kind {
1321 src::TerminatorKind::Abort(kind) => {
1322 tgt::Statement::new(src_span, tgt::StatementKind::Abort(kind.clone())).into_block()
1323 }
1324 src::TerminatorKind::Return => {
1325 tgt::Statement::new(src_span, tgt::StatementKind::Return).into_block()
1326 }
1327 src::TerminatorKind::UnwindResume => {
1328 tgt::Statement::new(src_span, tgt::StatementKind::Abort(AbortKind::Panic(None)))
1329 .into_block()
1330 }
1331 src::TerminatorKind::Call {
1332 call,
1333 target,
1334 on_unwind: _,
1335 } => {
1336 // TODO: Have unwinds in the LLBC
1337 let st = tgt::Statement::new(src_span, tgt::StatementKind::Call(call.clone()));
1338 let mut block = self.translate_child_block(terminator.span, *target);
1339 block.statements.insert(0, st);
1340 block
1341 }
1342 src::TerminatorKind::Drop {
1343 kind,
1344 place,
1345 tref,
1346 target,
1347 on_unwind: _,
1348 } => {
1349 // TODO: Have unwinds in the LLBC
1350 let st = tgt::Statement::new(
1351 src_span,
1352 tgt::StatementKind::Drop(place.clone(), tref.clone(), kind.clone()),
1353 );
1354 let mut block = self.translate_child_block(terminator.span, *target);
1355 block.statements.insert(0, st);
1356 block
1357 }
1358 src::TerminatorKind::Goto { target } => {
1359 self.translate_child_block(terminator.span, *target)
1360 }
1361 src::TerminatorKind::Switch { discr, targets } => {
1362 // Translate the target expressions
1363 let switch = match &targets {
1364 src::SwitchTargets::If(then_tgt, else_tgt) => {
1365 let then_block = self.translate_child_block(terminator.span, *then_tgt);
1366 let else_block = self.translate_child_block(terminator.span, *else_tgt);
1367 tgt::Switch::If(discr.clone(), then_block, else_block)
1368 }
1369 src::SwitchTargets::SwitchInt(int_ty, targets, otherwise) => {
1370 // Note that some branches can be grouped together, like
1371 // here:
1372 // ```
1373 // match e {
1374 // E::V1 | E::V2 => ..., // Grouped
1375 // E::V3 => ...
1376 // }
1377 // ```
1378 // We detect this by checking if a block has already been
1379 // translated as one of the branches of the switch.
1380 //
1381 // Rk.: note there may be intermediate gotos depending
1382 // on the MIR we use. Typically, we manage to detect the
1383 // grouped branches with Optimized MIR, but not with Promoted
1384 // MIR. See the comment in "tests/src/matches.rs".
1385
1386 // We link block ids to:
1387 // - vector of matched integer values
1388 // - translated blocks
1389 let mut branches: SeqHashMap<src::BlockId, (Vec<Literal>, tgt::Block)> =
1390 SeqHashMap::new();
1391
1392 // Translate the children expressions
1393 for (v, bid) in targets.iter() {
1394 // Check if the block has already been translated:
1395 // if yes, it means we need to group branches
1396 if branches.contains_key(bid) {
1397 // Already translated: add the matched value to
1398 // the list of values
1399 let branch = branches.get_mut(bid).unwrap();
1400 branch.0.push(v.clone());
1401 } else {
1402 // Not translated: translate it
1403 let block = self.translate_child_block(terminator.span, *bid);
1404 // We use the terminator span information in case then
1405 // then statement is `None`
1406 branches.insert(*bid, (vec![v.clone()], block));
1407 }
1408 }
1409 let targets_blocks: Vec<(Vec<Literal>, tgt::Block)> =
1410 branches.into_iter().map(|(_, x)| x).collect();
1411
1412 let otherwise_block =
1413 self.translate_child_block(terminator.span, *otherwise);
1414
1415 // Translate
1416 tgt::Switch::SwitchInt(
1417 discr.clone(),
1418 *int_ty,
1419 targets_blocks,
1420 otherwise_block,
1421 )
1422 }
1423 };
1424
1425 // Return
1426 let span = tgt::combine_switch_targets_span(&switch);
1427 let span = combine_span(&src_span, &span);
1428 let st = tgt::StatementKind::Switch(switch);
1429 tgt::Statement::new(span, st).into_block()
1430 }
1431 }
1432 }
1433
1434 /// Remark: some values are boxed (here, the returned statement) so that they
1435 /// are allocated on the heap. This reduces stack usage (we had problems with
1436 /// stack overflows in the past). A more efficient solution would be to use loops
1437 /// to make this code constant space, but that would require a serious rewriting.
1438 fn translate_block(&mut self, block_id: src::BlockId) -> tgt::Block {
1439 // If the user activated this check: check that we didn't already translate
1440 // this block, and insert the block id in the set of already translated blocks.
1441 self.explored.insert(block_id);
1442
1443 let block = &self.body.body[block_id];
1444
1445 let exit_info = &self.exits_info.exits[block_id];
1446 // Check if we enter a loop: if so, update parent_loops and the current_exit_block
1447 let is_loop = exit_info.is_loop_entry;
1448 // If we enter a switch or a loop, we need to check if we own the exit
1449 // block, in which case we need to append it to the loop/switch body
1450 // in a sequence
1451 let is_switch = exit_info.is_switch_entry;
1452 let next_block = if is_loop {
1453 exit_info.owned_loop_exit
1454 } else if is_switch {
1455 exit_info.owned_switch_exit
1456 } else {
1457 None
1458 };
1459
1460 let terminator = {
1461 if is_loop {
1462 self.parent_loops.push(block_id);
1463 }
1464 // If we enter a switch, add the exit block to the set of outer exit blocks.
1465 if is_switch && let Some(bid) = next_block {
1466 assert!(!self.switch_exit_blocks.contains(&bid));
1467 self.switch_exit_blocks.insert(bid);
1468 }
1469
1470 // Translate the terminator and the subsequent blocks.
1471 // Note that this terminator is an option: we might ignore it
1472 // (if it is an exit).
1473 let terminator = self.translate_terminator(&block.terminator);
1474
1475 // Reset the state to what it was previously.
1476 if is_loop {
1477 self.parent_loops.pop();
1478 }
1479 if is_switch && let Some(bid) = next_block {
1480 self.switch_exit_blocks.remove(&bid);
1481 }
1482 terminator
1483 };
1484
1485 // Translate the statements inside the block
1486 let statements = block
1487 .statements
1488 .iter()
1489 .filter_map(|st| self.translate_statement(st))
1490 .collect_vec();
1491
1492 // Prepend the statements to the terminator.
1493 let mut block = if let Some(st) = tgt::Block::from_seq(statements) {
1494 st.merge(terminator)
1495 } else {
1496 terminator
1497 };
1498
1499 if is_loop {
1500 // Put the loop body inside a `Loop`.
1501 block = tgt::Statement::new(block.span, tgt::StatementKind::Loop(block)).into_block()
1502 } else if !is_switch {
1503 assert!(next_block.is_none());
1504 }
1505
1506 // Concatenate the exit expression, if needs be
1507 if let Some(exit_block_id) = next_block {
1508 let next_block = ensure_sufficient_stack(|| self.translate_block(exit_block_id));
1509 block = block.merge(next_block);
1510 }
1511 block
1512 }
1513}
1514
1515fn translate_body(ctx: &mut TransformCtx, body: &mut gast::Body) {
1516 use gast::Body::{Structured, Unstructured};
1517 let Unstructured(src_body) = body else {
1518 panic!("Called `ullbc_to_llbc` on an already restructured body")
1519 };
1520 trace!("About to translate to ullbc: {:?}", src_body.span);
1521
1522 // Calculate info about the graph and heuristically determine loop and switch exit blocks.
1523 let mut ctx = match ReconstructCtx::build(ctx, src_body) {
1524 Ok(ctx) => ctx,
1525 Err(Irreducible(bid)) => {
1526 let span = src_body.body[bid].terminator.span;
1527 register_error!(
1528 ctx,
1529 span,
1530 "the control-flow graph of this function is not reducible"
1531 );
1532 panic!("can't reconstruct irreducible control-flow")
1533 }
1534 };
1535 // Translate the blocks using the computed data.
1536 let tgt_body = ctx.translate_block(src::BlockId::ZERO);
1537
1538 let tgt_body = tgt::ExprBody {
1539 span: src_body.span,
1540 locals: src_body.locals.clone(),
1541 bound_body_regions: src_body.bound_body_regions,
1542 body: tgt_body,
1543 comments: src_body.comments.clone(),
1544 };
1545 *body = Structured(tgt_body);
1546}
1547
1548pub struct Transform;
1549impl TransformPass for Transform {
1550 fn transform_ctx(&self, ctx: &mut TransformCtx) {
1551 // Translate the bodies one at a time.
1552 ctx.for_each_body(|ctx, body| {
1553 translate_body(ctx, body);
1554 });
1555
1556 if ctx.options.print_built_llbc {
1557 eprintln!("# LLBC resulting from control-flow reconstruction:\n\n{ctx}\n",);
1558 } else {
1559 trace!("# LLBC resulting from control-flow reconstruction:\n\n{ctx}\n",);
1560 }
1561 }
1562}