charon_lib/transform/control_flow/ullbc_to_llbc.rs
1//! ULLBC to LLBC
2//!
3//! We reconstruct the control-flow in the Unstructured LLBC.
4//!
5//! The reconstruction algorithm is not written to be efficient (its complexity
6//! is probably very bad), but it was not written to be: this is still an early
7//! stage and we want the algorithm to generate the best reconstruction as
8//! possible. We still need to test the algorithm on more interesting examples,
9//! and will consider making it more efficient once it is a bit mature and well
10//! tested.
11//! Also note that we more importantly focus on making the algorithm sound: the
12//! reconstructed program must always be equivalent to the original MIR program,
13//! and the fact that the reconstruction preserves this property must be obvious.
14//!
15//! Finally, we conjecture the execution time shouldn't be too much a problem:
16//! we don't expect the translation mechanism to be applied on super huge functions
17//! (which will be difficult to formally analyze), and the MIR graphs are actually
18//! not that big because statements are grouped into code blocks (a block is made
19//! of a list of statements, followed by a terminator - branchings and jumps can
20//! only be performed by terminators -, meaning that MIR graphs don't have that
21//! many nodes and edges).
22
23use indexmap::IndexMap;
24use itertools::Itertools;
25use petgraph::algo::dijkstra;
26use petgraph::algo::dominators::{Dominators, simple_fast};
27use petgraph::graphmap::DiGraphMap;
28use petgraph::visit::{Dfs, DfsPostOrder, GraphBase, IntoNeighbors, Visitable, Walker};
29use std::collections::{BTreeSet, HashMap, HashSet};
30use std::mem;
31
32use crate::common::ensure_sufficient_stack;
33use crate::errors::sanity_check;
34use crate::formatter::IntoFormatter;
35use crate::llbc_ast as tgt;
36use crate::meta::{Span, combine_span};
37use crate::pretty::FmtWithCtx;
38use crate::transform::TransformCtx;
39use crate::transform::ctx::TransformPass;
40use crate::ullbc_ast::{self as src, BlockId};
41use crate::{ast::*, register_error};
42
43/// Arbitrary-precision numbers
44type BigUint = fraction::DynaInt<u64, fraction::BigUint>;
45type BigRational = fraction::Ratio<BigUint>;
46
47/// Control-Flow Graph
48type Cfg = DiGraphMap<src::BlockId, ()>;
49
50/// Information precomputed about a function's CFG.
51#[derive(Debug)]
52struct CfgInfo {
53 /// The CFG
54 pub cfg: Cfg,
55 /// The CFG where all the backward edges have been removed. Aka "forward CFG".
56 pub fwd_cfg: Cfg,
57 /// We consider the destination of the backward edges to be loop entries and
58 /// store them here.
59 pub loop_entries: HashSet<src::BlockId>,
60 /// The blocks whose terminators are a switch are stored here.
61 pub switch_blocks: HashSet<src::BlockId>,
62 /// Tree of which nodes dominates which other nodes.
63 #[expect(unused)]
64 pub dominator_tree: Dominators<BlockId>,
65 /// Computed data about each block.
66 pub block_data: Vector<BlockId, BlockData>,
67}
68
69#[derive(Debug)]
70struct BlockData {
71 pub id: BlockId,
72 /// Order in a reverse postorder numbering. `None` if the block is unreachable.
73 pub reverse_postorder: Option<u32>,
74 /// Node from where we can only reach error nodes (panic, etc.)
75 pub only_reach_error: bool,
76 /// List of reachable nodes, with the length of shortest path to them. Includes the current
77 /// node.
78 pub shortest_paths: hashbrown::HashMap<BlockId, usize>,
79 /// Let's say we put a quantity of water equal to 1 on the block, and the water flows downards.
80 /// Whenever there is a branching, the quantity of water gets equally divided between the
81 /// branches. When the control flows join, we put the water back together. The set below
82 /// computes the amount of water received by each descendant of the node.
83 ///
84 /// TODO: there must be a known algorithm which computes this, right?...
85 /// This is exactly this problems:
86 /// <https://stackoverflow.com/questions/78221666/algorithm-for-total-flow-through-weighted-directed-acyclic-graph>
87 /// TODO: the way I compute this is not efficient.
88 pub flow: Vector<src::BlockId, BigRational>,
89}
90
91#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
92struct BlockWithRank<T> {
93 /// A "rank" quantity that we use to order the nodes.
94 /// By placing the `rank` field before the `id`, we make sure that
95 /// we use the rank first in the lexicographic order.
96 rank: T,
97 id: src::BlockId,
98}
99type OrdBlockId = BlockWithRank<u32>;
100
101/// Error indicating that the control-flow graph is not reducible. The contained block id is a
102/// block involved in an irreducible subgraph.
103struct Irreducible(BlockId);
104
105impl CfgInfo {
106 /// Build the CFGs (the "regular" CFG and the CFG without backward edges) and
107 /// compute some information like the loop entries and the switch blocks.
108 fn build(body: &src::BodyContents) -> Result<Self, Irreducible> {
109 let start_block = BlockId::ZERO;
110
111 // Build the node graph (we ignore unwind paths for now).
112 let mut cfg = Cfg::new();
113 for (block_id, block) in body.iter_indexed() {
114 cfg.add_node(block_id);
115 for tgt in block.targets_ignoring_unwind() {
116 cfg.add_edge(block_id, tgt, ());
117 }
118 }
119
120 let empty_flow = body.map_ref(|_| BigRational::new(0u64.into(), 1u64.into()));
121 let mut block_data: Vector<BlockId, BlockData> = body.map_ref_indexed(|id, _| BlockData {
122 id,
123 // Default values will stay for unreachable nodes, which are irrelevant.
124 reverse_postorder: None,
125 only_reach_error: false,
126 shortest_paths: Default::default(),
127 flow: empty_flow.clone(),
128 });
129
130 // Compute the dominator tree.
131 let dominator_tree = simple_fast(&cfg, start_block);
132
133 // Compute reverse postorder numbering.
134 for (i, block_id) in DfsPostOrder::new(&cfg, start_block).iter(&cfg).enumerate() {
135 let rev_post_id = body.slot_count() - i;
136 assert!(rev_post_id <= u32::MAX as usize);
137 block_data[block_id].reverse_postorder = Some(rev_post_id as u32);
138 }
139
140 // Compute the forward graph (without backward edges).
141 let mut fwd_cfg = Cfg::new();
142 let mut loop_entries = HashSet::new();
143 let mut switch_blocks = HashSet::new();
144 for src in cfg.nodes() {
145 if block_data[src].reverse_postorder.is_none() {
146 // Unreachable block
147 continue;
148 }
149 fwd_cfg.add_node(src);
150 if body[src].terminator.kind.is_switch() {
151 switch_blocks.insert(src);
152 }
153 for tgt in cfg.neighbors(src) {
154 // Check if the edge is a backward edge.
155 if block_data[src].reverse_postorder >= block_data[tgt].reverse_postorder {
156 // This is a backward edge
157 loop_entries.insert(tgt);
158 // A cfg is reducible iff the target of every back edge dominates the
159 // edge's source.
160 if !dominator_tree.dominators(src).unwrap().contains(&tgt) {
161 return Err(Irreducible(src));
162 }
163 } else {
164 fwd_cfg.add_edge(src, tgt, ());
165 }
166 }
167 }
168
169 for block_id in DfsPostOrder::new(&fwd_cfg, start_block).iter(&fwd_cfg) {
170 let block = &body[block_id];
171 let targets = cfg.neighbors(block_id).collect_vec();
172 let fwd_targets = fwd_cfg.neighbors(block_id).collect_vec();
173
174 // Compute the nodes that can only reach error nodes.
175 // The node can only reach error nodes if:
176 // - it is an error node;
177 // - or it has neighbors and they all lead to errors.
178 // Note that if there is a backward edge, `only_reach_error` cannot contain this
179 // node yet. In other words, this does not consider infinite loops as reaching an
180 // error node.
181 if block.terminator.is_error()
182 || (!targets.is_empty()
183 && targets.iter().all(|&tgt| block_data[tgt].only_reach_error))
184 {
185 block_data[block_id].only_reach_error = true;
186 }
187
188 // Compute the flows between each pair of nodes.
189 let mut flow: Vector<src::BlockId, BigRational> =
190 mem::take(&mut block_data[block_id].flow);
191 if !fwd_targets.is_empty() {
192 // We need to divide the initial flow equally between the children
193 let factor = BigRational::new(1u64.into(), fwd_targets.len().into());
194
195 // For each child, multiply the flows of its own children by the ratio,
196 // and add.
197 for &child_id in &fwd_targets {
198 // First, add the child itself
199 flow[child_id] += factor.clone();
200
201 // Then add its successors
202 let child = &block_data[child_id];
203 for grandchild in child.reachable_excluding_self() {
204 // Flow from `child` to `grandchild`
205 let child_flow = child.flow[grandchild].clone();
206 flow[grandchild] += factor.clone() * child_flow;
207 }
208 }
209 }
210 block_data[block_id].flow = flow;
211
212 // Compute shortest paths to all reachable nodes in the forward graph.
213 block_data[block_id].shortest_paths = dijkstra(&fwd_cfg, block_id, None, |_| 1usize);
214 }
215
216 Ok(CfgInfo {
217 cfg,
218 fwd_cfg,
219 loop_entries,
220 switch_blocks,
221 dominator_tree,
222 block_data,
223 })
224 }
225
226 fn block_data(&self, block_id: BlockId) -> &BlockData {
227 &self.block_data[block_id]
228 }
229 // fn can_reach(&self, src: BlockId, tgt: BlockId) -> bool {
230 // self.block_data[src].shortest_paths.contains_key(&tgt)
231 // }
232 fn topo_rank(&self, block_id: BlockId) -> u32 {
233 self.block_data[block_id].reverse_postorder.unwrap()
234 }
235 /// Create an [OrdBlockId] from a block id and a rank given by a map giving
236 /// a sort (topological in our use cases) over the graph.
237 fn make_ord_block_id(&self, block_id: BlockId) -> OrdBlockId {
238 OrdBlockId {
239 id: block_id,
240 rank: self.topo_rank(block_id),
241 }
242 }
243 fn is_backward_edge(&self, src: BlockId, tgt: BlockId) -> bool {
244 self.block_data[src].reverse_postorder >= self.block_data[tgt].reverse_postorder
245 && self.cfg.contains_edge(src, tgt)
246 }
247}
248
249impl BlockData {
250 fn shortest_paths_including_self(&self) -> impl Iterator<Item = (BlockId, usize)> {
251 self.shortest_paths.iter().map(|(bid, d)| (*bid, *d))
252 }
253 fn shortest_paths_excluding_self(&self) -> impl Iterator<Item = (BlockId, usize)> {
254 self.shortest_paths_including_self()
255 .filter(move |&(bid, _)| bid != self.id)
256 }
257 #[expect(unused)]
258 fn reachable_including_self(&self) -> impl Iterator<Item = BlockId> {
259 self.shortest_paths_including_self().map(|(bid, _)| bid)
260 }
261 fn reachable_excluding_self(&self) -> impl Iterator<Item = BlockId> {
262 self.shortest_paths_excluding_self().map(|(bid, _)| bid)
263 }
264}
265
266#[derive(Debug, Clone)]
267struct LoopExitCandidateInfo {
268 /// The occurrences going to this exit.
269 /// For every occurrence, we register the distance between the loop entry
270 /// and the node from which the edge going to the exit starts.
271 /// If later we have to choose between candidates with the same number
272 /// of occurrences, we choose the one with the smallest distances.
273 ///
274 /// Note that it *may* happen that we have several exit candidates referenced
275 /// more than once for one loop. This comes from the fact that whenever we
276 /// reach a node from which the loop entry is not reachable (without using a
277 /// backward edge leading to an outer loop entry), we register this node
278 /// as well as all its children as exit candidates.
279 /// Consider the following example:
280 /// ```text
281 /// while i < max {
282 /// if cond {
283 /// break;
284 /// }
285 /// s += i;
286 /// i += 1
287 /// }
288 /// // All the below nodes are exit candidates (each of them is referenced twice)
289 /// s += 1;
290 /// return s;
291 /// ```
292 pub occurrences: Vec<usize>,
293}
294
295struct FilteredLoopParents {
296 remaining_parents: Vec<(src::BlockId, usize)>,
297 removed_parents: Vec<(src::BlockId, usize)>,
298}
299
300#[derive(Debug, Default, Clone)]
301struct ExitInfo {
302 is_loop_entry: bool,
303 is_switch_entry: bool,
304 /// The loop exit
305 loop_exit: Option<src::BlockId>,
306 /// Some loop exits actually belong to outer switches. We still need
307 /// to track them in the loop exits, in order to know when we should
308 /// insert a break. However, we need to make sure we don't add the
309 /// corresponding block in a sequence, after having translated the
310 /// loop, like so:
311 /// ```text
312 /// loop {
313 /// loop_body
314 /// };
315 /// exit_blocks // OK if the exit "belongs" to the loop
316 /// ```
317 ///
318 /// In case the exit doesn't belong to the loop:
319 /// ```text
320 /// if b {
321 /// loop {
322 /// loop_body
323 /// } // no exit blocks after the loop
324 /// }
325 /// else {
326 /// ...
327 /// };
328 /// exit_blocks // the exit blocks are here
329 /// ```
330 owned_loop_exit: Option<src::BlockId>,
331 /// The switch exit.
332 /// Note that the switch exits are always owned.
333 owned_switch_exit: Option<src::BlockId>,
334}
335
336/// The exits of a graph
337#[derive(Debug, Clone)]
338struct ExitsInfo {
339 exits: Vector<BlockId, ExitInfo>,
340}
341
342impl ExitsInfo {
343 /// Check if a loop entry is reachable from a node, in a graph where we remove
344 /// the backward edges going directly to the loop entry.
345 ///
346 /// If the loop entry is not reachable, it means that:
347 /// - the loop entry is not reachable at all
348 /// - or it is only reachable through an outer loop
349 ///
350 /// The starting node should be a (transitive) child of the loop entry.
351 /// We use this to find candidates for loop exits.
352 fn loop_entry_is_reachable_from_inner(
353 cfg: &CfgInfo,
354 loop_entry: src::BlockId,
355 block_id: src::BlockId,
356 ) -> bool {
357 // It is reachable in the complete graph. Check if it is reachable by not
358 // going through backward edges which go to outer loops. In practice, we
359 // just need to forbid the use of any backward edges at the exception of
360 // those which go directly to the current loop's entry. This means that we
361 // ignore backward edges to outer loops of course, but also backward edges
362 // to inner loops because we shouldn't need to follow those (there should be
363 // more direct paths).
364 Dfs::new(&cfg.fwd_cfg, block_id)
365 .iter(&cfg.fwd_cfg)
366 .any(|bid| cfg.is_backward_edge(bid, loop_entry))
367 }
368
369 fn filter_loop_parents(
370 cfg: &CfgInfo,
371 parent_loops: &Vec<(src::BlockId, usize)>,
372 block_id: src::BlockId,
373 ) -> Option<FilteredLoopParents> {
374 let mut eliminate: usize = 0;
375 for (i, (loop_id, _)) in parent_loops.iter().enumerate().rev() {
376 if Self::loop_entry_is_reachable_from_inner(cfg, *loop_id, block_id) {
377 eliminate = i + 1;
378 break;
379 }
380 }
381
382 // Split the vector of parents
383 let (remaining_parents, removed_parents) = parent_loops.split_at(eliminate);
384 if !removed_parents.is_empty() {
385 let (mut remaining_parents, removed_parents) =
386 (remaining_parents.to_vec(), removed_parents.to_vec());
387
388 // Update the distance to the last loop - we just increment the distance
389 // by 1, because from the point of view of the parent loop, we just exited
390 // a block and go to the next sequence of instructions.
391 if !remaining_parents.is_empty() {
392 remaining_parents.last_mut().unwrap().1 += 1;
393 }
394
395 Some(FilteredLoopParents {
396 remaining_parents,
397 removed_parents,
398 })
399 } else {
400 None
401 }
402 }
403
404 /// Auxiliary helper
405 ///
406 /// Check if it is possible to reach the exit of an outer switch from `start_bid` without going
407 /// through the `exit_candidate`. We use the forward graph.
408 fn can_reach_outer_exit(
409 cfg: &CfgInfo,
410 outer_exits: &HashSet<src::BlockId>,
411 start_bid: src::BlockId,
412 exit_candidate: src::BlockId,
413 ) -> bool {
414 /// Graph that is identical to `Cfg` except that a chosen node is considered to have no neighbors.
415 struct GraphWithoutEdgesFrom<'a> {
416 graph: &'a Cfg,
417 special_node: BlockId,
418 }
419 impl GraphBase for GraphWithoutEdgesFrom<'_> {
420 type EdgeId = <Cfg as GraphBase>::EdgeId;
421 type NodeId = <Cfg as GraphBase>::NodeId;
422 }
423 impl IntoNeighbors for &GraphWithoutEdgesFrom<'_> {
424 type Neighbors = impl Iterator<Item = Self::NodeId>;
425 fn neighbors(self, a: Self::NodeId) -> Self::Neighbors {
426 if a == self.special_node {
427 None
428 } else {
429 Some(self.graph.neighbors(a))
430 }
431 .into_iter()
432 .flatten()
433 }
434 }
435 impl Visitable for GraphWithoutEdgesFrom<'_> {
436 type Map = <Cfg as Visitable>::Map;
437 fn visit_map(self: &Self) -> Self::Map {
438 self.graph.visit_map()
439 }
440 fn reset_map(self: &Self, map: &mut Self::Map) {
441 self.graph.reset_map(map);
442 }
443 }
444
445 // Do a DFS over the forward graph where we pretend that the exit candidate has no outgoing
446 // edges. If we reach an outer exit candidate in that graph then the exit candidate does not
447 // dominate the outer exit candidates in the forward graph starting from `start_bid`.
448 let graph = GraphWithoutEdgesFrom {
449 graph: &cfg.fwd_cfg,
450 special_node: exit_candidate,
451 };
452 Dfs::new(&graph, start_bid)
453 .iter(&graph)
454 .any(|bid| outer_exits.contains(&bid))
455 }
456 /// Register a node and its children as exit candidates for a list of
457 /// parent loops.
458 fn register_children_as_loop_exit_candidates(
459 cfg: &CfgInfo,
460 loop_exits: &mut HashMap<src::BlockId, IndexMap<src::BlockId, LoopExitCandidateInfo>>,
461 removed_parent_loops: &Vec<(src::BlockId, usize)>,
462 block_id: src::BlockId,
463 ) {
464 let mut base_dist = 0;
465 // For every parent loop, in reverse order (we go from last to first in
466 // order to correctly compute the distances)
467 for (loop_id, loop_dist) in removed_parent_loops.iter().rev() {
468 // Update the distance to the loop entry
469 base_dist += *loop_dist;
470
471 // Retrieve the candidates
472 let candidates = loop_exits.get_mut(loop_id).unwrap();
473
474 // Update them
475 for (bid, dist) in cfg.block_data(block_id).shortest_paths_including_self() {
476 let distance = base_dist + dist;
477 match candidates.get_mut(&bid) {
478 None => {
479 candidates.insert(
480 bid,
481 LoopExitCandidateInfo {
482 occurrences: vec![distance],
483 },
484 );
485 }
486 Some(c) => {
487 c.occurrences.push(distance);
488 }
489 }
490 }
491 }
492 }
493
494 /// Compute the loop exit candidates.
495 ///
496 /// There may be several candidates with the same "optimality" (same number of
497 /// occurrences, etc.), in which case we choose the first one which was registered
498 /// (the order in which we explore the graph is deterministic): this is why we
499 /// store the candidates in a linked hash map.
500 fn compute_loop_exit_candidates(
501 cfg: &CfgInfo,
502 explored: &mut HashSet<src::BlockId>,
503 ordered_loops: &mut Vec<src::BlockId>,
504 loop_exits: &mut HashMap<src::BlockId, IndexMap<src::BlockId, LoopExitCandidateInfo>>,
505 // List of parent loops, with the distance to the entry of the loop (the distance
506 // is the distance between the current node and the loop entry for the last parent,
507 // and the distance between the parents for the others).
508 mut parent_loops: Vec<(src::BlockId, usize)>,
509 block_id: src::BlockId,
510 ) {
511 if explored.contains(&block_id) {
512 return;
513 }
514 explored.insert(block_id);
515
516 // Check if we enter a loop - add the corresponding node if necessary
517 if cfg.loop_entries.contains(&block_id) {
518 parent_loops.push((block_id, 1));
519 ordered_loops.push(block_id);
520 } else {
521 // Increase the distance with the parent loop
522 if !parent_loops.is_empty() {
523 parent_loops.last_mut().unwrap().1 += 1;
524 }
525 };
526
527 // Retrieve the children - note that we ignore the back edges
528 for child in cfg.fwd_cfg.neighbors(block_id) {
529 // If the parent loop entry is not reachable from the child without going
530 // through a backward edge which goes directly to the loop entry, consider
531 // this node a potential exit.
532 ensure_sufficient_stack(|| {
533 let new_parent_loops = match Self::filter_loop_parents(cfg, &parent_loops, child) {
534 None => parent_loops.clone(),
535 Some(fparent_loops) => {
536 // We filtered some parent loops: it means this child and its
537 // children are loop exit candidates for all those loops: we must
538 // thus register them.
539 // Note that we register the child *and* its children: the reason
540 // is that we might do something *then* actually jump to the exit.
541 // For instance, the following block of code:
542 // ```
543 // if cond { break; } else { ... }
544 // ```
545 //
546 // Gets translated in MIR to something like this:
547 // ```
548 // bb1: {
549 // if cond -> bb2 else -> bb3; // bb2 is not the real exit
550 // }
551 //
552 // bb2: {
553 // goto bb4; // bb4 is the real exit
554 // }
555 // ```
556 Self::register_children_as_loop_exit_candidates(
557 cfg,
558 loop_exits,
559 &fparent_loops.removed_parents,
560 child,
561 );
562 fparent_loops.remaining_parents
563 }
564 };
565 // Explore, with the filtered parents
566 Self::compute_loop_exit_candidates(
567 cfg,
568 explored,
569 ordered_loops,
570 loop_exits,
571 new_parent_loops,
572 child,
573 );
574 })
575 }
576 }
577
578 /// See [`compute_loop_switch_exits`] for
579 /// explanations about what "exits" are.
580 ///
581 /// The following function computes the loop exits. It acts as follows.
582 ///
583 /// We keep track of a stack of the loops in which we entered.
584 /// It is very easy to check when we enter a loop: loop entries are destinations
585 /// of backward edges, which can be spotted with a simple graph exploration (see
586 /// [`build_cfg_partial_info_edges`].
587 /// The criteria to consider whether we exit a loop is the following:
588 /// - we exit a loop if we go to a block from which we can't reach the loop
589 /// entry at all
590 /// - or if we can reach the loop entry, but must use a backward edge which goes
591 /// to an outer loop
592 ///
593 /// It is better explained on the following example:
594 /// ```text
595 /// 'outer while i < max {
596 /// 'inner while j < max {
597 /// j += 1;
598 /// }
599 /// // (i)
600 /// i += 1;
601 /// }
602 /// ```
603 /// If we enter the inner loop then go to (i) from the inner loop, we consider
604 /// that we exited the outer loop because:
605 /// - we can reach the entry of the inner loop from (i) (by finishing then
606 /// starting again an iteration of the outer loop)
607 /// - but doing this requires taking a backward edge which goes to the outer loop
608 ///
609 /// Whenever we exit a loop, we save the block we went to as an exit candidate
610 /// for this loop. Note that there may by many exit candidates. For instance,
611 /// in the below example:
612 /// ```text
613 /// while ... {
614 /// ...
615 /// if ... {
616 /// // We can't reach the loop entry from here: this is an exit
617 /// // candidate
618 /// return;
619 /// }
620 /// }
621 /// // This is another exit candidate - and this is the one we want to use
622 /// // as the "real" exit...
623 /// ...
624 /// ```
625 ///
626 /// Also note that it may happen that we go several times to the same exit (if
627 /// we use breaks for instance): we record the number of times an exit candidate
628 /// is used.
629 ///
630 /// Once we listed all the exit candidates, we find the "best" one for every
631 /// loop, starting with the outer loops. We start with outer loops because
632 /// inner loops might use breaks to exit to the exit of outer loops: if we
633 /// start with the inner loops, the exit which is "natural" for the outer loop
634 /// might end up being used for one of the inner loops...
635 ///
636 /// The best exit is the following one:
637 /// - it is the one which is used the most times (note that there can be
638 /// several candidates which are referenced strictly more than once: see the
639 /// comment below)
640 /// - if several exits have the same number of occurrences, we choose the one
641 /// for which we goto the "earliest" (earliest meaning that the goto is close to
642 /// the loop entry node in the AST). The reason is that all the loops should
643 /// have an outer if ... then ... else ... which executes the loop body or goes
644 /// to the exit (note that this is not necessarily the first
645 /// if ... then ... else ... we find: loop conditions can be arbitrary
646 /// expressions, containing branchings).
647 ///
648 /// # Several candidates for a loop exit:
649 /// =====================================
650 /// There used to be a sanity check to ensure there are no two different
651 /// candidates with exactly the same number of occurrences and distance from
652 /// the entry of the loop, if the number of occurrences is > 1.
653 ///
654 /// We removed it because it does happen, for instance here (the match
655 /// introduces an `unreachable` node, and it has the same number of
656 /// occurrences and the same distance to the loop entry as the `panic`
657 /// node):
658 ///
659 /// ```text
660 /// pub fn list_nth_mut_loop_pair<'a, T>(
661 /// mut ls: &'a mut List<T>,
662 /// mut i: u32,
663 /// ) -> &'a mut T {
664 /// loop {
665 /// match ls {
666 /// List::Nil => {
667 /// panic!() // <-- best candidate
668 /// }
669 /// List::Cons(x, tl) => {
670 /// if i == 0 {
671 /// return x;
672 /// } else {
673 /// ls = tl;
674 /// i -= 1;
675 /// }
676 /// }
677 /// _ => {
678 /// // Note that Rustc always introduces an unreachable branch after
679 /// // desugaring matches.
680 /// unreachable!(), // <-- best candidate
681 /// }
682 /// }
683 /// }
684 /// }
685 /// ```
686 ///
687 /// When this happens we choose an exit candidate whose edges don't necessarily
688 /// lead to an error (above there are none, so we don't choose any exits). Note
689 /// that this last condition is important to prevent loops from being unnecessarily
690 /// nested:
691 ///
692 /// ```text
693 /// pub fn nested_loops_enum(step_out: usize, step_in: usize) -> usize {
694 /// let mut s = 0;
695 ///
696 /// for _ in 0..128 { // We don't want this loop to be nested with the loops below
697 /// s += 1;
698 /// }
699 ///
700 /// for _ in 0..(step_out) {
701 /// for _ in 0..(step_in) {
702 /// s += 1;
703 /// }
704 /// }
705 ///
706 /// s
707 /// }
708 /// ```
709 fn compute_loop_exits(
710 ctx: &mut TransformCtx,
711 body: &src::ExprBody,
712 cfg: &CfgInfo,
713 ) -> HashMap<src::BlockId, Option<src::BlockId>> {
714 let mut explored = HashSet::new();
715 let mut ordered_loops = Vec::new();
716 let mut loop_exits = HashMap::new();
717
718 // Initialize the loop exits candidates
719 for loop_id in &cfg.loop_entries {
720 loop_exits.insert(*loop_id, IndexMap::new());
721 }
722
723 // Compute the candidates
724 Self::compute_loop_exit_candidates(
725 cfg,
726 &mut explored,
727 &mut ordered_loops,
728 &mut loop_exits,
729 Vec::new(),
730 src::BlockId::ZERO,
731 );
732
733 {
734 // Debugging
735 let candidates: Vec<String> = loop_exits
736 .iter()
737 .map(|(loop_id, candidates)| format!("{loop_id} -> {candidates:?}"))
738 .collect();
739 trace!("Loop exit candidates:\n{}", candidates.join("\n"));
740 }
741
742 // Choose one candidate among the potential candidates.
743 let mut exits: HashSet<src::BlockId> = HashSet::new();
744 let mut chosen_loop_exits: HashMap<src::BlockId, Option<src::BlockId>> = HashMap::new();
745 // For every loop
746 for loop_id in ordered_loops {
747 // Check the candidates.
748 // Ignore the candidates which have already been chosen as exits for other
749 // loops (which should be outer loops).
750 // We choose the exit with:
751 // - the most occurrences
752 // - the least total distance (if there are several possibilities)
753 // - doesn't necessarily lead to an error (panic, unreachable)
754
755 // First:
756 // - filter the candidates
757 // - compute the number of occurrences
758 // - compute the sum of distances
759 // TODO: we could simply order by using a lexicographic order
760 let loop_exits = loop_exits
761 .get(&loop_id)
762 .unwrap()
763 .iter()
764 // If candidate already selected for another loop: ignore
765 .filter(|(candidate_id, _)| !exits.contains(candidate_id))
766 .map(|(candidate_id, candidate_info)| {
767 let num_occurrences = candidate_info.occurrences.len();
768 let dist_sum = candidate_info.occurrences.iter().sum();
769 (*candidate_id, num_occurrences, dist_sum)
770 })
771 .collect_vec();
772
773 trace!(
774 "Loop {}: possible exits:\n{}",
775 loop_id,
776 loop_exits
777 .iter()
778 .map(|(bid, occs, dsum)| format!(
779 "{bid} -> {{ occurrences: {occs}, dist_sum: {dsum} }}",
780 ))
781 .collect::<Vec<String>>()
782 .join("\n")
783 );
784
785 // Second: actually select the proper candidate.
786
787 // We find the one with the highest occurrence and the smallest distance
788 // from the entry of the loop (note that we take care of listing the exit
789 // candidates in a deterministic order).
790 let mut best_exit: Option<src::BlockId> = None;
791 let mut best_occurrences = 0;
792 let mut best_dist_sum = std::usize::MAX;
793 for (candidate_id, occurrences, dist_sum) in &loop_exits {
794 if (*occurrences > best_occurrences)
795 || (*occurrences == best_occurrences && *dist_sum < best_dist_sum)
796 {
797 best_exit = Some(*candidate_id);
798 best_occurrences = *occurrences;
799 best_dist_sum = *dist_sum;
800 }
801 }
802
803 let possible_candidates: Vec<_> = loop_exits
804 .iter()
805 .filter_map(|(bid, occs, dsum)| {
806 if *occs == best_occurrences && *dsum == best_dist_sum {
807 Some(*bid)
808 } else {
809 None
810 }
811 })
812 .collect();
813 let num_possible_candidates = loop_exits.len();
814
815 // If there is exactly one one best candidate, it is easy.
816 // Otherwise we need to split further.
817 if num_possible_candidates > 1 {
818 trace!("Best candidates: {:?}", possible_candidates);
819 // TODO: if we use a lexicographic order we can merge this with the code
820 // above.
821 // Remove the candidates which only lead to errors (panic or unreachable).
822 let candidates: Vec<_> = possible_candidates
823 .iter()
824 .filter(|&&bid| !cfg.block_data[bid].only_reach_error)
825 .collect();
826 // If there is exactly one candidate we select it
827 if candidates.len() == 1 {
828 let exit_id = *candidates[0];
829 exits.insert(exit_id);
830 trace!("Loop {loop_id}: selected the best exit candidate {exit_id}");
831 chosen_loop_exits.insert(loop_id, Some(exit_id));
832 } else {
833 // Otherwise we do not select any exit.
834 // We don't want to select any exit if we are in the below situation
835 // (all paths lead to errors). We added a sanity check below to
836 // catch the situations where there are several exits which don't
837 // lead to errors.
838 //
839 // Example:
840 // ========
841 // ```
842 // loop {
843 // match ls {
844 // List::Nil => {
845 // panic!() // <-- best candidate
846 // }
847 // List::Cons(x, tl) => {
848 // if i == 0 {
849 // return x;
850 // } else {
851 // ls = tl;
852 // i -= 1;
853 // }
854 // }
855 // _ => {
856 // unreachable!(); // <-- best candidate (Rustc introduces an `unreachable` case)
857 // }
858 // }
859 // }
860 // ```
861 //
862 // Adding this sanity check so that we can see when there are
863 // several candidates.
864 let span = body.body[loop_id].terminator.span; // Taking *a* span from the block
865 sanity_check!(ctx, span, candidates.is_empty());
866 trace!(
867 "Loop {loop_id}: did not select an exit candidate because they all lead to panics"
868 );
869 chosen_loop_exits.insert(loop_id, None);
870 }
871 } else {
872 // Register the exit, if there is one
873 if let Some(exit_id) = best_exit {
874 exits.insert(exit_id);
875 }
876 chosen_loop_exits.insert(loop_id, best_exit);
877 }
878 }
879
880 // Return the chosen exits
881 trace!("Chosen loop exits: {:?}", chosen_loop_exits);
882 chosen_loop_exits
883 }
884
885 /// See [`compute_loop_switch_exits`] for
886 /// explanations about what "exits" are.
887 ///
888 /// In order to compute the switch exits, we simply recursively compute a
889 /// topologically ordered set of "filtered successors" as follows (note
890 /// that we work in the CFG *without* back edges):
891 /// - for a block which doesn't branch (only one successor), the filtered
892 /// successors is the set of reachable nodes.
893 /// - for a block which branches, we compute the nodes reachable from all
894 /// the children, and find the "best" intersection between those.
895 /// Note that we find the "best" intersection (a pair of branches which
896 /// maximize the intersection of filtered successors) because some branches
897 /// might never join the control-flow of the other branches, if they contain
898 /// a `break`, `return`, `panic`, etc., like here:
899 /// ```text
900 /// if b { x = 3; } { return; }
901 /// y += x;
902 /// ...
903 /// ```
904 /// Note that with nested switches, the branches of the inner switches might
905 /// goto the exits of the outer switches: for this reason, we give precedence
906 /// to the outer switches.
907 fn compute_switch_exits(cfg: &CfgInfo) -> HashMap<src::BlockId, Option<src::BlockId>> {
908 // Compute the successors info map, starting at the root node
909 trace!(
910 "- cfg.cfg:\n{:?}\n- cfg.cfg_no_be:\n{:?}\n- cfg.switch_blocks:\n{:?}",
911 cfg.cfg, cfg.fwd_cfg, cfg.switch_blocks
912 );
913
914 // We need to give precedence to the outer switches: we thus iterate
915 // over the switch blocks in topological order.
916 let mut sorted_switch_blocks: BTreeSet<OrdBlockId> = BTreeSet::new();
917 for bid in cfg.switch_blocks.iter() {
918 sorted_switch_blocks.insert(cfg.make_ord_block_id(*bid));
919 }
920 trace!("sorted_switch_blocks: {:?}", sorted_switch_blocks);
921
922 // Debugging: print all the successors
923 {
924 trace!("Successors info:\n{}\n", {
925 let mut out = vec![];
926 for (bid, info) in cfg.block_data.iter_indexed() {
927 out.push(format!("{} -> {{flow: {:?}}}", bid, &info.flow).to_string());
928 }
929 out.join("\n")
930 });
931 }
932
933 // For every node which is a switch, retrieve the exit.
934 // As the set of intersection of successors is topologically sorted, the
935 // exit should be the first node in the set (if the set is non empty).
936 // Also, we need to explore the nodes in topological order, to give
937 // precedence to the outer switches.
938 let mut exits_set = HashSet::new();
939 let mut exits = HashMap::new();
940 for bid in sorted_switch_blocks {
941 trace!("Finding exit candidate for: {bid:?}");
942 let bid = bid.id;
943 let block_data = &cfg.block_data[bid];
944 // Find the best successor: this is the node with the highest flow, and the
945 // highest reverse topological rank.
946 //
947 // Remark: in order to rank the nodes, we also use the negation of the
948 // rank given by the topological order. The last elements of the set
949 // have the highest flow, that is they are the nodes to which the maximum
950 // number of paths converge. If several nodes have the same flow, we want
951 // to take the highest one in the hierarchy: hence the use of the inverse
952 // of the topological rank.
953 //
954 // Ex.:
955 // ```text
956 // A -- we start here
957 // |
958 // |---------------------------------------
959 // | | | |
960 // B:(0.25,-1) C:(0.25,-2) D:(0.25,-3) E:(0.25,-4)
961 // | | |
962 // |--------------------------
963 // |
964 // F:(0.75,-5)
965 // |
966 // |
967 // G:(0.75,-6)
968 // ```
969 // The "best" node (with the highest (flow, rank) in the graph above is F.
970 let switch_exit: Option<BlockWithRank<(BigRational, isize)>> = block_data
971 .reachable_excluding_self()
972 .map(|id| {
973 let flow = block_data.flow[id].clone();
974 let rank = -isize::try_from(cfg.topo_rank(id)).unwrap();
975 BlockWithRank {
976 rank: (flow, rank),
977 id,
978 }
979 })
980 .max();
981 let exit = if let Some(exit) = switch_exit {
982 // We have an exit candidate: check that it was not already
983 // taken by an external switch
984 trace!("{bid:?} has an exit candidate: {exit:?}");
985 if exits_set.contains(&exit.id) {
986 trace!(
987 "Ignoring the exit candidate because already taken by an external switch"
988 );
989 None
990 } else {
991 // It was not taken by an external switch.
992 //
993 // We must check that we can't reach the exit of an external
994 // switch from one of the branches, without going through the
995 // exit candidate.
996 // We do this by simply checking that we can't reach any exits
997 // (and use the fact that we explore the switch by using a
998 // topological order to not discard valid exit candidates).
999 //
1000 // The reason is that it can lead to code like the following:
1001 // ```
1002 // if ... { // if #1
1003 // if ... { // if #2
1004 // ...
1005 // // here, we have a `goto b1`, where b1 is the exit
1006 // // of if #2: we thus stop translating the blocks.
1007 // }
1008 // else {
1009 // ...
1010 // // here, we have a `goto b2`, where b2 is the exit
1011 // // of if #1: we thus stop translating the blocks.
1012 // }
1013 // // We insert code for the block b1 here (which is the exit of
1014 // // the exit of if #2). However, this block should only
1015 // // be executed in the branch "then" of the if #2, not in
1016 // // the branch "else".
1017 // ...
1018 // }
1019 // else {
1020 // ...
1021 // }
1022 // ```
1023
1024 // First: we do a quick check (does the set of all successors
1025 // intersect the set of exits for outer blocks?). If yes, we do
1026 // a more precise analysis: we check if we can reach the exit
1027 // *without going through* the exit candidate.
1028 let can_reach_exit = block_data
1029 .reachable_excluding_self()
1030 .any(|bid| exits_set.contains(&bid));
1031 if !can_reach_exit || !Self::can_reach_outer_exit(cfg, &exits_set, bid, exit.id)
1032 {
1033 trace!("Keeping the exit candidate");
1034 // No intersection: ok
1035 exits_set.insert(exit.id);
1036 Some(exit.id)
1037 } else {
1038 trace!(
1039 "Ignoring the exit candidate because of an intersection with external switches"
1040 );
1041 None
1042 }
1043 }
1044 } else {
1045 trace!("{bid:?} has no successors");
1046 None
1047 };
1048
1049 exits.insert(bid, exit);
1050 }
1051
1052 exits
1053 }
1054
1055 /// Compute the exits for the loops and the switches (switch on integer and
1056 /// if ... then ... else ...). We need to do this because control-flow in MIR
1057 /// is destructured: we have gotos everywhere.
1058 ///
1059 /// Let's consider the following piece of code:
1060 /// ```text
1061 /// if cond1 { ... } else { ... };
1062 /// if cond2 { ... } else { ... };
1063 /// ```
1064 /// Once converted to MIR, the control-flow is destructured, which means we
1065 /// have gotos everywhere. When reconstructing the control-flow, we have
1066 /// to be careful about the point where we should join the two branches of
1067 /// the first if.
1068 /// For instance, if we don't notice they should be joined at some point (i.e,
1069 /// whatever the branch we take, there is a moment when we go to the exact
1070 /// same place, just before the second if), we might generate code like
1071 /// this, with some duplicata:
1072 /// ```text
1073 /// if cond1 { ...; if cond2 { ... } else { ...} }
1074 /// else { ...; if cond2 { ... } else { ...} }
1075 /// ```
1076 ///
1077 /// Such a reconstructed program is valid, but it is definitely non-optimal:
1078 /// it is very different from the original program (making it less clean and
1079 /// clear), more bloated, and might involve duplicating the proof effort.
1080 ///
1081 /// For this reason, we need to find the "exit" of the first loop, which is
1082 /// the point where the two branches join. Note that this can be a bit tricky,
1083 /// because there may be more than two branches (if we do `switch(x) { ... }`),
1084 /// and some of them might not join (if they contain a `break`, `panic`,
1085 /// `return`, etc.).
1086 ///
1087 /// Finally, some similar issues arise for loops. For instance, let's consider
1088 /// the following piece of code:
1089 /// ```text
1090 /// while cond1 {
1091 /// e1;
1092 /// if cond2 {
1093 /// break;
1094 /// }
1095 /// e2;
1096 /// }
1097 /// e3;
1098 /// return;
1099 /// ```
1100 ///
1101 /// Note that in MIR, the loop gets desugared to an if ... then ... else ....
1102 /// From the MIR, We want to generate something like this:
1103 /// ```text
1104 /// loop {
1105 /// if cond1 {
1106 /// e1;
1107 /// if cond2 {
1108 /// break;
1109 /// }
1110 /// e2;
1111 /// continue;
1112 /// }
1113 /// else {
1114 /// break;
1115 /// }
1116 /// };
1117 /// e3;
1118 /// return;
1119 /// ```
1120 ///
1121 /// But if we don't pay attention, we might end up with that, once again with
1122 /// duplications:
1123 /// ```text
1124 /// loop {
1125 /// if cond1 {
1126 /// e1;
1127 /// if cond2 {
1128 /// e3;
1129 /// return;
1130 /// }
1131 /// e2;
1132 /// continue;
1133 /// }
1134 /// else {
1135 /// e3;
1136 /// return;
1137 /// }
1138 /// }
1139 /// ```
1140 /// We thus have to notice that if the loop condition is false, we goto the same
1141 /// block as when following the goto introduced by the break inside the loop, and
1142 /// this block is dubbed the "loop exit".
1143 ///
1144 /// The following function thus computes the "exits" for loops and switches, which
1145 /// are basically the points where control-flow joins.
1146 fn compute(ctx: &mut TransformCtx, body: &src::ExprBody, cfg_info: &CfgInfo) -> Self {
1147 // Compute the loop exits
1148 let loop_exits = Self::compute_loop_exits(ctx, body, cfg_info);
1149 trace!("loop_exits:\n{:?}", loop_exits);
1150
1151 // Compute the switch exits
1152 let switch_exits = Self::compute_switch_exits(cfg_info);
1153 trace!("switch_exits:\n{:?}", switch_exits);
1154
1155 // Compute the exit info
1156 let mut exits_info = ExitsInfo {
1157 exits: body.body.map_ref(|_| Default::default()),
1158 };
1159
1160 // We need to give precedence to the outer switches and loops: we thus iterate
1161 // over the blocks in topological order.
1162 let mut sorted_blocks: BTreeSet<OrdBlockId> = BTreeSet::new();
1163 for bid in cfg_info
1164 .loop_entries
1165 .iter()
1166 .chain(cfg_info.switch_blocks.iter())
1167 {
1168 sorted_blocks.insert(cfg_info.make_ord_block_id(*bid));
1169 }
1170
1171 // Keep track of the exits which were already attributed
1172 let mut all_exits = HashSet::new();
1173
1174 // Put all this together
1175 for bid in sorted_blocks {
1176 let bid = bid.id;
1177 // Check if loop or switch block
1178 let exit_info = &mut exits_info.exits[bid];
1179 if cfg_info.loop_entries.contains(&bid) {
1180 exit_info.is_loop_entry = true;
1181 // This is a loop.
1182 //
1183 // For loops, we always register the exit (if there is one).
1184 // However, the exit may be owned by an outer switch (note
1185 // that we already took care of spreading the exits between
1186 // the inner/outer loops)
1187 let exit_id = loop_exits.get(&bid).unwrap();
1188 exit_info.loop_exit = *exit_id;
1189 // Check if we "own" the exit
1190 let exit_id = exit_id.filter(|exit_id| !all_exits.contains(exit_id));
1191 if let Some(exit_id) = exit_id {
1192 all_exits.insert(exit_id);
1193 }
1194 exit_info.owned_loop_exit = exit_id;
1195 } else {
1196 exit_info.is_switch_entry = true;
1197 // For switches: check that the exit was not already given to a
1198 // loop
1199 let exit_id = switch_exits.get(&bid).unwrap();
1200 // Check if we "own" the exit
1201 let exit_id = exit_id.filter(|exit_id| !all_exits.contains(exit_id));
1202 if let Some(exit_id) = exit_id {
1203 all_exits.insert(exit_id);
1204 }
1205 exit_info.owned_switch_exit = exit_id;
1206 }
1207 }
1208
1209 exits_info
1210 }
1211}
1212
1213enum GotoKind {
1214 Break(usize),
1215 Continue(usize),
1216 ExitBlock,
1217 Goto,
1218}
1219
1220struct ReconstructCtx<'a> {
1221 body: &'a src::ExprBody,
1222 exits_info: ExitsInfo,
1223 explored: HashSet<src::BlockId>,
1224 parent_loops: Vec<src::BlockId>,
1225 switch_exit_blocks: HashSet<src::BlockId>,
1226}
1227
1228impl<'a> ReconstructCtx<'a> {
1229 fn build(ctx: &mut TransformCtx, src_body: &'a src::ExprBody) -> Result<Self, Irreducible> {
1230 // Explore the function body to create the control-flow graph without backward
1231 // edges, and identify the loop entries (which are destinations of backward edges).
1232 let cfg_info = CfgInfo::build(&src_body.body)?;
1233
1234 // Find the exit block for all the loops and switches, if such an exit point exists.
1235 let exits_info = ExitsInfo::compute(ctx, src_body, &cfg_info);
1236
1237 // Translate the body by reconstructing the loops and the
1238 // conditional branchings.
1239 // Note that we shouldn't get `None`.
1240 Ok(ReconstructCtx {
1241 body: src_body,
1242 exits_info: exits_info,
1243 explored: HashSet::new(),
1244 parent_loops: Vec::new(),
1245 switch_exit_blocks: HashSet::new(),
1246 })
1247 }
1248
1249 fn get_goto_kind(&self, next_block_id: src::BlockId) -> GotoKind {
1250 // First explore the parent loops in revert order
1251 for (i, &loop_id) in self.parent_loops.iter().rev().enumerate() {
1252 // If we goto a loop entry node: this is a 'continue'
1253 if next_block_id == loop_id {
1254 return GotoKind::Continue(i);
1255 } else {
1256 // If we goto a loop exit node: this is a 'break'
1257 if let Some(exit_id) = self.exits_info.exits[loop_id].loop_exit {
1258 if next_block_id == exit_id {
1259 return GotoKind::Break(i);
1260 }
1261 }
1262 }
1263 }
1264
1265 // Check if the goto exits the current block
1266 if self.switch_exit_blocks.contains(&next_block_id) {
1267 return GotoKind::ExitBlock;
1268 }
1269
1270 // Default
1271 GotoKind::Goto
1272 }
1273
1274 /// `parent_span`: we need some span data for the new statement.
1275 /// We use the one for the parent terminator.
1276 fn translate_child_block(&mut self, parent_span: Span, child_id: src::BlockId) -> tgt::Block {
1277 // Check if this is a backward call
1278 match self.get_goto_kind(child_id) {
1279 GotoKind::Break(index) => {
1280 let st = tgt::StatementKind::Break(index);
1281 tgt::Statement::new(parent_span, st).into_block()
1282 }
1283 GotoKind::Continue(index) => {
1284 let st = tgt::StatementKind::Continue(index);
1285 tgt::Statement::new(parent_span, st).into_block()
1286 }
1287 // If we are going to an exit block we simply ignore the goto
1288 GotoKind::ExitBlock => {
1289 let st = tgt::StatementKind::Nop;
1290 tgt::Statement::new(parent_span, st).into_block()
1291 }
1292 GotoKind::Goto => {
1293 // "Standard" goto: just recursively translate
1294 ensure_sufficient_stack(|| self.translate_block(child_id))
1295 }
1296 }
1297 }
1298
1299 fn translate_statement(&self, st: &src::Statement) -> Option<tgt::Statement> {
1300 let src_span = st.span;
1301 let st = match st.kind.clone() {
1302 src::StatementKind::Assign(place, rvalue) => tgt::StatementKind::Assign(place, rvalue),
1303 src::StatementKind::SetDiscriminant(place, variant_id) => {
1304 tgt::StatementKind::SetDiscriminant(place, variant_id)
1305 }
1306 src::StatementKind::CopyNonOverlapping(copy) => {
1307 tgt::StatementKind::CopyNonOverlapping(copy)
1308 }
1309 src::StatementKind::StorageLive(var_id) => tgt::StatementKind::StorageLive(var_id),
1310 src::StatementKind::StorageDead(var_id) => tgt::StatementKind::StorageDead(var_id),
1311 src::StatementKind::Deinit(place) => tgt::StatementKind::Deinit(place),
1312 src::StatementKind::Assert(assert) => tgt::StatementKind::Assert(assert),
1313 src::StatementKind::Nop => tgt::StatementKind::Nop,
1314 src::StatementKind::Error(s) => tgt::StatementKind::Error(s),
1315 };
1316 Some(tgt::Statement::new(src_span, st))
1317 }
1318
1319 fn translate_terminator(&mut self, terminator: &src::Terminator) -> tgt::Block {
1320 let src_span = terminator.span;
1321
1322 match &terminator.kind {
1323 src::TerminatorKind::Abort(kind) => {
1324 tgt::Statement::new(src_span, tgt::StatementKind::Abort(kind.clone())).into_block()
1325 }
1326 src::TerminatorKind::Return => {
1327 tgt::Statement::new(src_span, tgt::StatementKind::Return).into_block()
1328 }
1329 src::TerminatorKind::UnwindResume => {
1330 tgt::Statement::new(src_span, tgt::StatementKind::Abort(AbortKind::Panic(None)))
1331 .into_block()
1332 }
1333 src::TerminatorKind::Call {
1334 call,
1335 target,
1336 on_unwind: _,
1337 } => {
1338 // TODO: Have unwinds in the LLBC
1339 let st = tgt::Statement::new(src_span, tgt::StatementKind::Call(call.clone()));
1340 let mut block = self.translate_child_block(terminator.span, *target);
1341 block.statements.insert(0, st);
1342 block
1343 }
1344 src::TerminatorKind::Drop {
1345 kind,
1346 place,
1347 tref,
1348 target,
1349 on_unwind: _,
1350 } => {
1351 // TODO: Have unwinds in the LLBC
1352 let st = tgt::Statement::new(
1353 src_span,
1354 tgt::StatementKind::Drop(place.clone(), tref.clone(), kind.clone()),
1355 );
1356 let mut block = self.translate_child_block(terminator.span, *target);
1357 block.statements.insert(0, st);
1358 block
1359 }
1360 src::TerminatorKind::Goto { target } => {
1361 self.translate_child_block(terminator.span, *target)
1362 }
1363 src::TerminatorKind::Switch { discr, targets } => {
1364 // Translate the target expressions
1365 let switch = match &targets {
1366 src::SwitchTargets::If(then_tgt, else_tgt) => {
1367 let then_block = self.translate_child_block(terminator.span, *then_tgt);
1368 let else_block = self.translate_child_block(terminator.span, *else_tgt);
1369 tgt::Switch::If(discr.clone(), then_block, else_block)
1370 }
1371 src::SwitchTargets::SwitchInt(int_ty, targets, otherwise) => {
1372 // Note that some branches can be grouped together, like
1373 // here:
1374 // ```
1375 // match e {
1376 // E::V1 | E::V2 => ..., // Grouped
1377 // E::V3 => ...
1378 // }
1379 // ```
1380 // We detect this by checking if a block has already been
1381 // translated as one of the branches of the switch.
1382 //
1383 // Rk.: note there may be intermediate gotos depending
1384 // on the MIR we use. Typically, we manage to detect the
1385 // grouped branches with Optimized MIR, but not with Promoted
1386 // MIR. See the comment in "tests/src/matches.rs".
1387
1388 // We link block ids to:
1389 // - vector of matched integer values
1390 // - translated blocks
1391 let mut branches: IndexMap<src::BlockId, (Vec<Literal>, tgt::Block)> =
1392 IndexMap::new();
1393
1394 // Translate the children expressions
1395 for (v, bid) in targets.iter() {
1396 // Check if the block has already been translated:
1397 // if yes, it means we need to group branches
1398 if branches.contains_key(bid) {
1399 // Already translated: add the matched value to
1400 // the list of values
1401 let branch = branches.get_mut(bid).unwrap();
1402 branch.0.push(v.clone());
1403 } else {
1404 // Not translated: translate it
1405 let block = self.translate_child_block(terminator.span, *bid);
1406 // We use the terminator span information in case then
1407 // then statement is `None`
1408 branches.insert(*bid, (vec![v.clone()], block));
1409 }
1410 }
1411 let targets_blocks: Vec<(Vec<Literal>, tgt::Block)> =
1412 branches.into_iter().map(|(_, x)| x).collect();
1413
1414 let otherwise_block =
1415 self.translate_child_block(terminator.span, *otherwise);
1416
1417 // Translate
1418 tgt::Switch::SwitchInt(
1419 discr.clone(),
1420 *int_ty,
1421 targets_blocks,
1422 otherwise_block,
1423 )
1424 }
1425 };
1426
1427 // Return
1428 let span = tgt::combine_switch_targets_span(&switch);
1429 let span = combine_span(&src_span, &span);
1430 let st = tgt::StatementKind::Switch(switch);
1431 tgt::Statement::new(span, st).into_block()
1432 }
1433 }
1434 }
1435
1436 /// Remark: some values are boxed (here, the returned statement) so that they
1437 /// are allocated on the heap. This reduces stack usage (we had problems with
1438 /// stack overflows in the past). A more efficient solution would be to use loops
1439 /// to make this code constant space, but that would require a serious rewriting.
1440 fn translate_block(&mut self, block_id: src::BlockId) -> tgt::Block {
1441 // If the user activated this check: check that we didn't already translate
1442 // this block, and insert the block id in the set of already translated blocks.
1443 self.explored.insert(block_id);
1444
1445 let block = &self.body.body[block_id];
1446
1447 let exit_info = &self.exits_info.exits[block_id];
1448 // Check if we enter a loop: if so, update parent_loops and the current_exit_block
1449 let is_loop = exit_info.is_loop_entry;
1450 // If we enter a switch or a loop, we need to check if we own the exit
1451 // block, in which case we need to append it to the loop/switch body
1452 // in a sequence
1453 let is_switch = exit_info.is_switch_entry;
1454 let next_block = if is_loop {
1455 exit_info.owned_loop_exit
1456 } else if is_switch {
1457 exit_info.owned_switch_exit
1458 } else {
1459 None
1460 };
1461
1462 let terminator = {
1463 if is_loop {
1464 self.parent_loops.push(block_id);
1465 }
1466 // If we enter a switch, add the exit block to the set of outer exit blocks.
1467 if is_switch && let Some(bid) = next_block {
1468 assert!(!self.switch_exit_blocks.contains(&bid));
1469 self.switch_exit_blocks.insert(bid);
1470 }
1471
1472 // Translate the terminator and the subsequent blocks.
1473 // Note that this terminator is an option: we might ignore it
1474 // (if it is an exit).
1475 let terminator = self.translate_terminator(&block.terminator);
1476
1477 // Reset the state to what it was previously.
1478 if is_loop {
1479 self.parent_loops.pop();
1480 }
1481 if is_switch && let Some(bid) = next_block {
1482 self.switch_exit_blocks.remove(&bid);
1483 }
1484 terminator
1485 };
1486
1487 // Translate the statements inside the block
1488 let statements = block
1489 .statements
1490 .iter()
1491 .filter_map(|st| self.translate_statement(st))
1492 .collect_vec();
1493
1494 // Prepend the statements to the terminator.
1495 let mut block = if let Some(st) = tgt::Block::from_seq(statements) {
1496 st.merge(terminator)
1497 } else {
1498 terminator
1499 };
1500
1501 if is_loop {
1502 // Put the loop body inside a `Loop`.
1503 block = tgt::Statement::new(block.span, tgt::StatementKind::Loop(block)).into_block()
1504 } else if !is_switch {
1505 assert!(next_block.is_none());
1506 }
1507
1508 // Concatenate the exit expression, if needs be
1509 if let Some(exit_block_id) = next_block {
1510 let next_block = ensure_sufficient_stack(|| self.translate_block(exit_block_id));
1511 block = block.merge(next_block);
1512 }
1513 block
1514 }
1515}
1516
1517fn translate_body(ctx: &mut TransformCtx, body: &mut gast::Body) {
1518 use gast::Body::{Structured, Unstructured};
1519 let Unstructured(src_body) = body else {
1520 panic!("Called `ullbc_to_llbc` on an already restructured body")
1521 };
1522 trace!("About to translate to ullbc: {:?}", src_body.span);
1523
1524 // Calculate info about the graph and heuristically determine loop and switch exit blocks.
1525 let mut ctx = match ReconstructCtx::build(ctx, src_body) {
1526 Ok(ctx) => ctx,
1527 Err(Irreducible(bid)) => {
1528 let span = src_body.body[bid].terminator.span;
1529 register_error!(
1530 ctx,
1531 span,
1532 "the control-flow graph of this function is not reducible"
1533 );
1534 panic!("can't reconstruct irreducible control-flow")
1535 }
1536 };
1537 // Translate the blocks using the computed data.
1538 let tgt_body = ctx.translate_block(src::BlockId::ZERO);
1539
1540 let tgt_body = tgt::ExprBody {
1541 span: src_body.span,
1542 locals: src_body.locals.clone(),
1543 comments: src_body.comments.clone(),
1544 body: tgt_body,
1545 };
1546 *body = Structured(tgt_body);
1547}
1548
1549pub struct Transform;
1550impl TransformPass for Transform {
1551 fn transform_ctx(&self, ctx: &mut TransformCtx) {
1552 // Translate the bodies one at a time.
1553 ctx.for_each_body(|ctx, body| {
1554 translate_body(ctx, body);
1555 });
1556
1557 // Print the functions
1558 let fmt_ctx = &ctx.into_fmt();
1559 for fun in &ctx.translated.fun_decls {
1560 trace!(
1561 "# Signature:\n{}\n\n# Function definition:\n{}\n",
1562 fun.signature.with_ctx(fmt_ctx),
1563 fun.with_ctx(fmt_ctx),
1564 );
1565 }
1566
1567 if ctx.options.print_built_llbc {
1568 eprintln!("# LLBC resulting from control-flow reconstruction:\n\n{ctx}\n",);
1569 }
1570 }
1571}