charon_lib/transform/ullbc_to_llbc.rs
1//! ULLBC to LLBC
2//!
3//! We reconstruct the control-flow in the Unstructured LLBC.
4//!
5//! The reconstruction algorithm is not written to be efficient (its complexity
6//! is probably very bad), but it was not written to be: this is still an early
7//! stage and we want the algorithm to generate the best reconstruction as
8//! possible. We still need to test the algorithm on more interesting examples,
9//! and will consider making it more efficient once it is a bit mature and well
10//! tested.
11//! Also note that we more importantly focus on making the algorithm sound: the
12//! reconstructed program must always be equivalent to the original MIR program,
13//! and the fact that the reconstruction preserves this property must be obvious.
14//!
15//! Finally, we conjecture the execution time shouldn't be too much a problem:
16//! we don't expect the translation mechanism to be applied on super huge functions
17//! (which will be difficult to formally analyze), and the MIR graphs are actually
18//! not that big because statements are grouped into code blocks (a block is made
19//! of a list of statements, followed by a terminator - branchings and jumps can
20//! only be performed by terminators -, meaning that MIR graphs don't have that
21//! many nodes and edges).
22
23use crate::ast::*;
24use crate::common::ensure_sufficient_stack;
25use crate::errors::sanity_check;
26use crate::formatter::{Formatter, IntoFormatter};
27use crate::llbc_ast as tgt;
28use crate::meta::{combine_span, Span};
29use crate::transform::TransformCtx;
30use crate::ullbc_ast::{self as src};
31use indexmap::IndexMap;
32use itertools::Itertools;
33use num_bigint::BigInt;
34use num_rational::BigRational;
35use petgraph::algo::toposort;
36use petgraph::graphmap::DiGraphMap;
37use petgraph::Direction;
38use std::collections::{BTreeSet, HashMap, HashSet, VecDeque};
39
40use super::ctx::TransformPass;
41
42/// Control-Flow Graph
43type Cfg = DiGraphMap<src::BlockId, ()>;
44
45/// Small utility
46struct BlockInfo<'a> {
47 cfg: &'a CfgInfo,
48 body: &'a src::ExprBody,
49 exits_info: &'a ExitInfo,
50 explored: &'a mut HashSet<src::BlockId>,
51}
52
53/// This structure contains various information about a function's CFG.
54#[derive(Debug)]
55struct CfgInfo {
56 /// The CFG
57 pub cfg: Cfg,
58 /// The CFG where all the backward edges have been removed
59 pub cfg_no_be: Cfg,
60 /// We consider the destination of the backward edges to be loop entries and
61 /// store them here.
62 pub loop_entries: HashSet<src::BlockId>,
63 /// The backward edges
64 pub backward_edges: HashSet<(src::BlockId, src::BlockId)>,
65 /// The blocks whose terminators are a switch are stored here.
66 pub switch_blocks: HashSet<src::BlockId>,
67 /// The set of nodes from where we can only reach error nodes (panic, etc.)
68 pub only_reach_error: HashSet<src::BlockId>,
69}
70
71/// Build the CFGs (the "regular" CFG and the CFG without backward edges) and
72/// compute some information like the loop entries and the switch blocks.
73fn build_cfg_info(body: &src::ExprBody) -> CfgInfo {
74 let mut cfg = CfgInfo {
75 cfg: Cfg::new(),
76 cfg_no_be: Cfg::new(),
77 loop_entries: HashSet::new(),
78 backward_edges: HashSet::new(),
79 switch_blocks: HashSet::new(),
80 only_reach_error: HashSet::new(),
81 };
82
83 // Add the nodes
84 for block_id in body.body.iter_indices() {
85 cfg.cfg.add_node(block_id);
86 cfg.cfg_no_be.add_node(block_id);
87 }
88
89 // Add the edges
90 let ancestors = HashSet::new();
91 let mut explored = HashSet::new();
92 build_cfg_partial_info_edges(
93 &mut cfg,
94 &ancestors,
95 &mut explored,
96 body,
97 src::BlockId::ZERO,
98 );
99
100 cfg
101}
102
103fn block_is_switch(body: &src::ExprBody, block_id: src::BlockId) -> bool {
104 let block = body.body.get(block_id).unwrap();
105 block.terminator.content.is_switch()
106}
107
108/// The terminator of the block is a panic, etc.
109fn block_is_error(body: &src::ExprBody, block_id: src::BlockId) -> bool {
110 let block = body.body.get(block_id).unwrap();
111 use src::RawTerminator::*;
112 match &block.terminator.content {
113 Abort(..) => true,
114 Goto { .. } | Switch { .. } | Return | Call { .. } => false,
115 }
116}
117
118fn build_cfg_partial_info_edges(
119 cfg: &mut CfgInfo,
120 ancestors: &HashSet<src::BlockId>,
121 explored: &mut HashSet<src::BlockId>,
122 body: &src::ExprBody,
123 block_id: src::BlockId,
124) {
125 // Check if we already explored the current node
126 if explored.contains(&block_id) {
127 return;
128 }
129 explored.insert(block_id);
130
131 // Insert the current block in the set of ancestors blocks
132 let mut ancestors = ancestors.clone();
133 ancestors.insert(block_id);
134
135 // Check if it is a switch
136 if block_is_switch(body, block_id) {
137 cfg.switch_blocks.insert(block_id);
138 }
139
140 // Retrieve the block targets
141 let targets = body.body.get(block_id).unwrap().targets();
142 let mut has_backward_edge = false;
143
144 // Add edges for all the targets and explore them, if they are not predecessors
145 for tgt in &targets {
146 // Insert the edge in the "regular" CFG
147 cfg.cfg.add_edge(block_id, *tgt, ());
148
149 // We need to check if it is a backward edge before inserting it in the
150 // CFG without backward edges and exploring it
151 if ancestors.contains(tgt) {
152 // This is a backward edge
153 has_backward_edge = true;
154 cfg.loop_entries.insert(*tgt);
155 cfg.backward_edges.insert((block_id, *tgt));
156 } else {
157 // Not a backward edge: insert the edge and explore
158 cfg.cfg_no_be.add_edge(block_id, *tgt, ());
159 ensure_sufficient_stack(|| {
160 build_cfg_partial_info_edges(cfg, &ancestors, explored, body, *tgt);
161 })
162 }
163 }
164
165 // Check if this node can only reach error nodes:
166 // - we check if the current node ends with an error terminator
167 // - or check that all the targets lead to error nodes
168 // Note that if there is a backward edge, we consider that we don't necessarily
169 // go to error.
170 if !has_backward_edge
171 && (block_is_error(body, block_id)
172 || (
173 // The targets are empty if this is an error node *or* a return node
174 !targets.is_empty() && targets.iter().all(|tgt| cfg.only_reach_error.contains(tgt))
175 ))
176 {
177 cfg.only_reach_error.insert(block_id);
178 }
179}
180
181#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
182struct BlockWithRank<T> {
183 /// A "rank" quantity that we use to order the nodes.
184 /// By placing the `rank` field before the `id`, we make sure that
185 /// we use the rank first in the lexicographic order.
186 rank: T,
187 id: src::BlockId,
188}
189
190type OrdBlockId = BlockWithRank<usize>;
191
192/// For the rank we use:
193/// - a "flow" quantity (see [BlocksInfo])
194/// - the *inverse* rank in the topological sort (i.e., `- topo_rank`)
195type FlowBlockId = BlockWithRank<(BigRational, isize)>;
196
197#[derive(Debug, Clone)]
198struct LoopExitCandidateInfo {
199 /// The occurrences going to this exit.
200 /// For every occurrence, we register the distance between the loop entry
201 /// and the node from which the edge going to the exit starts.
202 /// If later we have to choose between candidates with the same number
203 /// of occurrences, we choose the one with the smallest distances.
204 ///
205 /// Note that it *may* happen that we have several exit candidates referenced
206 /// more than once for one loop. This comes from the fact that whenever we
207 /// reach a node from which the loop entry is not reachable (without using a
208 /// backward edge leading to an outer loop entry), we register this node
209 /// as well as all its children as exit candidates.
210 /// Consider the following example:
211 /// ```text
212 /// while i < max {
213 /// if cond {
214 /// break;
215 /// }
216 /// s += i;
217 /// i += 1
218 /// }
219 /// // All the below nodes are exit candidates (each of them is referenced twice)
220 /// s += 1;
221 /// return s;
222 /// ```
223 pub occurrences: Vec<usize>,
224}
225
226/// Check if a loop entry is reachable from a node, in a graph where we remove
227/// the backward edges going directly to the loop entry.
228///
229/// If the loop entry is not reachable, it means that:
230/// - the loop entry is not reachable at all
231/// - or it is only reachable through an outer loop
232///
233/// The starting node should be a (transitive) child of the loop entry.
234/// We use this to find candidates for loop exits.
235fn loop_entry_is_reachable_from_inner(
236 cfg: &CfgInfo,
237 loop_entry: src::BlockId,
238 block_id: src::BlockId,
239) -> bool {
240 // It is reachable in the complete graph. Check if it is reachable by not
241 // going through backward edges which go to outer loops. In practice, we
242 // just need to forbid the use of any backward edges at the exception of
243 // those which go directly to the current loop's entry. This means that we
244 // ignore backward edges to outer loops of course, but also backward edges
245 // to inner loops because we shouldn't need to follow those (there should be
246 // more direct paths).
247
248 // Explore the graph starting at block_id
249 let mut explored: HashSet<src::BlockId> = HashSet::new();
250 let mut stack: VecDeque<src::BlockId> = VecDeque::new();
251 stack.push_back(block_id);
252 while !stack.is_empty() {
253 let bid = stack.pop_front().unwrap();
254 if explored.contains(&bid) {
255 continue;
256 }
257 explored.insert(bid);
258
259 let next_ids = cfg.cfg.neighbors_directed(bid, Direction::Outgoing);
260 for next_id in next_ids {
261 // Check if this is a backward edge
262 if cfg.backward_edges.contains(&(bid, next_id)) {
263 // Backward edge: only allow those going directly to the current
264 // loop's entry
265 if next_id == loop_entry {
266 // The loop entry is reachable
267 return true;
268 } else {
269 // Forbidden edge: ignore
270 continue;
271 }
272 } else {
273 // Nothing special: add the node to the stack for further
274 // exploration
275 stack.push_back(next_id);
276 }
277 }
278 }
279
280 // The loop entry is not reachable
281 false
282}
283
284struct FilteredLoopParents {
285 remaining_parents: Vec<(src::BlockId, usize)>,
286 removed_parents: Vec<(src::BlockId, usize)>,
287}
288
289fn filter_loop_parents(
290 cfg: &CfgInfo,
291 parent_loops: &Vec<(src::BlockId, usize)>,
292 block_id: src::BlockId,
293) -> Option<FilteredLoopParents> {
294 let mut eliminate: usize = 0;
295 for (loop_id, _ldist) in parent_loops.iter().rev() {
296 if !loop_entry_is_reachable_from_inner(cfg, *loop_id, block_id) {
297 eliminate += 1;
298 } else {
299 break;
300 }
301 }
302
303 if eliminate > 0 {
304 // Split the vector of parents
305 let (remaining_parents, removed_parents) =
306 parent_loops.split_at(parent_loops.len() - eliminate);
307 let (mut remaining_parents, removed_parents) =
308 (remaining_parents.to_vec(), removed_parents.to_vec());
309
310 // Update the distance to the last loop - we just increment the distance
311 // by 1, because from the point of view of the parent loop, we just exited
312 // a block and go to the next sequence of instructions.
313 if !remaining_parents.is_empty() {
314 remaining_parents.last_mut().unwrap().1 += 1;
315 }
316
317 Some(FilteredLoopParents {
318 remaining_parents,
319 removed_parents,
320 })
321 } else {
322 None
323 }
324}
325
326/// List the nodes reachable from a starting point.
327/// We list the nodes and the depth (in the AST) at which they were found.
328fn list_reachable(cfg: &Cfg, start: src::BlockId) -> HashMap<src::BlockId, usize> {
329 let mut reachable: HashMap<src::BlockId, usize> = HashMap::new();
330 let mut stack: VecDeque<(src::BlockId, usize)> = VecDeque::new();
331 stack.push_back((start, 0));
332
333 while !stack.is_empty() {
334 let (bid, dist) = stack.pop_front().unwrap();
335
336 // Ignore this node if we already registered it with a better distance
337 match reachable.get(&bid) {
338 None => (),
339 Some(original_dist) => {
340 if *original_dist < dist {
341 continue;
342 }
343 }
344 }
345
346 // Inset the node with its distance
347 reachable.insert(bid, dist);
348
349 // Add the children to the stack
350 for child in cfg.neighbors(bid) {
351 stack.push_back((child, dist + 1));
352 }
353 }
354
355 // Return
356 reachable
357}
358
359/// Register a node and its children as exit candidates for a list of
360/// parent loops.
361fn register_children_as_loop_exit_candidates(
362 cfg: &CfgInfo,
363 loop_exits: &mut HashMap<src::BlockId, IndexMap<src::BlockId, LoopExitCandidateInfo>>,
364 removed_parent_loops: &Vec<(src::BlockId, usize)>,
365 block_id: src::BlockId,
366) {
367 // List the reachable nodes
368 let reachable = list_reachable(&cfg.cfg_no_be, block_id);
369
370 let mut base_dist = 0;
371 // For every parent loop, in reverse order (we go from last to first in
372 // order to correctly compute the distances)
373 for (loop_id, loop_dist) in removed_parent_loops.iter().rev() {
374 // Update the distance to the loop entry
375 base_dist += *loop_dist;
376
377 // Retrieve the candidates
378 let candidates = loop_exits.get_mut(loop_id).unwrap();
379
380 // Update them
381 for (bid, dist) in reachable.iter() {
382 let distance = base_dist + *dist;
383 match candidates.get_mut(bid) {
384 None => {
385 candidates.insert(
386 *bid,
387 LoopExitCandidateInfo {
388 occurrences: vec![distance],
389 },
390 );
391 }
392 Some(c) => {
393 c.occurrences.push(distance);
394 }
395 }
396 }
397 }
398}
399
400/// Compute the loop exit candidates.
401///
402/// There may be several candidates with the same "optimality" (same number of
403/// occurrences, etc.), in which case we choose the first one which was registered
404/// (the order in which we explore the graph is deterministic): this is why we
405/// store the candidates in a linked hash map.
406fn compute_loop_exit_candidates(
407 cfg: &CfgInfo,
408 explored: &mut HashSet<src::BlockId>,
409 ordered_loops: &mut Vec<src::BlockId>,
410 loop_exits: &mut HashMap<src::BlockId, IndexMap<src::BlockId, LoopExitCandidateInfo>>,
411 // List of parent loops, with the distance to the entry of the loop (the distance
412 // is the distance between the current node and the loop entry for the last parent,
413 // and the distance between the parents for the others).
414 mut parent_loops: Vec<(src::BlockId, usize)>,
415 block_id: src::BlockId,
416) {
417 if explored.contains(&block_id) {
418 return;
419 }
420 explored.insert(block_id);
421
422 // Check if we enter a loop - add the corresponding node if necessary
423 if cfg.loop_entries.contains(&block_id) {
424 parent_loops.push((block_id, 1));
425 ordered_loops.push(block_id);
426 } else {
427 // Increase the distance with the parent loop
428 if !parent_loops.is_empty() {
429 parent_loops.last_mut().unwrap().1 += 1;
430 }
431 };
432
433 // Retrieve the children - note that we ignore the back edges
434 let children = cfg.cfg_no_be.neighbors(block_id);
435 for child in children {
436 // If the parent loop entry is not reachable from the child without going
437 // through a backward edge which goes directly to the loop entry, consider
438 // this node a potential exit.
439 ensure_sufficient_stack(|| {
440 match filter_loop_parents(cfg, &parent_loops, child) {
441 None => {
442 compute_loop_exit_candidates(
443 cfg,
444 explored,
445 ordered_loops,
446 loop_exits,
447 parent_loops.clone(),
448 child,
449 );
450 }
451 Some(fparent_loops) => {
452 // We filtered some parent loops: it means this child and its
453 // children are loop exit candidates for all those loops: we must
454 // thus register them.
455 // Note that we register the child *and* its children: the reason
456 // is that we might do something *then* actually jump to the exit.
457 // For instance, the following block of code:
458 // ```
459 // if cond { break; } else { ... }
460 // ```
461 //
462 // Gets translated in MIR to something like this:
463 // ```
464 // bb1: {
465 // if cond -> bb2 else -> bb3; // bb2 is not the real exit
466 // }
467 //
468 // bb2: {
469 // goto bb4; // bb4 is the real exit
470 // }
471 // ```
472 register_children_as_loop_exit_candidates(
473 cfg,
474 loop_exits,
475 &fparent_loops.removed_parents,
476 child,
477 );
478
479 // Explore, with the filtered parents
480 compute_loop_exit_candidates(
481 cfg,
482 explored,
483 ordered_loops,
484 loop_exits,
485 fparent_loops.remaining_parents,
486 child,
487 );
488 }
489 }
490 })
491 }
492}
493
494/// See [`compute_loop_switch_exits`](compute_loop_switch_exits) for
495/// explanations about what "exits" are.
496///
497/// The following function computes the loop exits. It acts as follows.
498///
499/// We keep track of a stack of the loops in which we entered.
500/// It is very easy to check when we enter a loop: loop entries are destinations
501/// of backward edges, which can be spotted with a simple graph exploration (see
502/// [`build_cfg_partial_info`](build_cfg_partial_info).
503/// The criteria to consider whether we exit a loop is the following:
504/// - we exit a loop if we go to a block from which we can't reach the loop
505/// entry at all
506/// - or if we can reach the loop entry, but must use a backward edge which goes
507/// to an outer loop
508///
509/// It is better explained on the following example:
510/// ```text
511/// 'outer while i < max {
512/// 'inner while j < max {
513/// j += 1;
514/// }
515/// // (i)
516/// i += 1;
517/// }
518/// ```
519/// If we enter the inner loop then go to (i) from the inner loop, we consider
520/// that we exited the outer loop because:
521/// - we can reach the entry of the inner loop from (i) (by finishing then
522/// starting again an iteration of the outer loop)
523/// - but doing this requires taking a backward edge which goes to the outer loop
524///
525/// Whenever we exit a loop, we save the block we went to as an exit candidate
526/// for this loop. Note that there may by many exit candidates. For instance,
527/// in the below example:
528/// ```text
529/// while ... {
530/// ...
531/// if ... {
532/// // We can't reach the loop entry from here: this is an exit
533/// // candidate
534/// return;
535/// }
536/// }
537/// // This is another exit candidate - and this is the one we want to use
538/// // as the "real" exit...
539/// ...
540/// ```
541///
542/// Also note that it may happen that we go several times to the same exit (if
543/// we use breaks for instance): we record the number of times an exit candidate
544/// is used.
545///
546/// Once we listed all the exit candidates, we find the "best" one for every
547/// loop, starting with the outer loops. We start with outer loops because
548/// inner loops might use breaks to exit to the exit of outer loops: if we
549/// start with the inner loops, the exit which is "natural" for the outer loop
550/// might end up being used for one of the inner loops...
551///
552/// The best exit is the following one:
553/// - it is the one which is used the most times (note that there can be
554/// several candidates which are referenced strictly more than once: see the
555/// comment below)
556/// - if several exits have the same number of occurrences, we choose the one
557/// for which we goto the "earliest" (earliest meaning that the goto is close to
558/// the loop entry node in the AST). The reason is that all the loops should
559/// have an outer if ... then ... else ... which executes the loop body or goes
560/// to the exit (note that this is not necessarily the first
561/// if ... then ... else ... we find: loop conditions can be arbitrary
562/// expressions, containing branchings).
563///
564/// # Several candidates for a loop exit:
565/// =====================================
566/// There used to be a sanity check to ensure there are no two different
567/// candidates with exactly the same number of occurrences and distance from
568/// the entry of the loop, if the number of occurrences is > 1.
569///
570/// We removed it because it does happen, for instance here (the match
571/// introduces an `unreachable` node, and it has the same number of
572/// occurrences and the same distance to the loop entry as the `panic`
573/// node):
574///
575/// ```text
576/// pub fn list_nth_mut_loop_pair<'a, T>(
577/// mut ls: &'a mut List<T>,
578/// mut i: u32,
579/// ) -> &'a mut T {
580/// loop {
581/// match ls {
582/// List::Nil => {
583/// panic!() // <-- best candidate
584/// }
585/// List::Cons(x, tl) => {
586/// if i == 0 {
587/// return x;
588/// } else {
589/// ls = tl;
590/// i -= 1;
591/// }
592/// }
593/// _ => {
594/// // Note that Rustc always introduces an unreachable branch after
595/// // desugaring matches.
596/// unreachable!(), // <-- best candidate
597/// }
598/// }
599/// }
600/// }
601/// ```
602///
603/// When this happens we choose an exit candidate whose edges don't necessarily
604/// lead to an error (above there are none, so we don't choose any exits). Note
605/// that this last condition is important to prevent loops from being unnecessarily
606/// nested:
607///
608/// ```text
609/// pub fn nested_loops_enum(step_out: usize, step_in: usize) -> usize {
610/// let mut s = 0;
611///
612/// for _ in 0..128 { // We don't want this loop to be nested with the loops below
613/// s += 1;
614/// }
615///
616/// for _ in 0..(step_out) {
617/// for _ in 0..(step_in) {
618/// s += 1;
619/// }
620/// }
621///
622/// s
623/// }
624/// ```
625fn compute_loop_exits(
626 ctx: &mut TransformCtx,
627 body: &src::ExprBody,
628 cfg: &CfgInfo,
629) -> HashMap<src::BlockId, Option<src::BlockId>> {
630 let mut explored = HashSet::new();
631 let mut ordered_loops = Vec::new();
632 let mut loop_exits = HashMap::new();
633
634 // Initialize the loop exits candidates
635 for loop_id in &cfg.loop_entries {
636 loop_exits.insert(*loop_id, IndexMap::new());
637 }
638
639 // Compute the candidates
640 compute_loop_exit_candidates(
641 cfg,
642 &mut explored,
643 &mut ordered_loops,
644 &mut loop_exits,
645 Vec::new(),
646 src::BlockId::ZERO,
647 );
648
649 {
650 // Debugging
651 let candidates: Vec<String> = loop_exits
652 .iter()
653 .map(|(loop_id, candidates)| format!("{loop_id} -> {candidates:?}"))
654 .collect();
655 trace!("Loop exit candidates:\n{}", candidates.join("\n"));
656 }
657
658 // Choose one candidate among the potential candidates.
659 let mut exits: HashSet<src::BlockId> = HashSet::new();
660 let mut chosen_loop_exits: HashMap<src::BlockId, Option<src::BlockId>> = HashMap::new();
661 // For every loop
662 for loop_id in ordered_loops {
663 // Check the candidates.
664 // Ignore the candidates which have already been chosen as exits for other
665 // loops (which should be outer loops).
666 // We choose the exit with:
667 // - the most occurrences
668 // - the least total distance (if there are several possibilities)
669 // - doesn't necessarily lead to an error (panic, unreachable)
670
671 // First:
672 // - filter the candidates
673 // - compute the number of occurrences
674 // - compute the sum of distances
675 // TODO: we could simply order by using a lexicographic order
676 let loop_exits = loop_exits
677 .get(&loop_id)
678 .unwrap()
679 .iter()
680 // If candidate already selected for another loop: ignore
681 .filter(|(candidate_id, _)| !exits.contains(candidate_id))
682 .map(|(candidate_id, candidate_info)| {
683 let num_occurrences = candidate_info.occurrences.len();
684 let dist_sum = candidate_info.occurrences.iter().sum();
685 (*candidate_id, num_occurrences, dist_sum)
686 })
687 .collect_vec();
688
689 trace!(
690 "Loop {}: possible exits:\n{}",
691 loop_id,
692 loop_exits
693 .iter()
694 .map(|(bid, occs, dsum)| format!(
695 "{bid} -> {{ occurrences: {occs}, dist_sum: {dsum} }}",
696 ))
697 .collect::<Vec<String>>()
698 .join("\n")
699 );
700
701 // Second: actually select the proper candidate.
702
703 // We find the one with the highest occurrence and the smallest distance
704 // from the entry of the loop (note that we take care of listing the exit
705 // candidates in a deterministic order).
706 let mut best_exit: Option<src::BlockId> = None;
707 let mut best_occurrences = 0;
708 let mut best_dist_sum = std::usize::MAX;
709 for (candidate_id, occurrences, dist_sum) in &loop_exits {
710 if (*occurrences > best_occurrences)
711 || (*occurrences == best_occurrences && *dist_sum < best_dist_sum)
712 {
713 best_exit = Some(*candidate_id);
714 best_occurrences = *occurrences;
715 best_dist_sum = *dist_sum;
716 }
717 }
718
719 let possible_candidates: Vec<_> = loop_exits
720 .iter()
721 .filter_map(|(bid, occs, dsum)| {
722 if *occs == best_occurrences && *dsum == best_dist_sum {
723 Some(*bid)
724 } else {
725 None
726 }
727 })
728 .collect();
729 let num_possible_candidates = loop_exits.len();
730
731 // If there is exactly one one best candidate, it is easy.
732 // Otherwise we need to split further.
733 if num_possible_candidates > 1 {
734 trace!("Best candidates: {:?}", possible_candidates);
735 // TODO: if we use a lexicographic order we can merge this with the code
736 // above.
737 // Remove the candidates which only lead to errors (panic or unreachable).
738 let candidates: Vec<_> = possible_candidates
739 .iter()
740 .filter(|bid| !cfg.only_reach_error.contains(bid))
741 .collect();
742 // If there is exactly one candidate we select it
743 if candidates.len() == 1 {
744 let exit_id = *candidates[0];
745 exits.insert(exit_id);
746 trace!("Loop {loop_id}: selected the best exit candidate {exit_id}");
747 chosen_loop_exits.insert(loop_id, Some(exit_id));
748 } else {
749 // Otherwise we do not select any exit.
750 // We don't want to select any exit if we are in the below situation
751 // (all paths lead to errors). We added a sanity check below to
752 // catch the situations where there are several exits which don't
753 // lead to errors.
754 //
755 // Example:
756 // ========
757 // ```
758 // loop {
759 // match ls {
760 // List::Nil => {
761 // panic!() // <-- best candidate
762 // }
763 // List::Cons(x, tl) => {
764 // if i == 0 {
765 // return x;
766 // } else {
767 // ls = tl;
768 // i -= 1;
769 // }
770 // }
771 // _ => {
772 // unreachable!(); // <-- best candidate (Rustc introduces an `unreachable` case)
773 // }
774 // }
775 // }
776 // ```
777 //
778 // Adding this sanity check so that we can see when there are
779 // several candidates.
780 let span = body.body[loop_id].terminator.span; // Taking *a* span from the block
781 sanity_check!(ctx, span, candidates.is_empty());
782 trace!("Loop {loop_id}: did not select an exit candidate because they all lead to panics");
783 chosen_loop_exits.insert(loop_id, None);
784 }
785 } else {
786 // Register the exit, if there is one
787 match best_exit {
788 None => {
789 // No exit was found
790 trace!("Loop {loop_id}: could not find an exit candidate");
791 chosen_loop_exits.insert(loop_id, None);
792 }
793 Some(exit_id) => {
794 exits.insert(exit_id);
795 trace!("Loop {loop_id}: selected the unique exit candidate {exit_id}");
796 chosen_loop_exits.insert(loop_id, Some(exit_id));
797 }
798 }
799 }
800 }
801
802 // Return the chosen exits
803 trace!("Chosen loop exits: {:?}", chosen_loop_exits);
804 chosen_loop_exits
805}
806
807/// Information used to compute the switch exits.
808/// We compute this information for every block in the graph.
809/// Note that we make sure to use immutable sets because we rely a lot
810/// on cloning.
811#[derive(Debug, Clone)]
812struct BlocksInfo {
813 id: src::BlockId,
814 /// All the successors of the block
815 succs: BTreeSet<OrdBlockId>,
816 /// Let's say we put a quantity of water equal to 1 on the block, and the
817 /// water flows downards. Whenever there is a branching, the quantity of
818 /// water gets equally divided between the branches. When the control flows
819 /// join, we put the water back together. The set below computes the amount
820 /// of water received by each descendant of the node.
821 ///
822 /// TODO: there must be a known algorithm which computes this, right?...
823 /// This is exactly this problems:
824 /// https://stackoverflow.com/questions/78221666/algorithm-for-total-flow-through-weighted-directed-acyclic-graph
825 /// TODO: the way I compute this is not efficient.
826 ///
827 /// Remark: in order to rank the nodes, we also use the negation of the
828 /// rank given by the topological order. The last elements of the set
829 /// have the highest flow, that is they are the nodes to which the maximum
830 /// number of paths converge. If several nodes have the same flow, we want
831 /// to take the highest one in the hierarchy: hence the use of the inverse
832 /// of the topological rank.
833 ///
834 /// Ex.:
835 /// ```text
836 /// A -- we start here
837 /// |
838 /// |---------------------------------------
839 /// | | | |
840 /// B:(0.25,-1) C:(0.25,-2) D:(0.25,-3) E:(0.25,-4)
841 /// | | |
842 /// |--------------------------
843 /// |
844 /// F:(0.75,-5)
845 /// |
846 /// |
847 /// G:(0.75,-6)
848 /// ```
849 /// The "best" node (with the highest (flow, rank) in the graph above is F.
850 flow: BTreeSet<FlowBlockId>,
851}
852
853/// Create an [OrdBlockId] from a block id and a rank given by a map giving
854/// a sort (topological in our use cases) over the graph.
855fn make_ord_block_id(
856 block_id: src::BlockId,
857 sort_map: &HashMap<src::BlockId, usize>,
858) -> OrdBlockId {
859 let rank = *sort_map.get(&block_id).unwrap();
860 OrdBlockId { id: block_id, rank }
861}
862
863/// Compute [BlocksInfo] for every block in the graph.
864/// This information is then used to compute the switch exits.
865fn compute_switch_exits_explore(
866 cfg: &CfgInfo,
867 tsort_map: &HashMap<src::BlockId, usize>,
868 memoized: &mut HashMap<src::BlockId, BlocksInfo>,
869 block_id: src::BlockId,
870) {
871 // Check if we already computer the info
872 if memoized.get(&block_id).is_some() {
873 return;
874 }
875
876 // Compute the block information for the children
877 let children_ids: Vec<src::BlockId> = cfg.cfg_no_be.neighbors(block_id).collect_vec();
878 children_ids
879 .iter()
880 .for_each(|bid| compute_switch_exits_explore(cfg, tsort_map, memoized, *bid));
881
882 // Retrieve the information
883 let children: Vec<&BlocksInfo> = children_ids
884 .iter()
885 .map(|bid| memoized.get(bid).unwrap())
886 .collect();
887
888 // Compute the successors
889 // Add the children themselves in their sets of successors
890 let all_succs: BTreeSet<OrdBlockId> = children.iter().fold(BTreeSet::new(), |acc, s| {
891 // Add the successors to the accumulator
892 let mut acc: BTreeSet<_> = acc.union(&s.succs).copied().collect();
893 // Add the child itself
894 acc.insert(make_ord_block_id(s.id, tsort_map));
895 acc
896 });
897
898 // Compute the "flows" (if there are children)
899 // TODO: this is computationally expensive...
900 let mut flow: HashMap<src::BlockId, (BigRational, isize)> = HashMap::new();
901
902 if children.len() > 0 {
903 // We need to divide the initial flow equally between the children
904 let factor = BigRational::new(BigInt::from(1), BigInt::from(children.len()));
905
906 // Small helper
907 let mut add_to_flow = |(id, f0): (src::BlockId, (BigRational, isize))| match flow.get(&id) {
908 None => flow.insert(id, f0),
909 Some(f1) => {
910 let f = f0.0 + f1.0.clone();
911 assert!(f0.1 == f1.1);
912 flow.insert(id, (f, f0.1))
913 }
914 };
915
916 // For each child, multiply the flows of its own children by the ratio,
917 // and add.
918 for child in children {
919 // First, add the child itself
920 let rank = isize::try_from(*tsort_map.get(&child.id).unwrap()).unwrap();
921 add_to_flow((child.id, (factor.clone(), -rank)));
922
923 // Then add its successors
924 for child1 in &child.flow {
925 add_to_flow((
926 child1.id,
927 (factor.clone() * child1.rank.0.clone(), child1.rank.1),
928 ));
929 }
930 }
931 }
932
933 // Put everything in an ordered set: the first block id will be the one with
934 // the highest flow, and in case of equality it will be the one with the
935 // smallest block id.
936 let flow: BTreeSet<FlowBlockId> = flow
937 .into_iter()
938 .map(|(id, rank)| BlockWithRank { rank, id })
939 .collect();
940
941 trace!("block: {block_id}, all successors: {all_succs:?}, flow: {flow:?}");
942
943 // Memoize
944 let info = BlocksInfo {
945 id: block_id,
946 succs: all_succs,
947 flow,
948 };
949 memoized.insert(block_id, info.clone());
950}
951
952/// Auxiliary helper
953///
954/// Check if it is possible to reach the exit of an outer switch from [bid]
955/// without going through the [exit_candidate]. We use the graph without
956/// backward edges.
957fn can_reach_outer_exit(
958 cfg: &CfgInfo,
959 outer_exits: &HashSet<src::BlockId>,
960 start_bid: src::BlockId,
961 exit_candidate: src::BlockId,
962) -> bool {
963 // The stack of blocks
964 let mut stack: Vec<src::BlockId> = vec![start_bid];
965 let mut explored: HashSet<src::BlockId> = HashSet::new();
966
967 while let Some(bid) = stack.pop() {
968 // Check if already explored
969 if explored.contains(&bid) {
970 break;
971 }
972 explored.insert(bid);
973
974 // Check if this is the exit candidate
975 if bid == exit_candidate {
976 // Stop exploring
977 break;
978 }
979
980 // Check if this is an outer exit
981 if outer_exits.contains(&bid) {
982 return true;
983 }
984
985 // Add the children to the stack
986 for child in cfg.cfg_no_be.neighbors(bid) {
987 stack.push(child);
988 }
989 }
990
991 false
992}
993
994/// See [`compute_loop_switch_exits`](compute_loop_switch_exits) for
995/// explanations about what "exits" are.
996///
997/// In order to compute the switch exits, we simply recursively compute a
998/// topologically ordered set of "filtered successors" as follows (note
999/// that we work in the CFG *without* back edges):
1000/// - for a block which doesn't branch (only one successor), the filtered
1001/// successors is the set of reachable nodes.
1002/// - for a block which branches, we compute the nodes reachable from all
1003/// the children, and find the "best" intersection between those.
1004/// Note that we find the "best" intersection (a pair of branches which
1005/// maximize the intersection of filtered successors) because some branches
1006/// might never join the control-flow of the other branches, if they contain
1007/// a `break`, `return`, `panic`, etc., like here:
1008/// ```text
1009/// if b { x = 3; } { return; }
1010/// y += x;
1011/// ...
1012/// ```
1013/// Note that with nested switches, the branches of the inner switches might
1014/// goto the exits of the outer switches: for this reason, we give precedence
1015/// to the outer switches.
1016fn compute_switch_exits(
1017 cfg: &CfgInfo,
1018 tsort_map: &HashMap<src::BlockId, usize>,
1019) -> HashMap<src::BlockId, Option<src::BlockId>> {
1020 // Compute the successors info map, starting at the root node
1021 let mut succs_info_map = HashMap::new();
1022 trace!(
1023 "- cfg.cfg:\n{:?}\n- cfg.cfg_no_be:\n{:?}\n- cfg.switch_blocks:\n{:?}",
1024 cfg.cfg,
1025 cfg.cfg_no_be,
1026 cfg.switch_blocks
1027 );
1028 let _ = compute_switch_exits_explore(cfg, tsort_map, &mut succs_info_map, src::BlockId::ZERO);
1029
1030 // We need to give precedence to the outer switches: we thus iterate
1031 // over the switch blocks in topological order.
1032 let mut sorted_switch_blocks: BTreeSet<OrdBlockId> = BTreeSet::new();
1033 for bid in cfg.switch_blocks.iter() {
1034 sorted_switch_blocks.insert(make_ord_block_id(*bid, tsort_map));
1035 }
1036 trace!("sorted_switch_blocks: {:?}", sorted_switch_blocks);
1037
1038 // Debugging: print all the successors
1039 {
1040 trace!("Successors info:\n{}\n", {
1041 let mut out = vec![];
1042 for (bid, info) in &succs_info_map {
1043 out.push(
1044 format!(
1045 "{} -> {{succs: {:?}, flow: {:?}}}",
1046 bid, &info.succs, &info.flow
1047 )
1048 .to_string(),
1049 );
1050 }
1051 out.join("\n")
1052 });
1053 }
1054
1055 // For every node which is a switch, retrieve the exit.
1056 // As the set of intersection of successors is topologically sorted, the
1057 // exit should be the first node in the set (if the set is non empty).
1058 // Also, we need to explore the nodes in topological order, to give
1059 // precedence to the outer switches.
1060 let mut exits_set = HashSet::new();
1061 let mut ord_exits_set = BTreeSet::new();
1062 let mut exits = HashMap::new();
1063 for bid in sorted_switch_blocks {
1064 trace!("Finding exit candidate for: {bid:?}");
1065 let bid = bid.id;
1066 let info = succs_info_map.get(&bid).unwrap();
1067 let succs = &info.flow;
1068 // Find the best successor: this is the last one (with the highest flow,
1069 // and the highest reverse topological rank).
1070 if succs.is_empty() {
1071 trace!("{bid:?} has no successors");
1072 exits.insert(bid, None);
1073 } else {
1074 // We have an exit candidate: check that it was not already
1075 // taken by an external switch
1076 let exit = succs.last().unwrap();
1077 trace!("{bid:?} has an exit candidate: {exit:?}");
1078 if exits_set.contains(&exit.id) {
1079 trace!("Ignoring the exit candidate because already taken by an external switch");
1080 exits.insert(bid, None);
1081 } else {
1082 // It was not taken by an external switch.
1083 //
1084 // We must check that we can't reach the exit of an external
1085 // switch from one of the branches, without going through the
1086 // exit candidate.
1087 // We do this by simply checking that we can't reach any exits
1088 // (and use the fact that we explore the switch by using a
1089 // topological order to not discard valid exit candidates).
1090 //
1091 // The reason is that it can lead to code like the following:
1092 // ```
1093 // if ... { // if #1
1094 // if ... { // if #2
1095 // ...
1096 // // here, we have a `goto b1`, where b1 is the exit
1097 // // of if #2: we thus stop translating the blocks.
1098 // }
1099 // else {
1100 // ...
1101 // // here, we have a `goto b2`, where b2 is the exit
1102 // // of if #1: we thus stop translating the blocks.
1103 // }
1104 // // We insert code for the block b1 here (which is the exit of
1105 // // the exit of if #2). However, this block should only
1106 // // be executed in the branch "then" of the if #2, not in
1107 // // the branch "else".
1108 // ...
1109 // }
1110 // else {
1111 // ...
1112 // }
1113 // ```
1114
1115 // First: we do a quick check (does the set of all successors
1116 // intersect the set of exits for outer blocks?). If yes, we do
1117 // a more precise analysis: we check if we can reach the exit
1118 // *without going through* the exit candidate.
1119 if info.succs.intersection(&ord_exits_set).next().is_none()
1120 || !can_reach_outer_exit(cfg, &exits_set, bid, exit.id)
1121 {
1122 trace!("Keeping the exit candidate");
1123 // No intersection: ok
1124 exits_set.insert(exit.id);
1125 ord_exits_set.insert(make_ord_block_id(exit.id, tsort_map));
1126 exits.insert(bid, Some(exit.id));
1127 } else {
1128 trace!("Ignoring the exit candidate because of an intersection with external switches");
1129 exits.insert(bid, None);
1130 }
1131 }
1132 }
1133 }
1134
1135 exits
1136}
1137
1138/// The exits of a graph
1139#[derive(Debug, Clone)]
1140struct ExitInfo {
1141 /// The loop exits
1142 loop_exits: HashMap<src::BlockId, Option<src::BlockId>>,
1143 /// Some loop exits actually belong to outer switches. We still need
1144 /// to track them in the loop exits, in order to know when we should
1145 /// insert a break. However, we need to make sure we don't add the
1146 /// corresponding block in a sequence, after having translated the
1147 /// loop, like so:
1148 /// ```text
1149 /// loop {
1150 /// loop_body
1151 /// };
1152 /// exit_blocks // OK if the exit "belongs" to the loop
1153 /// ```
1154 ///
1155 /// In case the exit doesn't belong to the loop:
1156 /// ```text
1157 /// if b {
1158 /// loop {
1159 /// loop_body
1160 /// } // no exit blocks after the loop
1161 /// }
1162 /// else {
1163 /// ...
1164 /// };
1165 /// exit_blocks // the exit blocks are here
1166 /// ```
1167 owned_loop_exits: HashMap<src::BlockId, Option<src::BlockId>>,
1168 /// The switch exits.
1169 /// Note that the switch exits are always owned.
1170 owned_switch_exits: HashMap<src::BlockId, Option<src::BlockId>>,
1171}
1172
1173/// Compute the exits for the loops and the switches (switch on integer and
1174/// if ... then ... else ...). We need to do this because control-flow in MIR
1175/// is destructured: we have gotos everywhere.
1176///
1177/// Let's consider the following piece of code:
1178/// ```text
1179/// if cond1 { ... } else { ... };
1180/// if cond2 { ... } else { ... };
1181/// ```
1182/// Once converted to MIR, the control-flow is destructured, which means we
1183/// have gotos everywhere. When reconstructing the control-flow, we have
1184/// to be careful about the point where we should join the two branches of
1185/// the first if.
1186/// For instance, if we don't notice they should be joined at some point (i.e,
1187/// whatever the branch we take, there is a moment when we go to the exact
1188/// same place, just before the second if), we might generate code like
1189/// this, with some duplicata:
1190/// ```text
1191/// if cond1 { ...; if cond2 { ... } else { ...} }
1192/// else { ...; if cond2 { ... } else { ...} }
1193/// ```
1194///
1195/// Such a reconstructed program is valid, but it is definitely non-optimal:
1196/// it is very different from the original program (making it less clean and
1197/// clear), more bloated, and might involve duplicating the proof effort.
1198///
1199/// For this reason, we need to find the "exit" of the first loop, which is
1200/// the point where the two branches join. Note that this can be a bit tricky,
1201/// because there may be more than two branches (if we do `switch(x) { ... }`),
1202/// and some of them might not join (if they contain a `break`, `panic`,
1203/// `return`, etc.).
1204///
1205/// Finally, some similar issues arise for loops. For instance, let's consider
1206/// the following piece of code:
1207/// ```text
1208/// while cond1 {
1209/// e1;
1210/// if cond2 {
1211/// break;
1212/// }
1213/// e2;
1214/// }
1215/// e3;
1216/// return;
1217/// ```
1218///
1219/// Note that in MIR, the loop gets desugared to an if ... then ... else ....
1220/// From the MIR, We want to generate something like this:
1221/// ```text
1222/// loop {
1223/// if cond1 {
1224/// e1;
1225/// if cond2 {
1226/// break;
1227/// }
1228/// e2;
1229/// continue;
1230/// }
1231/// else {
1232/// break;
1233/// }
1234/// };
1235/// e3;
1236/// return;
1237/// ```
1238///
1239/// But if we don't pay attention, we might end up with that, once again with
1240/// duplications:
1241/// ```text
1242/// loop {
1243/// if cond1 {
1244/// e1;
1245/// if cond2 {
1246/// e3;
1247/// return;
1248/// }
1249/// e2;
1250/// continue;
1251/// }
1252/// else {
1253/// e3;
1254/// return;
1255/// }
1256/// }
1257/// ```
1258/// We thus have to notice that if the loop condition is false, we goto the same
1259/// block as when following the goto introduced by the break inside the loop, and
1260/// this block is dubbed the "loop exit".
1261///
1262/// The following function thus computes the "exits" for loops and switches, which
1263/// are basically the points where control-flow joins.
1264fn compute_loop_switch_exits(
1265 ctx: &mut TransformCtx,
1266 body: &src::ExprBody,
1267 cfg_info: &CfgInfo,
1268) -> ExitInfo {
1269 // Use the CFG without backward edges to topologically sort the nodes.
1270 // Note that `toposort` returns `Err` if and only if it finds cycles (which
1271 // can't happen).
1272 let tsorted: Vec<src::BlockId> = toposort(&cfg_info.cfg_no_be, None).unwrap();
1273
1274 // Build the map: block id -> topological sort rank
1275 let tsort_map: HashMap<src::BlockId, usize> = tsorted
1276 .into_iter()
1277 .enumerate()
1278 .map(|(i, block_id)| (block_id, i))
1279 .collect();
1280 trace!("tsort_map:\n{:?}", tsort_map);
1281
1282 // Compute the loop exits
1283 let loop_exits = compute_loop_exits(ctx, body, cfg_info);
1284 trace!("loop_exits:\n{:?}", loop_exits);
1285
1286 // Compute the switch exits
1287 let switch_exits = compute_switch_exits(cfg_info, &tsort_map);
1288 trace!("switch_exits:\n{:?}", switch_exits);
1289
1290 // Compute the exit info
1291 let mut exit_info = ExitInfo {
1292 loop_exits: HashMap::new(),
1293 owned_loop_exits: HashMap::new(),
1294 owned_switch_exits: HashMap::new(),
1295 };
1296
1297 // We need to give precedence to the outer switches and loops: we thus iterate
1298 // over the blocks in topological order.
1299 let mut sorted_blocks: BTreeSet<OrdBlockId> = BTreeSet::new();
1300 for bid in cfg_info
1301 .loop_entries
1302 .iter()
1303 .chain(cfg_info.switch_blocks.iter())
1304 {
1305 sorted_blocks.insert(make_ord_block_id(*bid, &tsort_map));
1306 }
1307
1308 // Keep track of the exits which were already attributed
1309 let mut all_exits = HashSet::new();
1310
1311 // Put all this together
1312 for bid in sorted_blocks {
1313 let bid = bid.id;
1314 // Check if loop or switch block
1315 if cfg_info.loop_entries.contains(&bid) {
1316 // This is a loop.
1317 //
1318 // For loops, we always register the exit (if there is one).
1319 // However, the exit may be owned by an outer switch (note
1320 // that we already took care of spreading the exits between
1321 // the inner/outer loops)
1322 let exit_id = loop_exits.get(&bid).unwrap();
1323 exit_info.loop_exits.insert(bid, *exit_id);
1324
1325 // Check if we "own" the exit
1326 match exit_id {
1327 None => {
1328 // No exit
1329 exit_info.owned_loop_exits.insert(bid, None);
1330 }
1331 Some(exit_id) => {
1332 if all_exits.contains(exit_id) {
1333 // We don't own it
1334 exit_info.owned_loop_exits.insert(bid, None);
1335 } else {
1336 // We own it
1337 exit_info.owned_loop_exits.insert(bid, Some(*exit_id));
1338 all_exits.insert(*exit_id);
1339 }
1340 }
1341 }
1342 } else {
1343 // For switches: check that the exit was not already given to a
1344 // loop
1345 let exit_id = switch_exits.get(&bid).unwrap();
1346
1347 match exit_id {
1348 None => {
1349 // No exit
1350 exit_info.owned_switch_exits.insert(bid, None);
1351 }
1352 Some(exit_id) => {
1353 if all_exits.contains(exit_id) {
1354 // We don't own it
1355 exit_info.owned_switch_exits.insert(bid, None);
1356 } else {
1357 // We own it
1358 exit_info.owned_switch_exits.insert(bid, Some(*exit_id));
1359 all_exits.insert(*exit_id);
1360 }
1361 }
1362 }
1363 }
1364 }
1365
1366 exit_info
1367}
1368
1369fn get_goto_kind(
1370 exits_info: &ExitInfo,
1371 parent_loops: &Vec<src::BlockId>,
1372 switch_exit_blocks: &HashSet<src::BlockId>,
1373 next_block_id: src::BlockId,
1374) -> GotoKind {
1375 // First explore the parent loops in revert order
1376 for (i, loop_id) in parent_loops.iter().rev().enumerate() {
1377 // If we goto a loop entry node: this is a 'continue'
1378 if next_block_id == *loop_id {
1379 return GotoKind::Continue(i);
1380 } else {
1381 // If we goto a loop exit node: this is a 'break'
1382 if let Some(exit_id) = exits_info.loop_exits.get(loop_id).unwrap() {
1383 if next_block_id == *exit_id {
1384 return GotoKind::Break(i);
1385 }
1386 }
1387 }
1388 }
1389
1390 // Check if the goto exits the current block
1391 if switch_exit_blocks.contains(&next_block_id) {
1392 return GotoKind::ExitBlock;
1393 }
1394
1395 // Default
1396 GotoKind::Goto
1397}
1398
1399enum GotoKind {
1400 Break(usize),
1401 Continue(usize),
1402 ExitBlock,
1403 Goto,
1404}
1405
1406/// `parent_span`: we need some span data for the new statement.
1407/// We use the one for the parent terminator.
1408fn translate_child_block(
1409 info: &mut BlockInfo<'_>,
1410 parent_loops: &Vec<src::BlockId>,
1411 switch_exit_blocks: &HashSet<src::BlockId>,
1412 parent_span: Span,
1413 child_id: src::BlockId,
1414) -> Option<tgt::Block> {
1415 // Check if this is a backward call
1416 match get_goto_kind(info.exits_info, parent_loops, switch_exit_blocks, child_id) {
1417 GotoKind::Break(index) => {
1418 let st = tgt::RawStatement::Break(index);
1419 Some(tgt::Statement::new(parent_span, st).into_block())
1420 }
1421 GotoKind::Continue(index) => {
1422 let st = tgt::RawStatement::Continue(index);
1423 Some(tgt::Statement::new(parent_span, st).into_block())
1424 }
1425 // If we are going to an exit block we simply ignore the goto
1426 GotoKind::ExitBlock => None,
1427 GotoKind::Goto => {
1428 // "Standard" goto: just recursively translate
1429 ensure_sufficient_stack(|| {
1430 Some(translate_block(
1431 info,
1432 parent_loops,
1433 switch_exit_blocks,
1434 child_id,
1435 ))
1436 })
1437 }
1438 }
1439}
1440
1441fn opt_block_unwrap_or_nop(span: Span, opt_block: Option<tgt::Block>) -> tgt::Block {
1442 opt_block.unwrap_or_else(|| tgt::Statement::new(span, tgt::RawStatement::Nop).into_block())
1443}
1444
1445fn translate_statement(st: &src::Statement) -> Option<tgt::Statement> {
1446 let src_span = st.span;
1447 let st = match st.content.clone() {
1448 src::RawStatement::Assign(place, rvalue) => tgt::RawStatement::Assign(place, rvalue),
1449 src::RawStatement::SetDiscriminant(place, variant_id) => {
1450 tgt::RawStatement::SetDiscriminant(place, variant_id)
1451 }
1452 src::RawStatement::CopyNonOverlapping(copy) => tgt::RawStatement::CopyNonOverlapping(copy),
1453 src::RawStatement::StorageLive(var_id) => tgt::RawStatement::StorageLive(var_id),
1454 src::RawStatement::StorageDead(var_id) => tgt::RawStatement::StorageDead(var_id),
1455 src::RawStatement::Deinit(place) => tgt::RawStatement::Deinit(place),
1456 src::RawStatement::Drop(place) => tgt::RawStatement::Drop(place),
1457 src::RawStatement::Assert(assert) => tgt::RawStatement::Assert(assert),
1458 src::RawStatement::Nop => tgt::RawStatement::Nop,
1459 src::RawStatement::Error(s) => tgt::RawStatement::Error(s),
1460 };
1461 Some(tgt::Statement::new(src_span, st))
1462}
1463
1464fn translate_terminator(
1465 info: &mut BlockInfo<'_>,
1466 parent_loops: &Vec<src::BlockId>,
1467 switch_exit_blocks: &HashSet<src::BlockId>,
1468 terminator: &src::Terminator,
1469) -> tgt::Block {
1470 let src_span = terminator.span;
1471
1472 match &terminator.content {
1473 src::RawTerminator::Abort(kind) => {
1474 tgt::Statement::new(src_span, tgt::RawStatement::Abort(kind.clone())).into_block()
1475 }
1476 src::RawTerminator::Return => {
1477 tgt::Statement::new(src_span, tgt::RawStatement::Return).into_block()
1478 }
1479 src::RawTerminator::Call { call, target } => {
1480 let target_block = translate_child_block(
1481 info,
1482 parent_loops,
1483 switch_exit_blocks,
1484 terminator.span,
1485 *target,
1486 );
1487 let mut block = opt_block_unwrap_or_nop(terminator.span, target_block);
1488 let st = tgt::Statement::new(src_span, tgt::RawStatement::Call(call.clone()));
1489 block.statements.insert(0, st);
1490 block
1491 }
1492 src::RawTerminator::Goto { target } => {
1493 let block = translate_child_block(
1494 info,
1495 parent_loops,
1496 switch_exit_blocks,
1497 terminator.span,
1498 *target,
1499 );
1500 let block = opt_block_unwrap_or_nop(terminator.span, block);
1501 block
1502 }
1503 src::RawTerminator::Switch { discr, targets } => {
1504 // Translate the target expressions
1505 let switch = match &targets {
1506 src::SwitchTargets::If(then_tgt, else_tgt) => {
1507 // Translate the children expressions
1508 let then_block = translate_child_block(
1509 info,
1510 parent_loops,
1511 switch_exit_blocks,
1512 terminator.span,
1513 *then_tgt,
1514 );
1515 // We use the terminator span information in case then
1516 // then statement is `None`
1517 let then_block = opt_block_unwrap_or_nop(terminator.span, then_block);
1518 let else_block = translate_child_block(
1519 info,
1520 parent_loops,
1521 switch_exit_blocks,
1522 terminator.span,
1523 *else_tgt,
1524 );
1525 let else_block = opt_block_unwrap_or_nop(terminator.span, else_block);
1526
1527 // Translate
1528 tgt::Switch::If(discr.clone(), then_block, else_block)
1529 }
1530 src::SwitchTargets::SwitchInt(int_ty, targets, otherwise) => {
1531 // Note that some branches can be grouped together, like
1532 // here:
1533 // ```
1534 // match e {
1535 // E::V1 | E::V2 => ..., // Grouped
1536 // E::V3 => ...
1537 // }
1538 // ```
1539 // We detect this by checking if a block has already been
1540 // translated as one of the branches of the switch.
1541 //
1542 // Rk.: note there may be intermediate gotos depending
1543 // on the MIR we use. Typically, we manage to detect the
1544 // grouped branches with Optimized MIR, but not with Promoted
1545 // MIR. See the comment in "tests/src/matches.rs".
1546
1547 // We link block ids to:
1548 // - vector of matched integer values
1549 // - translated blocks
1550 let mut branches: IndexMap<src::BlockId, (Vec<ScalarValue>, tgt::Block)> =
1551 IndexMap::new();
1552
1553 // Translate the children expressions
1554 for (v, bid) in targets.iter() {
1555 // Check if the block has already been translated:
1556 // if yes, it means we need to group branches
1557 if branches.contains_key(bid) {
1558 // Already translated: add the matched value to
1559 // the list of values
1560 let branch = branches.get_mut(bid).unwrap();
1561 branch.0.push(*v);
1562 } else {
1563 // Not translated: translate it
1564 let block = translate_child_block(
1565 info,
1566 parent_loops,
1567 switch_exit_blocks,
1568 terminator.span,
1569 *bid,
1570 );
1571 // We use the terminator span information in case then
1572 // then statement is `None`
1573 let block = opt_block_unwrap_or_nop(terminator.span, block);
1574 branches.insert(*bid, (vec![*v], block));
1575 }
1576 }
1577 let targets_blocks: Vec<(Vec<ScalarValue>, tgt::Block)> =
1578 branches.into_iter().map(|(_, x)| x).collect();
1579
1580 let otherwise_block = translate_child_block(
1581 info,
1582 parent_loops,
1583 switch_exit_blocks,
1584 terminator.span,
1585 *otherwise,
1586 );
1587 // We use the terminator span information in case then
1588 // then statement is `None`
1589 let otherwise_block = opt_block_unwrap_or_nop(terminator.span, otherwise_block);
1590
1591 // Translate
1592 tgt::Switch::SwitchInt(discr.clone(), *int_ty, targets_blocks, otherwise_block)
1593 }
1594 };
1595
1596 // Return
1597 let span = tgt::combine_switch_targets_span(&switch);
1598 let span = combine_span(&src_span, &span);
1599 let st = tgt::RawStatement::Switch(switch);
1600 tgt::Statement::new(span, st).into_block()
1601 }
1602 }
1603}
1604
1605/// Remark: some values are boxed (here, the returned statement) so that they
1606/// are allocated on the heap. This reduces stack usage (we had problems with
1607/// stack overflows in the past). A more efficient solution would be to use loops
1608/// to make this code constant space, but that would require a serious rewriting.
1609fn translate_block(
1610 info: &mut BlockInfo<'_>,
1611 parent_loops: &Vec<src::BlockId>,
1612 switch_exit_blocks: &HashSet<src::BlockId>,
1613 block_id: src::BlockId,
1614) -> tgt::Block {
1615 // If the user activated this check: check that we didn't already translate
1616 // this block, and insert the block id in the set of already translated blocks.
1617 trace!(
1618 "Parent loops: {:?}, Parent switch exits: {:?}, Block id: {}",
1619 parent_loops,
1620 switch_exit_blocks,
1621 block_id
1622 );
1623 info.explored.insert(block_id);
1624
1625 let block = info.body.body.get(block_id).unwrap();
1626
1627 // Check if we enter a loop: if so, update parent_loops and the current_exit_block
1628 let is_loop = info.cfg.loop_entries.contains(&block_id);
1629 let mut nparent_loops: Vec<src::BlockId>;
1630 let nparent_loops = if info.cfg.loop_entries.contains(&block_id) {
1631 nparent_loops = parent_loops.clone();
1632 nparent_loops.push(block_id);
1633 &nparent_loops
1634 } else {
1635 parent_loops
1636 };
1637
1638 // If we enter a switch or a loop, we need to check if we own the exit
1639 // block, in which case we need to append it to the loop/switch body
1640 // in a sequence
1641 let is_switch = block.terminator.content.is_switch();
1642 let next_block = if is_loop {
1643 *info.exits_info.owned_loop_exits.get(&block_id).unwrap()
1644 } else if is_switch {
1645 *info.exits_info.owned_switch_exits.get(&block_id).unwrap()
1646 } else {
1647 None
1648 };
1649
1650 // If we enter a switch, add the exit block to the set
1651 // of outer exit blocks
1652 let nswitch_exit_blocks = if is_switch {
1653 let mut nexit_blocks = switch_exit_blocks.clone();
1654 match next_block {
1655 None => nexit_blocks,
1656 Some(bid) => {
1657 nexit_blocks.insert(bid);
1658 nexit_blocks
1659 }
1660 }
1661 } else {
1662 switch_exit_blocks.clone()
1663 };
1664
1665 // Translate the terminator and the subsequent blocks.
1666 // Note that this terminator is an option: we might ignore it
1667 // (if it is an exit).
1668
1669 let terminator =
1670 translate_terminator(info, nparent_loops, &nswitch_exit_blocks, &block.terminator);
1671
1672 // Translate the statements inside the block
1673 let statements = block
1674 .statements
1675 .iter()
1676 .filter_map(|st| translate_statement(st))
1677 .collect_vec();
1678
1679 // Prepend the statements to the terminator.
1680 let mut block = if let Some(st) = tgt::Block::from_seq(statements) {
1681 st.merge(terminator)
1682 } else {
1683 terminator
1684 };
1685
1686 if is_loop {
1687 // Put the loop body inside a `Loop`.
1688 block = tgt::Statement::new(block.span, tgt::RawStatement::Loop(block)).into_block()
1689 } else if !is_switch {
1690 assert!(next_block.is_none());
1691 }
1692
1693 // Concatenate the exit expression, if needs be
1694 if let Some(exit_block_id) = next_block {
1695 let next_block = ensure_sufficient_stack(|| {
1696 translate_block(info, parent_loops, switch_exit_blocks, exit_block_id)
1697 });
1698 block = block.merge(next_block);
1699 }
1700
1701 block
1702}
1703
1704fn translate_body_aux(ctx: &mut TransformCtx, src_body: &src::ExprBody) -> tgt::ExprBody {
1705 // Explore the function body to create the control-flow graph without backward
1706 // edges, and identify the loop entries (which are destinations of backward edges).
1707 let cfg_info = build_cfg_info(src_body);
1708 trace!("cfg_info: {:?}", cfg_info);
1709
1710 // Find the exit block for all the loops and switches, if such an exit point
1711 // exists.
1712 let exits_info = compute_loop_switch_exits(ctx, src_body, &cfg_info);
1713
1714 // Debugging
1715 trace!("exits_info:\n{:?}", exits_info);
1716
1717 // Translate the body by reconstructing the loops and the
1718 // conditional branchings.
1719 // Note that we shouldn't get `None`.
1720 let mut explored = HashSet::new();
1721 let mut info = BlockInfo {
1722 cfg: &cfg_info,
1723 body: src_body,
1724 exits_info: &exits_info,
1725 explored: &mut explored,
1726 };
1727 let tgt_body = translate_block(&mut info, &Vec::new(), &HashSet::new(), src::BlockId::ZERO);
1728
1729 tgt::ExprBody {
1730 span: src_body.span,
1731 locals: src_body.locals.clone(),
1732 comments: src_body.comments.clone(),
1733 body: tgt_body,
1734 }
1735}
1736
1737fn translate_body(ctx: &mut TransformCtx, body: &mut gast::Body) {
1738 use gast::Body::{Structured, Unstructured};
1739 let Unstructured(src_body) = body else {
1740 panic!("Called `ullbc_to_llbc` on an already restructured body")
1741 };
1742 trace!("About to translate to ullbc: {:?}", src_body.span);
1743 let tgt_body = translate_body_aux(ctx, src_body);
1744 *body = Structured(tgt_body);
1745}
1746
1747pub struct Transform;
1748impl TransformPass for Transform {
1749 fn transform_ctx(&self, ctx: &mut TransformCtx) {
1750 // Translate the bodies one at a time.
1751 ctx.for_each_body(|ctx, body| {
1752 translate_body(ctx, body);
1753 });
1754
1755 // Print the functions
1756 let fmt_ctx = ctx.into_fmt();
1757 for fun in &ctx.translated.fun_decls {
1758 trace!(
1759 "# Signature:\n{}\n\n# Function definition:\n{}\n",
1760 fmt_ctx.format_object(&fun.signature),
1761 fmt_ctx.format_object(fun),
1762 );
1763 }
1764 // Print the global variables
1765 for global in &ctx.translated.global_decls {
1766 trace!(
1767 "# Type:\n{}\n\n# Global definition:\n{}\n",
1768 fmt_ctx.format_object(&global.ty),
1769 fmt_ctx.format_object(global)
1770 );
1771 }
1772
1773 if ctx.options.print_built_llbc {
1774 info!("# LLBC resulting from control-flow reconstruction:\n\n{ctx}\n",);
1775 }
1776 }
1777}