charon_lib/transform/ullbc_to_llbc.rs
1//! ULLBC to LLBC
2//!
3//! We reconstruct the control-flow in the Unstructured LLBC.
4//!
5//! The reconstruction algorithm is not written to be efficient (its complexity
6//! is probably very bad), but it was not written to be: this is still an early
7//! stage and we want the algorithm to generate the best reconstruction as
8//! possible. We still need to test the algorithm on more interesting examples,
9//! and will consider making it more efficient once it is a bit mature and well
10//! tested.
11//! Also note that we more importantly focus on making the algorithm sound: the
12//! reconstructed program must always be equivalent to the original MIR program,
13//! and the fact that the reconstruction preserves this property must be obvious.
14//!
15//! Finally, we conjecture the execution time shouldn't be too much a problem:
16//! we don't expect the translation mechanism to be applied on super huge functions
17//! (which will be difficult to formally analyze), and the MIR graphs are actually
18//! not that big because statements are grouped into code blocks (a block is made
19//! of a list of statements, followed by a terminator - branchings and jumps can
20//! only be performed by terminators -, meaning that MIR graphs don't have that
21//! many nodes and edges).
22
23use crate::ast::*;
24use crate::common::ensure_sufficient_stack;
25use crate::errors::sanity_check;
26use crate::formatter::{Formatter, IntoFormatter};
27use crate::llbc_ast as tgt;
28use crate::meta::{combine_span, Span};
29use crate::transform::TransformCtx;
30use crate::ullbc_ast::{self as src};
31use indexmap::IndexMap;
32use itertools::Itertools;
33use num_bigint::BigInt;
34use num_rational::BigRational;
35use petgraph::algo::toposort;
36use petgraph::graphmap::DiGraphMap;
37use petgraph::Direction;
38use std::collections::{BTreeSet, HashMap, HashSet, VecDeque};
39
40use super::ctx::TransformPass;
41
42/// Control-Flow Graph
43type Cfg = DiGraphMap<src::BlockId, ()>;
44
45/// Small utility
46struct BlockInfo<'a> {
47 /// `no_code_duplication`: if true, check that no block is translated twice (this
48 /// can be a sign that the reconstruction is of poor quality, but sometimes
49 /// code duplication is necessary, in the presence of "fused" match branches for
50 /// instance, like in `match ... { Foo | Bar => { ... }}`).
51 no_code_duplication: bool,
52 cfg: &'a CfgInfo,
53 body: &'a src::ExprBody,
54 exits_info: &'a ExitInfo,
55 explored: &'a mut HashSet<src::BlockId>,
56}
57
58/// This structure contains various information about a function's CFG.
59#[derive(Debug)]
60struct CfgInfo {
61 /// The CFG
62 pub cfg: Cfg,
63 /// The CFG where all the backward edges have been removed
64 pub cfg_no_be: Cfg,
65 /// We consider the destination of the backward edges to be loop entries and
66 /// store them here.
67 pub loop_entries: HashSet<src::BlockId>,
68 /// The backward edges
69 pub backward_edges: HashSet<(src::BlockId, src::BlockId)>,
70 /// The blocks whose terminators are a switch are stored here.
71 pub switch_blocks: HashSet<src::BlockId>,
72 /// The set of nodes from where we can only reach error nodes (panic, etc.)
73 pub only_reach_error: HashSet<src::BlockId>,
74}
75
76/// Build the CFGs (the "regular" CFG and the CFG without backward edges) and
77/// compute some information like the loop entries and the switch blocks.
78fn build_cfg_info(body: &src::ExprBody) -> CfgInfo {
79 let mut cfg = CfgInfo {
80 cfg: Cfg::new(),
81 cfg_no_be: Cfg::new(),
82 loop_entries: HashSet::new(),
83 backward_edges: HashSet::new(),
84 switch_blocks: HashSet::new(),
85 only_reach_error: HashSet::new(),
86 };
87
88 // Add the nodes
89 for block_id in body.body.iter_indices() {
90 cfg.cfg.add_node(block_id);
91 cfg.cfg_no_be.add_node(block_id);
92 }
93
94 // Add the edges
95 let ancestors = HashSet::new();
96 let mut explored = HashSet::new();
97 build_cfg_partial_info_edges(
98 &mut cfg,
99 &ancestors,
100 &mut explored,
101 body,
102 src::BlockId::ZERO,
103 );
104
105 cfg
106}
107
108fn block_is_switch(body: &src::ExprBody, block_id: src::BlockId) -> bool {
109 let block = body.body.get(block_id).unwrap();
110 block.terminator.content.is_switch()
111}
112
113/// The terminator of the block is a panic, etc.
114fn block_is_error(body: &src::ExprBody, block_id: src::BlockId) -> bool {
115 let block = body.body.get(block_id).unwrap();
116 use src::RawTerminator::*;
117 match &block.terminator.content {
118 Abort(..) => true,
119 Goto { .. } | Switch { .. } | Return { .. } => false,
120 }
121}
122
123fn build_cfg_partial_info_edges(
124 cfg: &mut CfgInfo,
125 ancestors: &HashSet<src::BlockId>,
126 explored: &mut HashSet<src::BlockId>,
127 body: &src::ExprBody,
128 block_id: src::BlockId,
129) {
130 // Check if we already explored the current node
131 if explored.contains(&block_id) {
132 return;
133 }
134 explored.insert(block_id);
135
136 // Insert the current block in the set of ancestors blocks
137 let mut ancestors = ancestors.clone();
138 ancestors.insert(block_id);
139
140 // Check if it is a switch
141 if block_is_switch(body, block_id) {
142 cfg.switch_blocks.insert(block_id);
143 }
144
145 // Retrieve the block targets
146 let targets = body.body.get(block_id).unwrap().targets();
147 let mut has_backward_edge = false;
148
149 // Add edges for all the targets and explore them, if they are not predecessors
150 for tgt in &targets {
151 // Insert the edge in the "regular" CFG
152 cfg.cfg.add_edge(block_id, *tgt, ());
153
154 // We need to check if it is a backward edge before inserting it in the
155 // CFG without backward edges and exploring it
156 if ancestors.contains(tgt) {
157 // This is a backward edge
158 has_backward_edge = true;
159 cfg.loop_entries.insert(*tgt);
160 cfg.backward_edges.insert((block_id, *tgt));
161 } else {
162 // Not a backward edge: insert the edge and explore
163 cfg.cfg_no_be.add_edge(block_id, *tgt, ());
164 ensure_sufficient_stack(|| {
165 build_cfg_partial_info_edges(cfg, &ancestors, explored, body, *tgt);
166 })
167 }
168 }
169
170 // Check if this node can only reach error nodes:
171 // - we check if the current node ends with an error terminator
172 // - or check that all the targets lead to error nodes
173 // Note that if there is a backward edge, we consider that we don't necessarily
174 // go to error.
175 if !has_backward_edge
176 && (block_is_error(body, block_id)
177 || (
178 // The targets are empty if this is an error node *or* a return node
179 !targets.is_empty() && targets.iter().all(|tgt| cfg.only_reach_error.contains(tgt))
180 ))
181 {
182 cfg.only_reach_error.insert(block_id);
183 }
184}
185
186#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
187struct BlockWithRank<T> {
188 /// A "rank" quantity that we use to order the nodes.
189 /// By placing the `rank` field before the `id`, we make sure that
190 /// we use the rank first in the lexicographic order.
191 rank: T,
192 id: src::BlockId,
193}
194
195type OrdBlockId = BlockWithRank<usize>;
196
197/// For the rank we use:
198/// - a "flow" quantity (see [BlocksInfo])
199/// - the *inverse* rank in the topological sort (i.e., `- topo_rank`)
200type FlowBlockId = BlockWithRank<(BigRational, isize)>;
201
202#[derive(Debug, Clone)]
203struct LoopExitCandidateInfo {
204 /// The occurrences going to this exit.
205 /// For every occurrence, we register the distance between the loop entry
206 /// and the node from which the edge going to the exit starts.
207 /// If later we have to choose between candidates with the same number
208 /// of occurrences, we choose the one with the smallest distances.
209 ///
210 /// Note that it *may* happen that we have several exit candidates referenced
211 /// more than once for one loop. This comes from the fact that whenever we
212 /// reach a node from which the loop entry is not reachable (without using a
213 /// backward edge leading to an outer loop entry), we register this node
214 /// as well as all its children as exit candidates.
215 /// Consider the following example:
216 /// ```text
217 /// while i < max {
218 /// if cond {
219 /// break;
220 /// }
221 /// s += i;
222 /// i += 1
223 /// }
224 /// // All the below nodes are exit candidates (each of them is referenced twice)
225 /// s += 1;
226 /// return s;
227 /// ```
228 pub occurrences: Vec<usize>,
229}
230
231/// Check if a loop entry is reachable from a node, in a graph where we remove
232/// the backward edges going directly to the loop entry.
233///
234/// If the loop entry is not reachable, it means that:
235/// - the loop entry is not reachable at all
236/// - or it is only reachable through an outer loop
237///
238/// The starting node should be a (transitive) child of the loop entry.
239/// We use this to find candidates for loop exits.
240fn loop_entry_is_reachable_from_inner(
241 cfg: &CfgInfo,
242 loop_entry: src::BlockId,
243 block_id: src::BlockId,
244) -> bool {
245 // It is reachable in the complete graph. Check if it is reachable by not
246 // going through backward edges which go to outer loops. In practice, we
247 // just need to forbid the use of any backward edges at the exception of
248 // those which go directly to the current loop's entry. This means that we
249 // ignore backward edges to outer loops of course, but also backward edges
250 // to inner loops because we shouldn't need to follow those (there should be
251 // more direct paths).
252
253 // Explore the graph starting at block_id
254 let mut explored: HashSet<src::BlockId> = HashSet::new();
255 let mut stack: VecDeque<src::BlockId> = VecDeque::new();
256 stack.push_back(block_id);
257 while !stack.is_empty() {
258 let bid = stack.pop_front().unwrap();
259 if explored.contains(&bid) {
260 continue;
261 }
262 explored.insert(bid);
263
264 let next_ids = cfg.cfg.neighbors_directed(bid, Direction::Outgoing);
265 for next_id in next_ids {
266 // Check if this is a backward edge
267 if cfg.backward_edges.contains(&(bid, next_id)) {
268 // Backward edge: only allow those going directly to the current
269 // loop's entry
270 if next_id == loop_entry {
271 // The loop entry is reachable
272 return true;
273 } else {
274 // Forbidden edge: ignore
275 continue;
276 }
277 } else {
278 // Nothing special: add the node to the stack for further
279 // exploration
280 stack.push_back(next_id);
281 }
282 }
283 }
284
285 // The loop entry is not reachable
286 false
287}
288
289struct FilteredLoopParents {
290 remaining_parents: Vec<(src::BlockId, usize)>,
291 removed_parents: Vec<(src::BlockId, usize)>,
292}
293
294fn filter_loop_parents(
295 cfg: &CfgInfo,
296 parent_loops: &Vec<(src::BlockId, usize)>,
297 block_id: src::BlockId,
298) -> Option<FilteredLoopParents> {
299 let mut eliminate: usize = 0;
300 for (loop_id, _ldist) in parent_loops.iter().rev() {
301 if !loop_entry_is_reachable_from_inner(cfg, *loop_id, block_id) {
302 eliminate += 1;
303 } else {
304 break;
305 }
306 }
307
308 if eliminate > 0 {
309 // Split the vector of parents
310 let (remaining_parents, removed_parents) =
311 parent_loops.split_at(parent_loops.len() - eliminate);
312 let (mut remaining_parents, removed_parents) =
313 (remaining_parents.to_vec(), removed_parents.to_vec());
314
315 // Update the distance to the last loop - we just increment the distance
316 // by 1, because from the point of view of the parent loop, we just exited
317 // a block and go to the next sequence of instructions.
318 if !remaining_parents.is_empty() {
319 remaining_parents.last_mut().unwrap().1 += 1;
320 }
321
322 Some(FilteredLoopParents {
323 remaining_parents,
324 removed_parents,
325 })
326 } else {
327 None
328 }
329}
330
331/// List the nodes reachable from a starting point.
332/// We list the nodes and the depth (in the AST) at which they were found.
333fn list_reachable(cfg: &Cfg, start: src::BlockId) -> HashMap<src::BlockId, usize> {
334 let mut reachable: HashMap<src::BlockId, usize> = HashMap::new();
335 let mut stack: VecDeque<(src::BlockId, usize)> = VecDeque::new();
336 stack.push_back((start, 0));
337
338 while !stack.is_empty() {
339 let (bid, dist) = stack.pop_front().unwrap();
340
341 // Ignore this node if we already registered it with a better distance
342 match reachable.get(&bid) {
343 None => (),
344 Some(original_dist) => {
345 if *original_dist < dist {
346 continue;
347 }
348 }
349 }
350
351 // Inset the node with its distance
352 reachable.insert(bid, dist);
353
354 // Add the children to the stack
355 for child in cfg.neighbors(bid) {
356 stack.push_back((child, dist + 1));
357 }
358 }
359
360 // Return
361 reachable
362}
363
364/// Register a node and its children as exit candidates for a list of
365/// parent loops.
366fn register_children_as_loop_exit_candidates(
367 cfg: &CfgInfo,
368 loop_exits: &mut HashMap<src::BlockId, IndexMap<src::BlockId, LoopExitCandidateInfo>>,
369 removed_parent_loops: &Vec<(src::BlockId, usize)>,
370 block_id: src::BlockId,
371) {
372 // List the reachable nodes
373 let reachable = list_reachable(&cfg.cfg_no_be, block_id);
374
375 let mut base_dist = 0;
376 // For every parent loop, in reverse order (we go from last to first in
377 // order to correctly compute the distances)
378 for (loop_id, loop_dist) in removed_parent_loops.iter().rev() {
379 // Update the distance to the loop entry
380 base_dist += *loop_dist;
381
382 // Retrieve the candidates
383 let candidates = loop_exits.get_mut(loop_id).unwrap();
384
385 // Update them
386 for (bid, dist) in reachable.iter() {
387 let distance = base_dist + *dist;
388 match candidates.get_mut(bid) {
389 None => {
390 candidates.insert(
391 *bid,
392 LoopExitCandidateInfo {
393 occurrences: vec![distance],
394 },
395 );
396 }
397 Some(c) => {
398 c.occurrences.push(distance);
399 }
400 }
401 }
402 }
403}
404
405/// Compute the loop exit candidates.
406///
407/// There may be several candidates with the same "optimality" (same number of
408/// occurrences, etc.), in which case we choose the first one which was registered
409/// (the order in which we explore the graph is deterministic): this is why we
410/// store the candidates in a linked hash map.
411fn compute_loop_exit_candidates(
412 cfg: &CfgInfo,
413 explored: &mut HashSet<src::BlockId>,
414 ordered_loops: &mut Vec<src::BlockId>,
415 loop_exits: &mut HashMap<src::BlockId, IndexMap<src::BlockId, LoopExitCandidateInfo>>,
416 // List of parent loops, with the distance to the entry of the loop (the distance
417 // is the distance between the current node and the loop entry for the last parent,
418 // and the distance between the parents for the others).
419 mut parent_loops: Vec<(src::BlockId, usize)>,
420 block_id: src::BlockId,
421) {
422 if explored.contains(&block_id) {
423 return;
424 }
425 explored.insert(block_id);
426
427 // Check if we enter a loop - add the corresponding node if necessary
428 if cfg.loop_entries.contains(&block_id) {
429 parent_loops.push((block_id, 1));
430 ordered_loops.push(block_id);
431 } else {
432 // Increase the distance with the parent loop
433 if !parent_loops.is_empty() {
434 parent_loops.last_mut().unwrap().1 += 1;
435 }
436 };
437
438 // Retrieve the children - note that we ignore the back edges
439 let children = cfg.cfg_no_be.neighbors(block_id);
440 for child in children {
441 // If the parent loop entry is not reachable from the child without going
442 // through a backward edge which goes directly to the loop entry, consider
443 // this node a potential exit.
444 ensure_sufficient_stack(|| {
445 match filter_loop_parents(cfg, &parent_loops, child) {
446 None => {
447 compute_loop_exit_candidates(
448 cfg,
449 explored,
450 ordered_loops,
451 loop_exits,
452 parent_loops.clone(),
453 child,
454 );
455 }
456 Some(fparent_loops) => {
457 // We filtered some parent loops: it means this child and its
458 // children are loop exit candidates for all those loops: we must
459 // thus register them.
460 // Note that we register the child *and* its children: the reason
461 // is that we might do something *then* actually jump to the exit.
462 // For instance, the following block of code:
463 // ```
464 // if cond { break; } else { ... }
465 // ```
466 //
467 // Gets translated in MIR to something like this:
468 // ```
469 // bb1: {
470 // if cond -> bb2 else -> bb3; // bb2 is not the real exit
471 // }
472 //
473 // bb2: {
474 // goto bb4; // bb4 is the real exit
475 // }
476 // ```
477 register_children_as_loop_exit_candidates(
478 cfg,
479 loop_exits,
480 &fparent_loops.removed_parents,
481 child,
482 );
483
484 // Explore, with the filtered parents
485 compute_loop_exit_candidates(
486 cfg,
487 explored,
488 ordered_loops,
489 loop_exits,
490 fparent_loops.remaining_parents,
491 child,
492 );
493 }
494 }
495 })
496 }
497}
498
499/// See [`compute_loop_switch_exits`](compute_loop_switch_exits) for
500/// explanations about what "exits" are.
501///
502/// The following function computes the loop exits. It acts as follows.
503///
504/// We keep track of a stack of the loops in which we entered.
505/// It is very easy to check when we enter a loop: loop entries are destinations
506/// of backward edges, which can be spotted with a simple graph exploration (see
507/// [`build_cfg_partial_info`](build_cfg_partial_info).
508/// The criteria to consider whether we exit a loop is the following:
509/// - we exit a loop if we go to a block from which we can't reach the loop
510/// entry at all
511/// - or if we can reach the loop entry, but must use a backward edge which goes
512/// to an outer loop
513///
514/// It is better explained on the following example:
515/// ```text
516/// 'outer while i < max {
517/// 'inner while j < max {
518/// j += 1;
519/// }
520/// // (i)
521/// i += 1;
522/// }
523/// ```
524/// If we enter the inner loop then go to (i) from the inner loop, we consider
525/// that we exited the outer loop because:
526/// - we can reach the entry of the inner loop from (i) (by finishing then
527/// starting again an iteration of the outer loop)
528/// - but doing this requires taking a backward edge which goes to the outer loop
529///
530/// Whenever we exit a loop, we save the block we went to as an exit candidate
531/// for this loop. Note that there may by many exit candidates. For instance,
532/// in the below example:
533/// ```text
534/// while ... {
535/// ...
536/// if ... {
537/// // We can't reach the loop entry from here: this is an exit
538/// // candidate
539/// return;
540/// }
541/// }
542/// // This is another exit candidate - and this is the one we want to use
543/// // as the "real" exit...
544/// ...
545/// ```
546///
547/// Also note that it may happen that we go several times to the same exit (if
548/// we use breaks for instance): we record the number of times an exit candidate
549/// is used.
550///
551/// Once we listed all the exit candidates, we find the "best" one for every
552/// loop, starting with the outer loops. We start with outer loops because
553/// inner loops might use breaks to exit to the exit of outer loops: if we
554/// start with the inner loops, the exit which is "natural" for the outer loop
555/// might end up being used for one of the inner loops...
556///
557/// The best exit is the following one:
558/// - it is the one which is used the most times (note that there can be
559/// several candidates which are referenced strictly more than once: see the
560/// comment below)
561/// - if several exits have the same number of occurrences, we choose the one
562/// for which we goto the "earliest" (earliest meaning that the goto is close to
563/// the loop entry node in the AST). The reason is that all the loops should
564/// have an outer if ... then ... else ... which executes the loop body or goes
565/// to the exit (note that this is not necessarily the first
566/// if ... then ... else ... we find: loop conditions can be arbitrary
567/// expressions, containing branchings).
568///
569/// # Several candidates for a loop exit:
570/// =====================================
571/// There used to be a sanity check to ensure there are no two different
572/// candidates with exactly the same number of occurrences and distance from
573/// the entry of the loop, if the number of occurrences is > 1.
574///
575/// We removed it because it does happen, for instance here (the match
576/// introduces an `unreachable` node, and it has the same number of
577/// occurrences and the same distance to the loop entry as the `panic`
578/// node):
579///
580/// ```text
581/// pub fn list_nth_mut_loop_pair<'a, T>(
582/// mut ls: &'a mut List<T>,
583/// mut i: u32,
584/// ) -> &'a mut T {
585/// loop {
586/// match ls {
587/// List::Nil => {
588/// panic!() // <-- best candidate
589/// }
590/// List::Cons(x, tl) => {
591/// if i == 0 {
592/// return x;
593/// } else {
594/// ls = tl;
595/// i -= 1;
596/// }
597/// }
598/// _ => {
599/// // Note that Rustc always introduces an unreachable branch after
600/// // desugaring matches.
601/// unreachable!(), // <-- best candidate
602/// }
603/// }
604/// }
605/// }
606/// ```
607///
608/// When this happens we choose an exit candidate whose edges don't necessarily
609/// lead to an error (above there are none, so we don't choose any exits). Note
610/// that this last condition is important to prevent loops from being unnecessarily
611/// nested:
612///
613/// ```text
614/// pub fn nested_loops_enum(step_out: usize, step_in: usize) -> usize {
615/// let mut s = 0;
616///
617/// for _ in 0..128 { // We don't want this loop to be nested with the loops below
618/// s += 1;
619/// }
620///
621/// for _ in 0..(step_out) {
622/// for _ in 0..(step_in) {
623/// s += 1;
624/// }
625/// }
626///
627/// s
628/// }
629/// ```
630fn compute_loop_exits(
631 ctx: &mut TransformCtx,
632 body: &src::ExprBody,
633 cfg: &CfgInfo,
634) -> HashMap<src::BlockId, Option<src::BlockId>> {
635 let mut explored = HashSet::new();
636 let mut ordered_loops = Vec::new();
637 let mut loop_exits = HashMap::new();
638
639 // Initialize the loop exits candidates
640 for loop_id in &cfg.loop_entries {
641 loop_exits.insert(*loop_id, IndexMap::new());
642 }
643
644 // Compute the candidates
645 compute_loop_exit_candidates(
646 cfg,
647 &mut explored,
648 &mut ordered_loops,
649 &mut loop_exits,
650 Vec::new(),
651 src::BlockId::ZERO,
652 );
653
654 {
655 // Debugging
656 let candidates: Vec<String> = loop_exits
657 .iter()
658 .map(|(loop_id, candidates)| format!("{loop_id} -> {candidates:?}"))
659 .collect();
660 trace!("Loop exit candidates:\n{}", candidates.join("\n"));
661 }
662
663 // Choose one candidate among the potential candidates.
664 let mut exits: HashSet<src::BlockId> = HashSet::new();
665 let mut chosen_loop_exits: HashMap<src::BlockId, Option<src::BlockId>> = HashMap::new();
666 // For every loop
667 for loop_id in ordered_loops {
668 // Check the candidates.
669 // Ignore the candidates which have already been chosen as exits for other
670 // loops (which should be outer loops).
671 // We choose the exit with:
672 // - the most occurrences
673 // - the least total distance (if there are several possibilities)
674 // - doesn't necessarily lead to an error (panic, unreachable)
675
676 // First:
677 // - filter the candidates
678 // - compute the number of occurrences
679 // - compute the sum of distances
680 // TODO: we could simply order by using a lexicographic order
681 let loop_exits = loop_exits
682 .get(&loop_id)
683 .unwrap()
684 .iter()
685 // If candidate already selected for another loop: ignore
686 .filter(|(candidate_id, _)| !exits.contains(candidate_id))
687 .map(|(candidate_id, candidate_info)| {
688 let num_occurrences = candidate_info.occurrences.len();
689 let dist_sum = candidate_info.occurrences.iter().sum();
690 (*candidate_id, num_occurrences, dist_sum)
691 })
692 .collect_vec();
693
694 trace!(
695 "Loop {}: possible exits:\n{}",
696 loop_id,
697 loop_exits
698 .iter()
699 .map(|(bid, occs, dsum)| format!(
700 "{bid} -> {{ occurrences: {occs}, dist_sum: {dsum} }}",
701 ))
702 .collect::<Vec<String>>()
703 .join("\n")
704 );
705
706 // Second: actually select the proper candidate.
707
708 // We find the one with the highest occurrence and the smallest distance
709 // from the entry of the loop (note that we take care of listing the exit
710 // candidates in a deterministic order).
711 let mut best_exit: Option<src::BlockId> = None;
712 let mut best_occurrences = 0;
713 let mut best_dist_sum = std::usize::MAX;
714 for (candidate_id, occurrences, dist_sum) in &loop_exits {
715 if (*occurrences > best_occurrences)
716 || (*occurrences == best_occurrences && *dist_sum < best_dist_sum)
717 {
718 best_exit = Some(*candidate_id);
719 best_occurrences = *occurrences;
720 best_dist_sum = *dist_sum;
721 }
722 }
723
724 let possible_candidates: Vec<_> = loop_exits
725 .iter()
726 .filter_map(|(bid, occs, dsum)| {
727 if *occs == best_occurrences && *dsum == best_dist_sum {
728 Some(*bid)
729 } else {
730 None
731 }
732 })
733 .collect();
734 let num_possible_candidates = loop_exits.len();
735
736 // If there is exactly one one best candidate, it is easy.
737 // Otherwise we need to split further.
738 if num_possible_candidates > 1 {
739 trace!("Best candidates: {:?}", possible_candidates);
740 // TODO: if we use a lexicographic order we can merge this with the code
741 // above.
742 // Remove the candidates which only lead to errors (panic or unreachable).
743 let candidates: Vec<_> = possible_candidates
744 .iter()
745 .filter(|bid| !cfg.only_reach_error.contains(bid))
746 .collect();
747 // If there is exactly one candidate we select it
748 if candidates.len() == 1 {
749 let exit_id = *candidates[0];
750 exits.insert(exit_id);
751 trace!("Loop {loop_id}: selected the best exit candidate {exit_id}");
752 chosen_loop_exits.insert(loop_id, Some(exit_id));
753 } else {
754 // Otherwise we do not select any exit.
755 // We don't want to select any exit if we are in the below situation
756 // (all paths lead to errors). We added a sanity check below to
757 // catch the situations where there are several exits which don't
758 // lead to errors.
759 //
760 // Example:
761 // ========
762 // ```
763 // loop {
764 // match ls {
765 // List::Nil => {
766 // panic!() // <-- best candidate
767 // }
768 // List::Cons(x, tl) => {
769 // if i == 0 {
770 // return x;
771 // } else {
772 // ls = tl;
773 // i -= 1;
774 // }
775 // }
776 // _ => {
777 // unreachable!(); // <-- best candidate (Rustc introduces an `unreachable` case)
778 // }
779 // }
780 // }
781 // ```
782 //
783 // Adding this sanity check so that we can see when there are
784 // several candidates.
785 let span = body.body[loop_id].terminator.span; // Taking *a* span from the block
786 sanity_check!(ctx, span, candidates.is_empty());
787 trace!("Loop {loop_id}: did not select an exit candidate because they all lead to panics");
788 chosen_loop_exits.insert(loop_id, None);
789 }
790 } else {
791 // Register the exit, if there is one
792 match best_exit {
793 None => {
794 // No exit was found
795 trace!("Loop {loop_id}: could not find an exit candidate");
796 chosen_loop_exits.insert(loop_id, None);
797 }
798 Some(exit_id) => {
799 exits.insert(exit_id);
800 trace!("Loop {loop_id}: selected the unique exit candidate {exit_id}");
801 chosen_loop_exits.insert(loop_id, Some(exit_id));
802 }
803 }
804 }
805 }
806
807 // Return the chosen exits
808 trace!("Chosen loop exits: {:?}", chosen_loop_exits);
809 chosen_loop_exits
810}
811
812/// Information used to compute the switch exits.
813/// We compute this information for every block in the graph.
814/// Note that we make sure to use immutable sets because we rely a lot
815/// on cloning.
816#[derive(Debug, Clone)]
817struct BlocksInfo {
818 id: src::BlockId,
819 /// All the successors of the block
820 succs: BTreeSet<OrdBlockId>,
821 /// Let's say we put a quantity of water equal to 1 on the block, and the
822 /// water flows downards. Whenever there is a branching, the quantity of
823 /// water gets equally divided between the branches. When the control flows
824 /// join, we put the water back together. The set below computes the amount
825 /// of water received by each descendant of the node.
826 ///
827 /// TODO: there must be a known algorithm which computes this, right?...
828 /// This is exactly this problems:
829 /// https://stackoverflow.com/questions/78221666/algorithm-for-total-flow-through-weighted-directed-acyclic-graph
830 /// TODO: the way I compute this is not efficient.
831 ///
832 /// Remark: in order to rank the nodes, we also use the negation of the
833 /// rank given by the topological order. The last elements of the set
834 /// have the highest flow, that is they are the nodes to which the maximum
835 /// number of paths converge. If several nodes have the same flow, we want
836 /// to take the highest one in the hierarchy: hence the use of the inverse
837 /// of the topological rank.
838 ///
839 /// Ex.:
840 /// ```text
841 /// A -- we start here
842 /// |
843 /// |---------------------------------------
844 /// | | | |
845 /// B:(0.25,-1) C:(0.25,-2) D:(0.25,-3) E:(0.25,-4)
846 /// | | |
847 /// |--------------------------
848 /// |
849 /// F:(0.75,-5)
850 /// |
851 /// |
852 /// G:(0.75,-6)
853 /// ```
854 /// The "best" node (with the highest (flow, rank) in the graph above is F.
855 flow: BTreeSet<FlowBlockId>,
856}
857
858/// Create an [OrdBlockId] from a block id and a rank given by a map giving
859/// a sort (topological in our use cases) over the graph.
860fn make_ord_block_id(
861 block_id: src::BlockId,
862 sort_map: &HashMap<src::BlockId, usize>,
863) -> OrdBlockId {
864 let rank = *sort_map.get(&block_id).unwrap();
865 OrdBlockId { id: block_id, rank }
866}
867
868/// Compute [BlocksInfo] for every block in the graph.
869/// This information is then used to compute the switch exits.
870fn compute_switch_exits_explore(
871 cfg: &CfgInfo,
872 tsort_map: &HashMap<src::BlockId, usize>,
873 memoized: &mut HashMap<src::BlockId, BlocksInfo>,
874 block_id: src::BlockId,
875) {
876 // Check if we already computer the info
877 if memoized.get(&block_id).is_some() {
878 return;
879 }
880
881 // Compute the block information for the children
882 let children_ids: Vec<src::BlockId> = cfg.cfg_no_be.neighbors(block_id).collect_vec();
883 children_ids
884 .iter()
885 .for_each(|bid| compute_switch_exits_explore(cfg, tsort_map, memoized, *bid));
886
887 // Retrieve the information
888 let children: Vec<&BlocksInfo> = children_ids
889 .iter()
890 .map(|bid| memoized.get(bid).unwrap())
891 .collect();
892
893 // Compute the successors
894 // Add the children themselves in their sets of successors
895 let all_succs: BTreeSet<OrdBlockId> = children.iter().fold(BTreeSet::new(), |acc, s| {
896 // Add the successors to the accumulator
897 let mut acc: BTreeSet<_> = acc.union(&s.succs).copied().collect();
898 // Add the child itself
899 acc.insert(make_ord_block_id(s.id, tsort_map));
900 acc
901 });
902
903 // Compute the "flows" (if there are children)
904 // TODO: this is computationally expensive...
905 let mut flow: HashMap<src::BlockId, (BigRational, isize)> = HashMap::new();
906
907 if children.len() > 0 {
908 // We need to divide the initial flow equally between the children
909 let factor = BigRational::new(BigInt::from(1), BigInt::from(children.len()));
910
911 // Small helper
912 let mut add_to_flow = |(id, f0): (src::BlockId, (BigRational, isize))| match flow.get(&id) {
913 None => flow.insert(id, f0),
914 Some(f1) => {
915 let f = f0.0 + f1.0.clone();
916 assert!(f0.1 == f1.1);
917 flow.insert(id, (f, f0.1))
918 }
919 };
920
921 // For each child, multiply the flows of its own children by the ratio,
922 // and add.
923 for child in children {
924 // First, add the child itself
925 let rank = isize::try_from(*tsort_map.get(&child.id).unwrap()).unwrap();
926 add_to_flow((child.id, (factor.clone(), -rank)));
927
928 // Then add its successors
929 for child1 in &child.flow {
930 add_to_flow((
931 child1.id,
932 (factor.clone() * child1.rank.0.clone(), child1.rank.1),
933 ));
934 }
935 }
936 }
937
938 // Put everything in an ordered set: the first block id will be the one with
939 // the highest flow, and in case of equality it will be the one with the
940 // smallest block id.
941 let flow: BTreeSet<FlowBlockId> = flow
942 .into_iter()
943 .map(|(id, rank)| BlockWithRank { rank, id })
944 .collect();
945
946 trace!("block: {block_id}, all successors: {all_succs:?}, flow: {flow:?}");
947
948 // Memoize
949 let info = BlocksInfo {
950 id: block_id,
951 succs: all_succs,
952 flow,
953 };
954 memoized.insert(block_id, info.clone());
955}
956
957/// Auxiliary helper
958///
959/// Check if it is possible to reach the exit of an outer switch from [bid]
960/// without going through the [exit_candidate]. We use the graph without
961/// backward edges.
962fn can_reach_outer_exit(
963 cfg: &CfgInfo,
964 outer_exits: &HashSet<src::BlockId>,
965 start_bid: src::BlockId,
966 exit_candidate: src::BlockId,
967) -> bool {
968 // The stack of blocks
969 let mut stack: Vec<src::BlockId> = vec![start_bid];
970 let mut explored: HashSet<src::BlockId> = HashSet::new();
971
972 while let Some(bid) = stack.pop() {
973 // Check if already explored
974 if explored.contains(&bid) {
975 break;
976 }
977 explored.insert(bid);
978
979 // Check if this is the exit candidate
980 if bid == exit_candidate {
981 // Stop exploring
982 break;
983 }
984
985 // Check if this is an outer exit
986 if outer_exits.contains(&bid) {
987 return true;
988 }
989
990 // Add the children to the stack
991 for child in cfg.cfg_no_be.neighbors(bid) {
992 stack.push(child);
993 }
994 }
995
996 false
997}
998
999/// See [`compute_loop_switch_exits`](compute_loop_switch_exits) for
1000/// explanations about what "exits" are.
1001///
1002/// In order to compute the switch exits, we simply recursively compute a
1003/// topologically ordered set of "filtered successors" as follows (note
1004/// that we work in the CFG *without* back edges):
1005/// - for a block which doesn't branch (only one successor), the filtered
1006/// successors is the set of reachable nodes.
1007/// - for a block which branches, we compute the nodes reachable from all
1008/// the children, and find the "best" intersection between those.
1009/// Note that we find the "best" intersection (a pair of branches which
1010/// maximize the intersection of filtered successors) because some branches
1011/// might never join the control-flow of the other branches, if they contain
1012/// a `break`, `return`, `panic`, etc., like here:
1013/// ```text
1014/// if b { x = 3; } { return; }
1015/// y += x;
1016/// ...
1017/// ```
1018/// Note that with nested switches, the branches of the inner switches might
1019/// goto the exits of the outer switches: for this reason, we give precedence
1020/// to the outer switches.
1021fn compute_switch_exits(
1022 cfg: &CfgInfo,
1023 tsort_map: &HashMap<src::BlockId, usize>,
1024) -> HashMap<src::BlockId, Option<src::BlockId>> {
1025 // Compute the successors info map, starting at the root node
1026 let mut succs_info_map = HashMap::new();
1027 trace!(
1028 "- cfg.cfg:\n{:?}\n- cfg.cfg_no_be:\n{:?}\n- cfg.switch_blocks:\n{:?}",
1029 cfg.cfg,
1030 cfg.cfg_no_be,
1031 cfg.switch_blocks
1032 );
1033 let _ = compute_switch_exits_explore(cfg, tsort_map, &mut succs_info_map, src::BlockId::ZERO);
1034
1035 // We need to give precedence to the outer switches: we thus iterate
1036 // over the switch blocks in topological order.
1037 let mut sorted_switch_blocks: BTreeSet<OrdBlockId> = BTreeSet::new();
1038 for bid in cfg.switch_blocks.iter() {
1039 sorted_switch_blocks.insert(make_ord_block_id(*bid, tsort_map));
1040 }
1041 trace!("sorted_switch_blocks: {:?}", sorted_switch_blocks);
1042
1043 // Debugging: print all the successors
1044 {
1045 trace!("Successors info:\n{}\n", {
1046 let mut out = vec![];
1047 for (bid, info) in &succs_info_map {
1048 out.push(
1049 format!(
1050 "{} -> {{succs: {:?}, flow: {:?}}}",
1051 bid, &info.succs, &info.flow
1052 )
1053 .to_string(),
1054 );
1055 }
1056 out.join("\n")
1057 });
1058 }
1059
1060 // For every node which is a switch, retrieve the exit.
1061 // As the set of intersection of successors is topologically sorted, the
1062 // exit should be the first node in the set (if the set is non empty).
1063 // Also, we need to explore the nodes in topological order, to give
1064 // precedence to the outer switches.
1065 let mut exits_set = HashSet::new();
1066 let mut ord_exits_set = BTreeSet::new();
1067 let mut exits = HashMap::new();
1068 for bid in sorted_switch_blocks {
1069 trace!("Finding exit candidate for: {bid:?}");
1070 let bid = bid.id;
1071 let info = succs_info_map.get(&bid).unwrap();
1072 let succs = &info.flow;
1073 // Find the best successor: this is the last one (with the highest flow,
1074 // and the highest reverse topological rank).
1075 if succs.is_empty() {
1076 trace!("{bid:?} has no successors");
1077 exits.insert(bid, None);
1078 } else {
1079 // We have an exit candidate: check that it was not already
1080 // taken by an external switch
1081 let exit = succs.last().unwrap();
1082 trace!("{bid:?} has an exit candidate: {exit:?}");
1083 if exits_set.contains(&exit.id) {
1084 trace!("Ignoring the exit candidate because already taken by an external switch");
1085 exits.insert(bid, None);
1086 } else {
1087 // It was not taken by an external switch.
1088 //
1089 // We must check that we can't reach the exit of an external
1090 // switch from one of the branches, without going through the
1091 // exit candidate.
1092 // We do this by simply checking that we can't reach any exits
1093 // (and use the fact that we explore the switch by using a
1094 // topological order to not discard valid exit candidates).
1095 //
1096 // The reason is that it can lead to code like the following:
1097 // ```
1098 // if ... { // if #1
1099 // if ... { // if #2
1100 // ...
1101 // // here, we have a `goto b1`, where b1 is the exit
1102 // // of if #2: we thus stop translating the blocks.
1103 // }
1104 // else {
1105 // ...
1106 // // here, we have a `goto b2`, where b2 is the exit
1107 // // of if #1: we thus stop translating the blocks.
1108 // }
1109 // // We insert code for the block b1 here (which is the exit of
1110 // // the exit of if #2). However, this block should only
1111 // // be executed in the branch "then" of the if #2, not in
1112 // // the branch "else".
1113 // ...
1114 // }
1115 // else {
1116 // ...
1117 // }
1118 // ```
1119
1120 // First: we do a quick check (does the set of all successors
1121 // intersect the set of exits for outer blocks?). If yes, we do
1122 // a more precise analysis: we check if we can reach the exit
1123 // *without going through* the exit candidate.
1124 if info.succs.intersection(&ord_exits_set).next().is_none()
1125 || !can_reach_outer_exit(cfg, &exits_set, bid, exit.id)
1126 {
1127 trace!("Keeping the exit candidate");
1128 // No intersection: ok
1129 exits_set.insert(exit.id);
1130 ord_exits_set.insert(make_ord_block_id(exit.id, tsort_map));
1131 exits.insert(bid, Some(exit.id));
1132 } else {
1133 trace!("Ignoring the exit candidate because of an intersection with external switches");
1134 exits.insert(bid, None);
1135 }
1136 }
1137 }
1138 }
1139
1140 exits
1141}
1142
1143/// The exits of a graph
1144#[derive(Debug, Clone)]
1145struct ExitInfo {
1146 /// The loop exits
1147 loop_exits: HashMap<src::BlockId, Option<src::BlockId>>,
1148 /// Some loop exits actually belong to outer switches. We still need
1149 /// to track them in the loop exits, in order to know when we should
1150 /// insert a break. However, we need to make sure we don't add the
1151 /// corresponding block in a sequence, after having translated the
1152 /// loop, like so:
1153 /// ```text
1154 /// loop {
1155 /// loop_body
1156 /// };
1157 /// exit_blocks // OK if the exit "belongs" to the loop
1158 /// ```
1159 ///
1160 /// In case the exit doesn't belong to the loop:
1161 /// ```text
1162 /// if b {
1163 /// loop {
1164 /// loop_body
1165 /// } // no exit blocks after the loop
1166 /// }
1167 /// else {
1168 /// ...
1169 /// };
1170 /// exit_blocks // the exit blocks are here
1171 /// ```
1172 owned_loop_exits: HashMap<src::BlockId, Option<src::BlockId>>,
1173 /// The switch exits.
1174 /// Note that the switch exits are always owned.
1175 owned_switch_exits: HashMap<src::BlockId, Option<src::BlockId>>,
1176}
1177
1178/// Compute the exits for the loops and the switches (switch on integer and
1179/// if ... then ... else ...). We need to do this because control-flow in MIR
1180/// is destructured: we have gotos everywhere.
1181///
1182/// Let's consider the following piece of code:
1183/// ```text
1184/// if cond1 { ... } else { ... };
1185/// if cond2 { ... } else { ... };
1186/// ```
1187/// Once converted to MIR, the control-flow is destructured, which means we
1188/// have gotos everywhere. When reconstructing the control-flow, we have
1189/// to be careful about the point where we should join the two branches of
1190/// the first if.
1191/// For instance, if we don't notice they should be joined at some point (i.e,
1192/// whatever the branch we take, there is a moment when we go to the exact
1193/// same place, just before the second if), we might generate code like
1194/// this, with some duplicata:
1195/// ```text
1196/// if cond1 { ...; if cond2 { ... } else { ...} }
1197/// else { ...; if cond2 { ... } else { ...} }
1198/// ```
1199///
1200/// Such a reconstructed program is valid, but it is definitely non-optimal:
1201/// it is very different from the original program (making it less clean and
1202/// clear), more bloated, and might involve duplicating the proof effort.
1203///
1204/// For this reason, we need to find the "exit" of the first loop, which is
1205/// the point where the two branches join. Note that this can be a bit tricky,
1206/// because there may be more than two branches (if we do `switch(x) { ... }`),
1207/// and some of them might not join (if they contain a `break`, `panic`,
1208/// `return`, etc.).
1209///
1210/// Finally, some similar issues arise for loops. For instance, let's consider
1211/// the following piece of code:
1212/// ```text
1213/// while cond1 {
1214/// e1;
1215/// if cond2 {
1216/// break;
1217/// }
1218/// e2;
1219/// }
1220/// e3;
1221/// return;
1222/// ```
1223///
1224/// Note that in MIR, the loop gets desugared to an if ... then ... else ....
1225/// From the MIR, We want to generate something like this:
1226/// ```text
1227/// loop {
1228/// if cond1 {
1229/// e1;
1230/// if cond2 {
1231/// break;
1232/// }
1233/// e2;
1234/// continue;
1235/// }
1236/// else {
1237/// break;
1238/// }
1239/// };
1240/// e3;
1241/// return;
1242/// ```
1243///
1244/// But if we don't pay attention, we might end up with that, once again with
1245/// duplications:
1246/// ```text
1247/// loop {
1248/// if cond1 {
1249/// e1;
1250/// if cond2 {
1251/// e3;
1252/// return;
1253/// }
1254/// e2;
1255/// continue;
1256/// }
1257/// else {
1258/// e3;
1259/// return;
1260/// }
1261/// }
1262/// ```
1263/// We thus have to notice that if the loop condition is false, we goto the same
1264/// block as when following the goto introduced by the break inside the loop, and
1265/// this block is dubbed the "loop exit".
1266///
1267/// The following function thus computes the "exits" for loops and switches, which
1268/// are basically the points where control-flow joins.
1269fn compute_loop_switch_exits(
1270 ctx: &mut TransformCtx,
1271 body: &src::ExprBody,
1272 cfg_info: &CfgInfo,
1273) -> ExitInfo {
1274 // Use the CFG without backward edges to topologically sort the nodes.
1275 // Note that `toposort` returns `Err` if and only if it finds cycles (which
1276 // can't happen).
1277 let tsorted: Vec<src::BlockId> = toposort(&cfg_info.cfg_no_be, None).unwrap();
1278
1279 // Build the map: block id -> topological sort rank
1280 let tsort_map: HashMap<src::BlockId, usize> = tsorted
1281 .into_iter()
1282 .enumerate()
1283 .map(|(i, block_id)| (block_id, i))
1284 .collect();
1285 trace!("tsort_map:\n{:?}", tsort_map);
1286
1287 // Compute the loop exits
1288 let loop_exits = compute_loop_exits(ctx, body, cfg_info);
1289 trace!("loop_exits:\n{:?}", loop_exits);
1290
1291 // Compute the switch exits
1292 let switch_exits = compute_switch_exits(cfg_info, &tsort_map);
1293 trace!("switch_exits:\n{:?}", switch_exits);
1294
1295 // Compute the exit info
1296 let mut exit_info = ExitInfo {
1297 loop_exits: HashMap::new(),
1298 owned_loop_exits: HashMap::new(),
1299 owned_switch_exits: HashMap::new(),
1300 };
1301
1302 // We need to give precedence to the outer switches and loops: we thus iterate
1303 // over the blocks in topological order.
1304 let mut sorted_blocks: BTreeSet<OrdBlockId> = BTreeSet::new();
1305 for bid in cfg_info
1306 .loop_entries
1307 .iter()
1308 .chain(cfg_info.switch_blocks.iter())
1309 {
1310 sorted_blocks.insert(make_ord_block_id(*bid, &tsort_map));
1311 }
1312
1313 // Keep track of the exits which were already attributed
1314 let mut all_exits = HashSet::new();
1315
1316 // Put all this together
1317 for bid in sorted_blocks {
1318 let bid = bid.id;
1319 // Check if loop or switch block
1320 if cfg_info.loop_entries.contains(&bid) {
1321 // This is a loop.
1322 //
1323 // For loops, we always register the exit (if there is one).
1324 // However, the exit may be owned by an outer switch (note
1325 // that we already took care of spreading the exits between
1326 // the inner/outer loops)
1327 let exit_id = loop_exits.get(&bid).unwrap();
1328 exit_info.loop_exits.insert(bid, *exit_id);
1329
1330 // Check if we "own" the exit
1331 match exit_id {
1332 None => {
1333 // No exit
1334 exit_info.owned_loop_exits.insert(bid, None);
1335 }
1336 Some(exit_id) => {
1337 if all_exits.contains(exit_id) {
1338 // We don't own it
1339 exit_info.owned_loop_exits.insert(bid, None);
1340 } else {
1341 // We own it
1342 exit_info.owned_loop_exits.insert(bid, Some(*exit_id));
1343 all_exits.insert(*exit_id);
1344 }
1345 }
1346 }
1347 } else {
1348 // For switches: check that the exit was not already given to a
1349 // loop
1350 let exit_id = switch_exits.get(&bid).unwrap();
1351
1352 match exit_id {
1353 None => {
1354 // No exit
1355 exit_info.owned_switch_exits.insert(bid, None);
1356 }
1357 Some(exit_id) => {
1358 if all_exits.contains(exit_id) {
1359 // We don't own it
1360 exit_info.owned_switch_exits.insert(bid, None);
1361 } else {
1362 // We own it
1363 exit_info.owned_switch_exits.insert(bid, Some(*exit_id));
1364 all_exits.insert(*exit_id);
1365 }
1366 }
1367 }
1368 }
1369 }
1370
1371 exit_info
1372}
1373
1374fn get_goto_kind(
1375 exits_info: &ExitInfo,
1376 parent_loops: &Vec<src::BlockId>,
1377 switch_exit_blocks: &HashSet<src::BlockId>,
1378 next_block_id: src::BlockId,
1379) -> GotoKind {
1380 // First explore the parent loops in revert order
1381 for (i, loop_id) in parent_loops.iter().rev().enumerate() {
1382 // If we goto a loop entry node: this is a 'continue'
1383 if next_block_id == *loop_id {
1384 return GotoKind::Continue(i);
1385 } else {
1386 // If we goto a loop exit node: this is a 'break'
1387 if let Some(exit_id) = exits_info.loop_exits.get(loop_id).unwrap() {
1388 if next_block_id == *exit_id {
1389 return GotoKind::Break(i);
1390 }
1391 }
1392 }
1393 }
1394
1395 // Check if the goto exits the current block
1396 if switch_exit_blocks.contains(&next_block_id) {
1397 return GotoKind::ExitBlock;
1398 }
1399
1400 // Default
1401 GotoKind::Goto
1402}
1403
1404enum GotoKind {
1405 Break(usize),
1406 Continue(usize),
1407 ExitBlock,
1408 Goto,
1409}
1410
1411/// `parent_span`: we need some span data for the new statement.
1412/// We use the one for the parent terminator.
1413fn translate_child_block(
1414 info: &mut BlockInfo<'_>,
1415 parent_loops: &Vec<src::BlockId>,
1416 switch_exit_blocks: &HashSet<src::BlockId>,
1417 parent_span: Span,
1418 child_id: src::BlockId,
1419) -> Option<tgt::Block> {
1420 // Check if this is a backward call
1421 match get_goto_kind(info.exits_info, parent_loops, switch_exit_blocks, child_id) {
1422 GotoKind::Break(index) => {
1423 let st = tgt::RawStatement::Break(index);
1424 Some(tgt::Statement::new(parent_span, st).into_block())
1425 }
1426 GotoKind::Continue(index) => {
1427 let st = tgt::RawStatement::Continue(index);
1428 Some(tgt::Statement::new(parent_span, st).into_block())
1429 }
1430 // If we are going to an exit block we simply ignore the goto
1431 GotoKind::ExitBlock => None,
1432 GotoKind::Goto => {
1433 // "Standard" goto: just recursively translate
1434 ensure_sufficient_stack(|| {
1435 Some(translate_block(
1436 info,
1437 parent_loops,
1438 switch_exit_blocks,
1439 child_id,
1440 ))
1441 })
1442 }
1443 }
1444}
1445
1446fn opt_block_unwrap_or_nop(span: Span, opt_block: Option<tgt::Block>) -> tgt::Block {
1447 opt_block.unwrap_or_else(|| tgt::Statement::new(span, tgt::RawStatement::Nop).into_block())
1448}
1449
1450fn translate_statement(st: &src::Statement) -> Option<tgt::Statement> {
1451 let src_span = st.span;
1452 let st = match st.content.clone() {
1453 src::RawStatement::Assign(place, rvalue) => tgt::RawStatement::Assign(place, rvalue),
1454 src::RawStatement::Call(s) => tgt::RawStatement::Call(s),
1455 src::RawStatement::SetDiscriminant(place, variant_id) => {
1456 tgt::RawStatement::SetDiscriminant(place, variant_id)
1457 }
1458 src::RawStatement::StorageLive(var_id) => tgt::RawStatement::StorageLive(var_id),
1459 src::RawStatement::StorageDead(var_id) => tgt::RawStatement::StorageDead(var_id),
1460 src::RawStatement::Deinit(place) => tgt::RawStatement::Deinit(place),
1461 src::RawStatement::Drop(place) => tgt::RawStatement::Drop(place),
1462 src::RawStatement::Assert(assert) => tgt::RawStatement::Assert(assert),
1463 src::RawStatement::Nop => tgt::RawStatement::Nop,
1464 src::RawStatement::Error(s) => tgt::RawStatement::Error(s),
1465 };
1466 Some(tgt::Statement::new(src_span, st))
1467}
1468
1469fn translate_terminator(
1470 info: &mut BlockInfo<'_>,
1471 parent_loops: &Vec<src::BlockId>,
1472 switch_exit_blocks: &HashSet<src::BlockId>,
1473 terminator: &src::Terminator,
1474) -> tgt::Block {
1475 let src_span = terminator.span;
1476
1477 match &terminator.content {
1478 src::RawTerminator::Abort(kind) => {
1479 tgt::Statement::new(src_span, tgt::RawStatement::Abort(kind.clone())).into_block()
1480 }
1481 src::RawTerminator::Return => {
1482 tgt::Statement::new(src_span, tgt::RawStatement::Return).into_block()
1483 }
1484 src::RawTerminator::Goto { target } => {
1485 let block = translate_child_block(
1486 info,
1487 parent_loops,
1488 switch_exit_blocks,
1489 terminator.span,
1490 *target,
1491 );
1492 let block = opt_block_unwrap_or_nop(terminator.span, block);
1493 block
1494 }
1495 src::RawTerminator::Switch { discr, targets } => {
1496 // Translate the target expressions
1497 let switch = match &targets {
1498 src::SwitchTargets::If(then_tgt, else_tgt) => {
1499 // Translate the children expressions
1500 let then_block = translate_child_block(
1501 info,
1502 parent_loops,
1503 switch_exit_blocks,
1504 terminator.span,
1505 *then_tgt,
1506 );
1507 // We use the terminator span information in case then
1508 // then statement is `None`
1509 let then_block = opt_block_unwrap_or_nop(terminator.span, then_block);
1510 let else_block = translate_child_block(
1511 info,
1512 parent_loops,
1513 switch_exit_blocks,
1514 terminator.span,
1515 *else_tgt,
1516 );
1517 let else_block = opt_block_unwrap_or_nop(terminator.span, else_block);
1518
1519 // Translate
1520 tgt::Switch::If(discr.clone(), then_block, else_block)
1521 }
1522 src::SwitchTargets::SwitchInt(int_ty, targets, otherwise) => {
1523 // Note that some branches can be grouped together, like
1524 // here:
1525 // ```
1526 // match e {
1527 // E::V1 | E::V2 => ..., // Grouped
1528 // E::V3 => ...
1529 // }
1530 // ```
1531 // We detect this by checking if a block has already been
1532 // translated as one of the branches of the switch.
1533 //
1534 // Rk.: note there may be intermediate gotos depending
1535 // on the MIR we use. Typically, we manage to detect the
1536 // grouped branches with Optimized MIR, but not with Promoted
1537 // MIR. See the comment in "tests/src/matches.rs".
1538
1539 // We link block ids to:
1540 // - vector of matched integer values
1541 // - translated blocks
1542 let mut branches: IndexMap<src::BlockId, (Vec<ScalarValue>, tgt::Block)> =
1543 IndexMap::new();
1544
1545 // Translate the children expressions
1546 for (v, bid) in targets.iter() {
1547 // Check if the block has already been translated:
1548 // if yes, it means we need to group branches
1549 if branches.contains_key(bid) {
1550 // Already translated: add the matched value to
1551 // the list of values
1552 let branch = branches.get_mut(bid).unwrap();
1553 branch.0.push(*v);
1554 } else {
1555 // Not translated: translate it
1556 let block = translate_child_block(
1557 info,
1558 parent_loops,
1559 switch_exit_blocks,
1560 terminator.span,
1561 *bid,
1562 );
1563 // We use the terminator span information in case then
1564 // then statement is `None`
1565 let block = opt_block_unwrap_or_nop(terminator.span, block);
1566 branches.insert(*bid, (vec![*v], block));
1567 }
1568 }
1569 let targets_blocks: Vec<(Vec<ScalarValue>, tgt::Block)> =
1570 branches.into_iter().map(|(_, x)| x).collect();
1571
1572 let otherwise_block = translate_child_block(
1573 info,
1574 parent_loops,
1575 switch_exit_blocks,
1576 terminator.span,
1577 *otherwise,
1578 );
1579 // We use the terminator span information in case then
1580 // then statement is `None`
1581 let otherwise_block = opt_block_unwrap_or_nop(terminator.span, otherwise_block);
1582
1583 // Translate
1584 tgt::Switch::SwitchInt(discr.clone(), *int_ty, targets_blocks, otherwise_block)
1585 }
1586 };
1587
1588 // Return
1589 let span = tgt::combine_switch_targets_span(&switch);
1590 let span = combine_span(&src_span, &span);
1591 let st = tgt::RawStatement::Switch(switch);
1592 tgt::Statement::new(span, st).into_block()
1593 }
1594 }
1595}
1596
1597/// Return `true` if whatever the path we take, evaluating the statement
1598/// necessarily leads to:
1599/// - a panic or return
1600/// - a break which goes to a loop outside the expression
1601/// - a continue statement
1602fn is_terminal(block: &tgt::Block) -> bool {
1603 is_terminal_explore_block(0, block)
1604}
1605
1606fn is_terminal_explore(num_loops: usize, st: &tgt::Statement) -> bool {
1607 match &st.content {
1608 tgt::RawStatement::Assign(_, _)
1609 | tgt::RawStatement::SetDiscriminant(_, _)
1610 | tgt::RawStatement::StorageLive(_)
1611 | tgt::RawStatement::StorageDead(_)
1612 | tgt::RawStatement::Deinit(_)
1613 | tgt::RawStatement::Drop(_)
1614 | tgt::RawStatement::Assert(_)
1615 | tgt::RawStatement::Call(_)
1616 | tgt::RawStatement::Nop
1617 | tgt::RawStatement::Error(_) => false,
1618 tgt::RawStatement::Abort(..) | tgt::RawStatement::Return => true,
1619 tgt::RawStatement::Break(index) => *index >= num_loops,
1620 tgt::RawStatement::Continue(_index) => true,
1621 tgt::RawStatement::Switch(switch) => switch
1622 .iter_targets()
1623 .all(|tgt_st| is_terminal_explore_block(num_loops, tgt_st)),
1624 tgt::RawStatement::Loop(loop_st) => is_terminal_explore_block(num_loops + 1, loop_st),
1625 }
1626}
1627fn is_terminal_explore_block(num_loops: usize, block: &tgt::Block) -> bool {
1628 block
1629 .statements
1630 .iter()
1631 .any(|st| is_terminal_explore(num_loops, st))
1632}
1633
1634/// Remark: some values are boxed (here, the returned statement) so that they
1635/// are allocated on the heap. This reduces stack usage (we had problems with
1636/// stack overflows in the past). A more efficient solution would be to use loops
1637/// to make this code constant space, but that would require a serious rewriting.
1638fn translate_block(
1639 info: &mut BlockInfo<'_>,
1640 parent_loops: &Vec<src::BlockId>,
1641 switch_exit_blocks: &HashSet<src::BlockId>,
1642 block_id: src::BlockId,
1643) -> tgt::Block {
1644 // If the user activated this check: check that we didn't already translate
1645 // this block, and insert the block id in the set of already translated blocks.
1646 trace!(
1647 "Parent loops: {:?}, Parent switch exits: {:?}, Block id: {}",
1648 parent_loops,
1649 switch_exit_blocks,
1650 block_id
1651 );
1652 if info.no_code_duplication {
1653 assert!(!info.explored.contains(&block_id));
1654 }
1655 info.explored.insert(block_id);
1656
1657 let block = info.body.body.get(block_id).unwrap();
1658
1659 // Check if we enter a loop: if so, update parent_loops and the current_exit_block
1660 let is_loop = info.cfg.loop_entries.contains(&block_id);
1661 let mut nparent_loops: Vec<src::BlockId>;
1662 let nparent_loops = if info.cfg.loop_entries.contains(&block_id) {
1663 nparent_loops = parent_loops.clone();
1664 nparent_loops.push(block_id);
1665 &nparent_loops
1666 } else {
1667 parent_loops
1668 };
1669
1670 // If we enter a switch or a loop, we need to check if we own the exit
1671 // block, in which case we need to append it to the loop/switch body
1672 // in a sequence
1673 let is_switch = block.terminator.content.is_switch();
1674 let next_block = if is_loop {
1675 *info.exits_info.owned_loop_exits.get(&block_id).unwrap()
1676 } else if is_switch {
1677 *info.exits_info.owned_switch_exits.get(&block_id).unwrap()
1678 } else {
1679 None
1680 };
1681
1682 // If we enter a switch, add the exit block to the set
1683 // of outer exit blocks
1684 let nswitch_exit_blocks = if is_switch {
1685 let mut nexit_blocks = switch_exit_blocks.clone();
1686 match next_block {
1687 None => nexit_blocks,
1688 Some(bid) => {
1689 nexit_blocks.insert(bid);
1690 nexit_blocks
1691 }
1692 }
1693 } else {
1694 switch_exit_blocks.clone()
1695 };
1696
1697 // Translate the terminator and the subsequent blocks.
1698 // Note that this terminator is an option: we might ignore it
1699 // (if it is an exit).
1700
1701 let terminator =
1702 translate_terminator(info, nparent_loops, &nswitch_exit_blocks, &block.terminator);
1703
1704 // Translate the statements inside the block
1705 let statements = block
1706 .statements
1707 .iter()
1708 .filter_map(|st| translate_statement(st))
1709 .collect_vec();
1710
1711 // Prepend the statements to the terminator.
1712 let mut block = if let Some(st) = tgt::Block::from_seq(statements) {
1713 st.merge(terminator)
1714 } else {
1715 terminator
1716 };
1717
1718 if is_loop {
1719 // Put the loop body inside a `Loop`.
1720 block = tgt::Statement::new(block.span, tgt::RawStatement::Loop(block)).into_block()
1721 } else if is_switch {
1722 if next_block.is_some() {
1723 // Sanity check: if there is an exit block, this block must be
1724 // reachable (i.e, there must exist a path in the switch which
1725 // doesn't end with `panic`, `return`, etc.).
1726 assert!(!is_terminal(&block));
1727 }
1728 } else {
1729 assert!(next_block.is_none());
1730 }
1731
1732 // Concatenate the exit expression, if needs be
1733 if let Some(exit_block_id) = next_block {
1734 let next_block = ensure_sufficient_stack(|| {
1735 translate_block(info, parent_loops, switch_exit_blocks, exit_block_id)
1736 });
1737 block = block.merge(next_block);
1738 }
1739
1740 block
1741}
1742
1743fn translate_body_aux(
1744 ctx: &mut TransformCtx,
1745 no_code_duplication: bool,
1746 src_body: &src::ExprBody,
1747) -> tgt::ExprBody {
1748 // Explore the function body to create the control-flow graph without backward
1749 // edges, and identify the loop entries (which are destinations of backward edges).
1750 let cfg_info = build_cfg_info(src_body);
1751 trace!("cfg_info: {:?}", cfg_info);
1752
1753 // Find the exit block for all the loops and switches, if such an exit point
1754 // exists.
1755 let exits_info = compute_loop_switch_exits(ctx, src_body, &cfg_info);
1756
1757 // Debugging
1758 trace!("exits_info:\n{:?}", exits_info);
1759
1760 // Translate the body by reconstructing the loops and the
1761 // conditional branchings.
1762 // Note that we shouldn't get `None`.
1763 let mut explored = HashSet::new();
1764 let mut info = BlockInfo {
1765 no_code_duplication,
1766 cfg: &cfg_info,
1767 body: src_body,
1768 exits_info: &exits_info,
1769 explored: &mut explored,
1770 };
1771 let tgt_body = translate_block(&mut info, &Vec::new(), &HashSet::new(), src::BlockId::ZERO);
1772
1773 // Sanity: check that we translated all the blocks
1774 for (bid, _) in src_body.body.iter_indexed_values() {
1775 assert!(explored.contains(&bid));
1776 }
1777
1778 tgt::ExprBody {
1779 span: src_body.span,
1780 locals: src_body.locals.clone(),
1781 comments: src_body.comments.clone(),
1782 body: tgt_body,
1783 }
1784}
1785
1786fn translate_body(ctx: &mut TransformCtx, no_code_duplication: bool, body: &mut gast::Body) {
1787 use gast::Body::{Structured, Unstructured};
1788 let Unstructured(src_body) = body else {
1789 panic!("Called `ullbc_to_llbc` on an already restructured body")
1790 };
1791 trace!("About to translate to ullbc: {:?}", src_body.span);
1792 let tgt_body = translate_body_aux(ctx, no_code_duplication, src_body);
1793 *body = Structured(tgt_body);
1794}
1795
1796pub struct Transform;
1797impl TransformPass for Transform {
1798 fn transform_ctx(&self, ctx: &mut TransformCtx) {
1799 // Translate the bodies one at a time.
1800 ctx.for_each_body(|ctx, body| {
1801 translate_body(ctx, ctx.options.no_code_duplication, body);
1802 });
1803
1804 // Print the functions
1805 let fmt_ctx = ctx.into_fmt();
1806 for fun in &ctx.translated.fun_decls {
1807 trace!(
1808 "# Signature:\n{}\n\n# Function definition:\n{}\n",
1809 fmt_ctx.format_object(&fun.signature),
1810 fmt_ctx.format_object(fun),
1811 );
1812 }
1813 // Print the global variables
1814 for global in &ctx.translated.global_decls {
1815 trace!(
1816 "# Type:\n{}\n\n# Global definition:\n{}\n",
1817 fmt_ctx.format_object(&global.ty),
1818 fmt_ctx.format_object(global)
1819 );
1820 }
1821
1822 if ctx.options.print_built_llbc {
1823 info!("# LLBC resulting from control-flow reconstruction:\n\n{ctx}\n",);
1824 }
1825 }
1826}