charon_lib/transform/ullbc_to_llbc.rs
1//! ULLBC to LLBC
2//!
3//! We reconstruct the control-flow in the Unstructured LLBC.
4//!
5//! The reconstruction algorithm is not written to be efficient (its complexity
6//! is probably very bad), but it was not written to be: this is still an early
7//! stage and we want the algorithm to generate the best reconstruction as
8//! possible. We still need to test the algorithm on more interesting examples,
9//! and will consider making it more efficient once it is a bit mature and well
10//! tested.
11//! Also note that we more importantly focus on making the algorithm sound: the
12//! reconstructed program must always be equivalent to the original MIR program,
13//! and the fact that the reconstruction preserves this property must be obvious.
14//!
15//! Finally, we conjecture the execution time shouldn't be too much a problem:
16//! we don't expect the translation mechanism to be applied on super huge functions
17//! (which will be difficult to formally analyze), and the MIR graphs are actually
18//! not that big because statements are grouped into code blocks (a block is made
19//! of a list of statements, followed by a terminator - branchings and jumps can
20//! only be performed by terminators -, meaning that MIR graphs don't have that
21//! many nodes and edges).
22
23use crate::ast::*;
24use crate::common::ensure_sufficient_stack;
25use crate::errors::sanity_check;
26use crate::formatter::IntoFormatter;
27use crate::llbc_ast as tgt;
28use crate::meta::{Span, combine_span};
29use crate::pretty::FmtWithCtx;
30use crate::transform::TransformCtx;
31use crate::ullbc_ast::{self as src};
32use indexmap::IndexMap;
33use itertools::Itertools;
34use num_bigint::BigInt;
35use num_rational::BigRational;
36use petgraph::Direction;
37use petgraph::algo::toposort;
38use petgraph::graphmap::DiGraphMap;
39use std::collections::{BTreeSet, HashMap, HashSet, VecDeque};
40
41use super::ctx::TransformPass;
42
43/// Control-Flow Graph
44type Cfg = DiGraphMap<src::BlockId, ()>;
45
46/// Small utility
47struct BlockInfo<'a> {
48 cfg: &'a CfgInfo,
49 body: &'a src::ExprBody,
50 exits_info: &'a ExitInfo,
51 explored: &'a mut HashSet<src::BlockId>,
52}
53
54/// This structure contains various information about a function's CFG.
55#[derive(Debug)]
56struct CfgInfo {
57 /// The CFG
58 pub cfg: Cfg,
59 /// The CFG where all the backward edges have been removed
60 pub cfg_no_be: Cfg,
61 /// We consider the destination of the backward edges to be loop entries and
62 /// store them here.
63 pub loop_entries: HashSet<src::BlockId>,
64 /// The backward edges
65 pub backward_edges: HashSet<(src::BlockId, src::BlockId)>,
66 /// The blocks whose terminators are a switch are stored here.
67 pub switch_blocks: HashSet<src::BlockId>,
68 /// The set of nodes from where we can only reach error nodes (panic, etc.)
69 pub only_reach_error: HashSet<src::BlockId>,
70}
71
72/// Build the CFGs (the "regular" CFG and the CFG without backward edges) and
73/// compute some information like the loop entries and the switch blocks.
74fn build_cfg_info(body: &src::ExprBody) -> CfgInfo {
75 let mut cfg = CfgInfo {
76 cfg: Cfg::new(),
77 cfg_no_be: Cfg::new(),
78 loop_entries: HashSet::new(),
79 backward_edges: HashSet::new(),
80 switch_blocks: HashSet::new(),
81 only_reach_error: HashSet::new(),
82 };
83
84 // Add the nodes
85 for block_id in body.body.iter_indices() {
86 cfg.cfg.add_node(block_id);
87 cfg.cfg_no_be.add_node(block_id);
88 }
89
90 // Add the edges
91 let ancestors = HashSet::new();
92 let mut explored = HashSet::new();
93 build_cfg_partial_info_edges(
94 &mut cfg,
95 &ancestors,
96 &mut explored,
97 body,
98 src::BlockId::ZERO,
99 );
100
101 cfg
102}
103
104fn block_is_switch(body: &src::ExprBody, block_id: src::BlockId) -> bool {
105 let block = body.body.get(block_id).unwrap();
106 block.terminator.content.is_switch()
107}
108
109/// The terminator of the block is a panic, etc.
110fn block_is_error(body: &src::ExprBody, block_id: src::BlockId) -> bool {
111 let block = body.body.get(block_id).unwrap();
112 use src::RawTerminator::*;
113 match &block.terminator.content {
114 Abort(..) => true,
115 Goto { .. } | Switch { .. } | Return | Call { .. } | UnwindResume => false,
116 }
117}
118
119fn build_cfg_partial_info_edges(
120 cfg: &mut CfgInfo,
121 ancestors: &HashSet<src::BlockId>,
122 explored: &mut HashSet<src::BlockId>,
123 body: &src::ExprBody,
124 block_id: src::BlockId,
125) {
126 // Check if we already explored the current node
127 if explored.contains(&block_id) {
128 return;
129 }
130 explored.insert(block_id);
131
132 // Insert the current block in the set of ancestors blocks
133 let mut ancestors = ancestors.clone();
134 ancestors.insert(block_id);
135
136 // Check if it is a switch
137 if block_is_switch(body, block_id) {
138 cfg.switch_blocks.insert(block_id);
139 }
140
141 // Retrieve the block targets
142 let mut targets = body.body.get(block_id).unwrap().targets();
143 // Hack: we don't translate unwind paths in llbc so we ignore them here.
144 if let src::RawTerminator::Call { target, .. } =
145 body.body.get(block_id).unwrap().terminator.content
146 {
147 targets = vec![target];
148 }
149
150 let mut has_backward_edge = false;
151
152 // Add edges for all the targets and explore them, if they are not predecessors
153 for tgt in &targets {
154 // Insert the edge in the "regular" CFG
155 cfg.cfg.add_edge(block_id, *tgt, ());
156
157 // We need to check if it is a backward edge before inserting it in the
158 // CFG without backward edges and exploring it
159 if ancestors.contains(tgt) {
160 // This is a backward edge
161 has_backward_edge = true;
162 cfg.loop_entries.insert(*tgt);
163 cfg.backward_edges.insert((block_id, *tgt));
164 } else {
165 // Not a backward edge: insert the edge and explore
166 cfg.cfg_no_be.add_edge(block_id, *tgt, ());
167 ensure_sufficient_stack(|| {
168 build_cfg_partial_info_edges(cfg, &ancestors, explored, body, *tgt);
169 })
170 }
171 }
172
173 // Check if this node can only reach error nodes:
174 // - we check if the current node ends with an error terminator
175 // - or check that all the targets lead to error nodes
176 // Note that if there is a backward edge, we consider that we don't necessarily
177 // go to error.
178 if !has_backward_edge
179 && (block_is_error(body, block_id)
180 || (
181 // The targets are empty if this is an error node *or* a return node
182 !targets.is_empty() && targets.iter().all(|tgt| cfg.only_reach_error.contains(tgt))
183 ))
184 {
185 cfg.only_reach_error.insert(block_id);
186 }
187}
188
189#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
190struct BlockWithRank<T> {
191 /// A "rank" quantity that we use to order the nodes.
192 /// By placing the `rank` field before the `id`, we make sure that
193 /// we use the rank first in the lexicographic order.
194 rank: T,
195 id: src::BlockId,
196}
197
198type OrdBlockId = BlockWithRank<usize>;
199
200/// For the rank we use:
201/// - a "flow" quantity (see [BlocksInfo])
202/// - the *inverse* rank in the topological sort (i.e., `- topo_rank`)
203type FlowBlockId = BlockWithRank<(BigRational, isize)>;
204
205#[derive(Debug, Clone)]
206struct LoopExitCandidateInfo {
207 /// The occurrences going to this exit.
208 /// For every occurrence, we register the distance between the loop entry
209 /// and the node from which the edge going to the exit starts.
210 /// If later we have to choose between candidates with the same number
211 /// of occurrences, we choose the one with the smallest distances.
212 ///
213 /// Note that it *may* happen that we have several exit candidates referenced
214 /// more than once for one loop. This comes from the fact that whenever we
215 /// reach a node from which the loop entry is not reachable (without using a
216 /// backward edge leading to an outer loop entry), we register this node
217 /// as well as all its children as exit candidates.
218 /// Consider the following example:
219 /// ```text
220 /// while i < max {
221 /// if cond {
222 /// break;
223 /// }
224 /// s += i;
225 /// i += 1
226 /// }
227 /// // All the below nodes are exit candidates (each of them is referenced twice)
228 /// s += 1;
229 /// return s;
230 /// ```
231 pub occurrences: Vec<usize>,
232}
233
234/// Check if a loop entry is reachable from a node, in a graph where we remove
235/// the backward edges going directly to the loop entry.
236///
237/// If the loop entry is not reachable, it means that:
238/// - the loop entry is not reachable at all
239/// - or it is only reachable through an outer loop
240///
241/// The starting node should be a (transitive) child of the loop entry.
242/// We use this to find candidates for loop exits.
243fn loop_entry_is_reachable_from_inner(
244 cfg: &CfgInfo,
245 loop_entry: src::BlockId,
246 block_id: src::BlockId,
247) -> bool {
248 // It is reachable in the complete graph. Check if it is reachable by not
249 // going through backward edges which go to outer loops. In practice, we
250 // just need to forbid the use of any backward edges at the exception of
251 // those which go directly to the current loop's entry. This means that we
252 // ignore backward edges to outer loops of course, but also backward edges
253 // to inner loops because we shouldn't need to follow those (there should be
254 // more direct paths).
255
256 // Explore the graph starting at block_id
257 let mut explored: HashSet<src::BlockId> = HashSet::new();
258 let mut stack: VecDeque<src::BlockId> = VecDeque::new();
259 stack.push_back(block_id);
260 while !stack.is_empty() {
261 let bid = stack.pop_front().unwrap();
262 if explored.contains(&bid) {
263 continue;
264 }
265 explored.insert(bid);
266
267 let next_ids = cfg.cfg.neighbors_directed(bid, Direction::Outgoing);
268 for next_id in next_ids {
269 // Check if this is a backward edge
270 if cfg.backward_edges.contains(&(bid, next_id)) {
271 // Backward edge: only allow those going directly to the current
272 // loop's entry
273 if next_id == loop_entry {
274 // The loop entry is reachable
275 return true;
276 } else {
277 // Forbidden edge: ignore
278 continue;
279 }
280 } else {
281 // Nothing special: add the node to the stack for further
282 // exploration
283 stack.push_back(next_id);
284 }
285 }
286 }
287
288 // The loop entry is not reachable
289 false
290}
291
292struct FilteredLoopParents {
293 remaining_parents: Vec<(src::BlockId, usize)>,
294 removed_parents: Vec<(src::BlockId, usize)>,
295}
296
297fn filter_loop_parents(
298 cfg: &CfgInfo,
299 parent_loops: &Vec<(src::BlockId, usize)>,
300 block_id: src::BlockId,
301) -> Option<FilteredLoopParents> {
302 let mut eliminate: usize = 0;
303 for (loop_id, _ldist) in parent_loops.iter().rev() {
304 if !loop_entry_is_reachable_from_inner(cfg, *loop_id, block_id) {
305 eliminate += 1;
306 } else {
307 break;
308 }
309 }
310
311 if eliminate > 0 {
312 // Split the vector of parents
313 let (remaining_parents, removed_parents) =
314 parent_loops.split_at(parent_loops.len() - eliminate);
315 let (mut remaining_parents, removed_parents) =
316 (remaining_parents.to_vec(), removed_parents.to_vec());
317
318 // Update the distance to the last loop - we just increment the distance
319 // by 1, because from the point of view of the parent loop, we just exited
320 // a block and go to the next sequence of instructions.
321 if !remaining_parents.is_empty() {
322 remaining_parents.last_mut().unwrap().1 += 1;
323 }
324
325 Some(FilteredLoopParents {
326 remaining_parents,
327 removed_parents,
328 })
329 } else {
330 None
331 }
332}
333
334/// List the nodes reachable from a starting point.
335/// We list the nodes and the depth (in the AST) at which they were found.
336fn list_reachable(cfg: &Cfg, start: src::BlockId) -> HashMap<src::BlockId, usize> {
337 let mut reachable: HashMap<src::BlockId, usize> = HashMap::new();
338 let mut stack: VecDeque<(src::BlockId, usize)> = VecDeque::new();
339 stack.push_back((start, 0));
340
341 while !stack.is_empty() {
342 let (bid, dist) = stack.pop_front().unwrap();
343
344 // Ignore this node if we already registered it with a better distance
345 match reachable.get(&bid) {
346 None => (),
347 Some(original_dist) => {
348 if *original_dist < dist {
349 continue;
350 }
351 }
352 }
353
354 // Inset the node with its distance
355 reachable.insert(bid, dist);
356
357 // Add the children to the stack
358 for child in cfg.neighbors(bid) {
359 stack.push_back((child, dist + 1));
360 }
361 }
362
363 // Return
364 reachable
365}
366
367/// Register a node and its children as exit candidates for a list of
368/// parent loops.
369fn register_children_as_loop_exit_candidates(
370 cfg: &CfgInfo,
371 loop_exits: &mut HashMap<src::BlockId, IndexMap<src::BlockId, LoopExitCandidateInfo>>,
372 removed_parent_loops: &Vec<(src::BlockId, usize)>,
373 block_id: src::BlockId,
374) {
375 // List the reachable nodes
376 let reachable = list_reachable(&cfg.cfg_no_be, block_id);
377
378 let mut base_dist = 0;
379 // For every parent loop, in reverse order (we go from last to first in
380 // order to correctly compute the distances)
381 for (loop_id, loop_dist) in removed_parent_loops.iter().rev() {
382 // Update the distance to the loop entry
383 base_dist += *loop_dist;
384
385 // Retrieve the candidates
386 let candidates = loop_exits.get_mut(loop_id).unwrap();
387
388 // Update them
389 for (bid, dist) in reachable.iter() {
390 let distance = base_dist + *dist;
391 match candidates.get_mut(bid) {
392 None => {
393 candidates.insert(
394 *bid,
395 LoopExitCandidateInfo {
396 occurrences: vec![distance],
397 },
398 );
399 }
400 Some(c) => {
401 c.occurrences.push(distance);
402 }
403 }
404 }
405 }
406}
407
408/// Compute the loop exit candidates.
409///
410/// There may be several candidates with the same "optimality" (same number of
411/// occurrences, etc.), in which case we choose the first one which was registered
412/// (the order in which we explore the graph is deterministic): this is why we
413/// store the candidates in a linked hash map.
414fn compute_loop_exit_candidates(
415 cfg: &CfgInfo,
416 explored: &mut HashSet<src::BlockId>,
417 ordered_loops: &mut Vec<src::BlockId>,
418 loop_exits: &mut HashMap<src::BlockId, IndexMap<src::BlockId, LoopExitCandidateInfo>>,
419 // List of parent loops, with the distance to the entry of the loop (the distance
420 // is the distance between the current node and the loop entry for the last parent,
421 // and the distance between the parents for the others).
422 mut parent_loops: Vec<(src::BlockId, usize)>,
423 block_id: src::BlockId,
424) {
425 if explored.contains(&block_id) {
426 return;
427 }
428 explored.insert(block_id);
429
430 // Check if we enter a loop - add the corresponding node if necessary
431 if cfg.loop_entries.contains(&block_id) {
432 parent_loops.push((block_id, 1));
433 ordered_loops.push(block_id);
434 } else {
435 // Increase the distance with the parent loop
436 if !parent_loops.is_empty() {
437 parent_loops.last_mut().unwrap().1 += 1;
438 }
439 };
440
441 // Retrieve the children - note that we ignore the back edges
442 let children = cfg.cfg_no_be.neighbors(block_id);
443 for child in children {
444 // If the parent loop entry is not reachable from the child without going
445 // through a backward edge which goes directly to the loop entry, consider
446 // this node a potential exit.
447 ensure_sufficient_stack(|| {
448 match filter_loop_parents(cfg, &parent_loops, child) {
449 None => {
450 compute_loop_exit_candidates(
451 cfg,
452 explored,
453 ordered_loops,
454 loop_exits,
455 parent_loops.clone(),
456 child,
457 );
458 }
459 Some(fparent_loops) => {
460 // We filtered some parent loops: it means this child and its
461 // children are loop exit candidates for all those loops: we must
462 // thus register them.
463 // Note that we register the child *and* its children: the reason
464 // is that we might do something *then* actually jump to the exit.
465 // For instance, the following block of code:
466 // ```
467 // if cond { break; } else { ... }
468 // ```
469 //
470 // Gets translated in MIR to something like this:
471 // ```
472 // bb1: {
473 // if cond -> bb2 else -> bb3; // bb2 is not the real exit
474 // }
475 //
476 // bb2: {
477 // goto bb4; // bb4 is the real exit
478 // }
479 // ```
480 register_children_as_loop_exit_candidates(
481 cfg,
482 loop_exits,
483 &fparent_loops.removed_parents,
484 child,
485 );
486
487 // Explore, with the filtered parents
488 compute_loop_exit_candidates(
489 cfg,
490 explored,
491 ordered_loops,
492 loop_exits,
493 fparent_loops.remaining_parents,
494 child,
495 );
496 }
497 }
498 })
499 }
500}
501
502/// See [`compute_loop_switch_exits`] for
503/// explanations about what "exits" are.
504///
505/// The following function computes the loop exits. It acts as follows.
506///
507/// We keep track of a stack of the loops in which we entered.
508/// It is very easy to check when we enter a loop: loop entries are destinations
509/// of backward edges, which can be spotted with a simple graph exploration (see
510/// [`build_cfg_partial_info_edges`].
511/// The criteria to consider whether we exit a loop is the following:
512/// - we exit a loop if we go to a block from which we can't reach the loop
513/// entry at all
514/// - or if we can reach the loop entry, but must use a backward edge which goes
515/// to an outer loop
516///
517/// It is better explained on the following example:
518/// ```text
519/// 'outer while i < max {
520/// 'inner while j < max {
521/// j += 1;
522/// }
523/// // (i)
524/// i += 1;
525/// }
526/// ```
527/// If we enter the inner loop then go to (i) from the inner loop, we consider
528/// that we exited the outer loop because:
529/// - we can reach the entry of the inner loop from (i) (by finishing then
530/// starting again an iteration of the outer loop)
531/// - but doing this requires taking a backward edge which goes to the outer loop
532///
533/// Whenever we exit a loop, we save the block we went to as an exit candidate
534/// for this loop. Note that there may by many exit candidates. For instance,
535/// in the below example:
536/// ```text
537/// while ... {
538/// ...
539/// if ... {
540/// // We can't reach the loop entry from here: this is an exit
541/// // candidate
542/// return;
543/// }
544/// }
545/// // This is another exit candidate - and this is the one we want to use
546/// // as the "real" exit...
547/// ...
548/// ```
549///
550/// Also note that it may happen that we go several times to the same exit (if
551/// we use breaks for instance): we record the number of times an exit candidate
552/// is used.
553///
554/// Once we listed all the exit candidates, we find the "best" one for every
555/// loop, starting with the outer loops. We start with outer loops because
556/// inner loops might use breaks to exit to the exit of outer loops: if we
557/// start with the inner loops, the exit which is "natural" for the outer loop
558/// might end up being used for one of the inner loops...
559///
560/// The best exit is the following one:
561/// - it is the one which is used the most times (note that there can be
562/// several candidates which are referenced strictly more than once: see the
563/// comment below)
564/// - if several exits have the same number of occurrences, we choose the one
565/// for which we goto the "earliest" (earliest meaning that the goto is close to
566/// the loop entry node in the AST). The reason is that all the loops should
567/// have an outer if ... then ... else ... which executes the loop body or goes
568/// to the exit (note that this is not necessarily the first
569/// if ... then ... else ... we find: loop conditions can be arbitrary
570/// expressions, containing branchings).
571///
572/// # Several candidates for a loop exit:
573/// =====================================
574/// There used to be a sanity check to ensure there are no two different
575/// candidates with exactly the same number of occurrences and distance from
576/// the entry of the loop, if the number of occurrences is > 1.
577///
578/// We removed it because it does happen, for instance here (the match
579/// introduces an `unreachable` node, and it has the same number of
580/// occurrences and the same distance to the loop entry as the `panic`
581/// node):
582///
583/// ```text
584/// pub fn list_nth_mut_loop_pair<'a, T>(
585/// mut ls: &'a mut List<T>,
586/// mut i: u32,
587/// ) -> &'a mut T {
588/// loop {
589/// match ls {
590/// List::Nil => {
591/// panic!() // <-- best candidate
592/// }
593/// List::Cons(x, tl) => {
594/// if i == 0 {
595/// return x;
596/// } else {
597/// ls = tl;
598/// i -= 1;
599/// }
600/// }
601/// _ => {
602/// // Note that Rustc always introduces an unreachable branch after
603/// // desugaring matches.
604/// unreachable!(), // <-- best candidate
605/// }
606/// }
607/// }
608/// }
609/// ```
610///
611/// When this happens we choose an exit candidate whose edges don't necessarily
612/// lead to an error (above there are none, so we don't choose any exits). Note
613/// that this last condition is important to prevent loops from being unnecessarily
614/// nested:
615///
616/// ```text
617/// pub fn nested_loops_enum(step_out: usize, step_in: usize) -> usize {
618/// let mut s = 0;
619///
620/// for _ in 0..128 { // We don't want this loop to be nested with the loops below
621/// s += 1;
622/// }
623///
624/// for _ in 0..(step_out) {
625/// for _ in 0..(step_in) {
626/// s += 1;
627/// }
628/// }
629///
630/// s
631/// }
632/// ```
633fn compute_loop_exits(
634 ctx: &mut TransformCtx,
635 body: &src::ExprBody,
636 cfg: &CfgInfo,
637) -> HashMap<src::BlockId, Option<src::BlockId>> {
638 let mut explored = HashSet::new();
639 let mut ordered_loops = Vec::new();
640 let mut loop_exits = HashMap::new();
641
642 // Initialize the loop exits candidates
643 for loop_id in &cfg.loop_entries {
644 loop_exits.insert(*loop_id, IndexMap::new());
645 }
646
647 // Compute the candidates
648 compute_loop_exit_candidates(
649 cfg,
650 &mut explored,
651 &mut ordered_loops,
652 &mut loop_exits,
653 Vec::new(),
654 src::BlockId::ZERO,
655 );
656
657 {
658 // Debugging
659 let candidates: Vec<String> = loop_exits
660 .iter()
661 .map(|(loop_id, candidates)| format!("{loop_id} -> {candidates:?}"))
662 .collect();
663 trace!("Loop exit candidates:\n{}", candidates.join("\n"));
664 }
665
666 // Choose one candidate among the potential candidates.
667 let mut exits: HashSet<src::BlockId> = HashSet::new();
668 let mut chosen_loop_exits: HashMap<src::BlockId, Option<src::BlockId>> = HashMap::new();
669 // For every loop
670 for loop_id in ordered_loops {
671 // Check the candidates.
672 // Ignore the candidates which have already been chosen as exits for other
673 // loops (which should be outer loops).
674 // We choose the exit with:
675 // - the most occurrences
676 // - the least total distance (if there are several possibilities)
677 // - doesn't necessarily lead to an error (panic, unreachable)
678
679 // First:
680 // - filter the candidates
681 // - compute the number of occurrences
682 // - compute the sum of distances
683 // TODO: we could simply order by using a lexicographic order
684 let loop_exits = loop_exits
685 .get(&loop_id)
686 .unwrap()
687 .iter()
688 // If candidate already selected for another loop: ignore
689 .filter(|(candidate_id, _)| !exits.contains(candidate_id))
690 .map(|(candidate_id, candidate_info)| {
691 let num_occurrences = candidate_info.occurrences.len();
692 let dist_sum = candidate_info.occurrences.iter().sum();
693 (*candidate_id, num_occurrences, dist_sum)
694 })
695 .collect_vec();
696
697 trace!(
698 "Loop {}: possible exits:\n{}",
699 loop_id,
700 loop_exits
701 .iter()
702 .map(|(bid, occs, dsum)| format!(
703 "{bid} -> {{ occurrences: {occs}, dist_sum: {dsum} }}",
704 ))
705 .collect::<Vec<String>>()
706 .join("\n")
707 );
708
709 // Second: actually select the proper candidate.
710
711 // We find the one with the highest occurrence and the smallest distance
712 // from the entry of the loop (note that we take care of listing the exit
713 // candidates in a deterministic order).
714 let mut best_exit: Option<src::BlockId> = None;
715 let mut best_occurrences = 0;
716 let mut best_dist_sum = std::usize::MAX;
717 for (candidate_id, occurrences, dist_sum) in &loop_exits {
718 if (*occurrences > best_occurrences)
719 || (*occurrences == best_occurrences && *dist_sum < best_dist_sum)
720 {
721 best_exit = Some(*candidate_id);
722 best_occurrences = *occurrences;
723 best_dist_sum = *dist_sum;
724 }
725 }
726
727 let possible_candidates: Vec<_> = loop_exits
728 .iter()
729 .filter_map(|(bid, occs, dsum)| {
730 if *occs == best_occurrences && *dsum == best_dist_sum {
731 Some(*bid)
732 } else {
733 None
734 }
735 })
736 .collect();
737 let num_possible_candidates = loop_exits.len();
738
739 // If there is exactly one one best candidate, it is easy.
740 // Otherwise we need to split further.
741 if num_possible_candidates > 1 {
742 trace!("Best candidates: {:?}", possible_candidates);
743 // TODO: if we use a lexicographic order we can merge this with the code
744 // above.
745 // Remove the candidates which only lead to errors (panic or unreachable).
746 let candidates: Vec<_> = possible_candidates
747 .iter()
748 .filter(|bid| !cfg.only_reach_error.contains(bid))
749 .collect();
750 // If there is exactly one candidate we select it
751 if candidates.len() == 1 {
752 let exit_id = *candidates[0];
753 exits.insert(exit_id);
754 trace!("Loop {loop_id}: selected the best exit candidate {exit_id}");
755 chosen_loop_exits.insert(loop_id, Some(exit_id));
756 } else {
757 // Otherwise we do not select any exit.
758 // We don't want to select any exit if we are in the below situation
759 // (all paths lead to errors). We added a sanity check below to
760 // catch the situations where there are several exits which don't
761 // lead to errors.
762 //
763 // Example:
764 // ========
765 // ```
766 // loop {
767 // match ls {
768 // List::Nil => {
769 // panic!() // <-- best candidate
770 // }
771 // List::Cons(x, tl) => {
772 // if i == 0 {
773 // return x;
774 // } else {
775 // ls = tl;
776 // i -= 1;
777 // }
778 // }
779 // _ => {
780 // unreachable!(); // <-- best candidate (Rustc introduces an `unreachable` case)
781 // }
782 // }
783 // }
784 // ```
785 //
786 // Adding this sanity check so that we can see when there are
787 // several candidates.
788 let span = body.body[loop_id].terminator.span; // Taking *a* span from the block
789 sanity_check!(ctx, span, candidates.is_empty());
790 trace!(
791 "Loop {loop_id}: did not select an exit candidate because they all lead to panics"
792 );
793 chosen_loop_exits.insert(loop_id, None);
794 }
795 } else {
796 // Register the exit, if there is one
797 match best_exit {
798 None => {
799 // No exit was found
800 trace!("Loop {loop_id}: could not find an exit candidate");
801 chosen_loop_exits.insert(loop_id, None);
802 }
803 Some(exit_id) => {
804 exits.insert(exit_id);
805 trace!("Loop {loop_id}: selected the unique exit candidate {exit_id}");
806 chosen_loop_exits.insert(loop_id, Some(exit_id));
807 }
808 }
809 }
810 }
811
812 // Return the chosen exits
813 trace!("Chosen loop exits: {:?}", chosen_loop_exits);
814 chosen_loop_exits
815}
816
817/// Information used to compute the switch exits.
818/// We compute this information for every block in the graph.
819/// Note that we make sure to use immutable sets because we rely a lot
820/// on cloning.
821#[derive(Debug, Clone)]
822struct BlocksInfo {
823 id: src::BlockId,
824 /// All the successors of the block
825 succs: BTreeSet<OrdBlockId>,
826 /// Let's say we put a quantity of water equal to 1 on the block, and the
827 /// water flows downards. Whenever there is a branching, the quantity of
828 /// water gets equally divided between the branches. When the control flows
829 /// join, we put the water back together. The set below computes the amount
830 /// of water received by each descendant of the node.
831 ///
832 /// TODO: there must be a known algorithm which computes this, right?...
833 /// This is exactly this problems:
834 /// <https://stackoverflow.com/questions/78221666/algorithm-for-total-flow-through-weighted-directed-acyclic-graph>
835 /// TODO: the way I compute this is not efficient.
836 ///
837 /// Remark: in order to rank the nodes, we also use the negation of the
838 /// rank given by the topological order. The last elements of the set
839 /// have the highest flow, that is they are the nodes to which the maximum
840 /// number of paths converge. If several nodes have the same flow, we want
841 /// to take the highest one in the hierarchy: hence the use of the inverse
842 /// of the topological rank.
843 ///
844 /// Ex.:
845 /// ```text
846 /// A -- we start here
847 /// |
848 /// |---------------------------------------
849 /// | | | |
850 /// B:(0.25,-1) C:(0.25,-2) D:(0.25,-3) E:(0.25,-4)
851 /// | | |
852 /// |--------------------------
853 /// |
854 /// F:(0.75,-5)
855 /// |
856 /// |
857 /// G:(0.75,-6)
858 /// ```
859 /// The "best" node (with the highest (flow, rank) in the graph above is F.
860 flow: BTreeSet<FlowBlockId>,
861}
862
863/// Create an [OrdBlockId] from a block id and a rank given by a map giving
864/// a sort (topological in our use cases) over the graph.
865fn make_ord_block_id(
866 block_id: src::BlockId,
867 sort_map: &HashMap<src::BlockId, usize>,
868) -> OrdBlockId {
869 let rank = *sort_map.get(&block_id).unwrap();
870 OrdBlockId { id: block_id, rank }
871}
872
873/// Compute [BlocksInfo] for every block in the graph.
874/// This information is then used to compute the switch exits.
875fn compute_switch_exits_explore(
876 cfg: &CfgInfo,
877 tsort_map: &HashMap<src::BlockId, usize>,
878 memoized: &mut HashMap<src::BlockId, BlocksInfo>,
879 block_id: src::BlockId,
880) {
881 // Check if we already computer the info
882 if memoized.get(&block_id).is_some() {
883 return;
884 }
885
886 // Compute the block information for the children
887 let children_ids: Vec<src::BlockId> = cfg.cfg_no_be.neighbors(block_id).collect_vec();
888 children_ids
889 .iter()
890 .for_each(|bid| compute_switch_exits_explore(cfg, tsort_map, memoized, *bid));
891
892 // Retrieve the information
893 let children: Vec<&BlocksInfo> = children_ids
894 .iter()
895 .map(|bid| memoized.get(bid).unwrap())
896 .collect();
897
898 // Compute the successors
899 // Add the children themselves in their sets of successors
900 let all_succs: BTreeSet<OrdBlockId> = children.iter().fold(BTreeSet::new(), |acc, s| {
901 // Add the successors to the accumulator
902 let mut acc: BTreeSet<_> = acc.union(&s.succs).copied().collect();
903 // Add the child itself
904 acc.insert(make_ord_block_id(s.id, tsort_map));
905 acc
906 });
907
908 // Compute the "flows" (if there are children)
909 // TODO: this is computationally expensive...
910 let mut flow: HashMap<src::BlockId, (BigRational, isize)> = HashMap::new();
911
912 if children.len() > 0 {
913 // We need to divide the initial flow equally between the children
914 let factor = BigRational::new(BigInt::from(1), BigInt::from(children.len()));
915
916 // Small helper
917 let mut add_to_flow = |(id, f0): (src::BlockId, (BigRational, isize))| match flow.get(&id) {
918 None => flow.insert(id, f0),
919 Some(f1) => {
920 let f = f0.0 + f1.0.clone();
921 assert!(f0.1 == f1.1);
922 flow.insert(id, (f, f0.1))
923 }
924 };
925
926 // For each child, multiply the flows of its own children by the ratio,
927 // and add.
928 for child in children {
929 // First, add the child itself
930 let rank = isize::try_from(*tsort_map.get(&child.id).unwrap()).unwrap();
931 add_to_flow((child.id, (factor.clone(), -rank)));
932
933 // Then add its successors
934 for child1 in &child.flow {
935 add_to_flow((
936 child1.id,
937 (factor.clone() * child1.rank.0.clone(), child1.rank.1),
938 ));
939 }
940 }
941 }
942
943 // Put everything in an ordered set: the first block id will be the one with
944 // the highest flow, and in case of equality it will be the one with the
945 // smallest block id.
946 let flow: BTreeSet<FlowBlockId> = flow
947 .into_iter()
948 .map(|(id, rank)| BlockWithRank { rank, id })
949 .collect();
950
951 trace!("block: {block_id}, all successors: {all_succs:?}, flow: {flow:?}");
952
953 // Memoize
954 let info = BlocksInfo {
955 id: block_id,
956 succs: all_succs,
957 flow,
958 };
959 memoized.insert(block_id, info.clone());
960}
961
962/// Auxiliary helper
963///
964/// Check if it is possible to reach the exit of an outer switch from `start_bid`
965/// without going through the `exit_candidate`. We use the graph without
966/// backward edges.
967fn can_reach_outer_exit(
968 cfg: &CfgInfo,
969 outer_exits: &HashSet<src::BlockId>,
970 start_bid: src::BlockId,
971 exit_candidate: src::BlockId,
972) -> bool {
973 // The stack of blocks
974 let mut stack: Vec<src::BlockId> = vec![start_bid];
975 let mut explored: HashSet<src::BlockId> = HashSet::new();
976
977 while let Some(bid) = stack.pop() {
978 // Check if already explored
979 if explored.contains(&bid) {
980 break;
981 }
982 explored.insert(bid);
983
984 // Check if this is the exit candidate
985 if bid == exit_candidate {
986 // Stop exploring
987 break;
988 }
989
990 // Check if this is an outer exit
991 if outer_exits.contains(&bid) {
992 return true;
993 }
994
995 // Add the children to the stack
996 for child in cfg.cfg_no_be.neighbors(bid) {
997 stack.push(child);
998 }
999 }
1000
1001 false
1002}
1003
1004/// See [`compute_loop_switch_exits`] for
1005/// explanations about what "exits" are.
1006///
1007/// In order to compute the switch exits, we simply recursively compute a
1008/// topologically ordered set of "filtered successors" as follows (note
1009/// that we work in the CFG *without* back edges):
1010/// - for a block which doesn't branch (only one successor), the filtered
1011/// successors is the set of reachable nodes.
1012/// - for a block which branches, we compute the nodes reachable from all
1013/// the children, and find the "best" intersection between those.
1014/// Note that we find the "best" intersection (a pair of branches which
1015/// maximize the intersection of filtered successors) because some branches
1016/// might never join the control-flow of the other branches, if they contain
1017/// a `break`, `return`, `panic`, etc., like here:
1018/// ```text
1019/// if b { x = 3; } { return; }
1020/// y += x;
1021/// ...
1022/// ```
1023/// Note that with nested switches, the branches of the inner switches might
1024/// goto the exits of the outer switches: for this reason, we give precedence
1025/// to the outer switches.
1026fn compute_switch_exits(
1027 cfg: &CfgInfo,
1028 tsort_map: &HashMap<src::BlockId, usize>,
1029) -> HashMap<src::BlockId, Option<src::BlockId>> {
1030 // Compute the successors info map, starting at the root node
1031 let mut succs_info_map = HashMap::new();
1032 trace!(
1033 "- cfg.cfg:\n{:?}\n- cfg.cfg_no_be:\n{:?}\n- cfg.switch_blocks:\n{:?}",
1034 cfg.cfg, cfg.cfg_no_be, cfg.switch_blocks
1035 );
1036 let _ = compute_switch_exits_explore(cfg, tsort_map, &mut succs_info_map, src::BlockId::ZERO);
1037
1038 // We need to give precedence to the outer switches: we thus iterate
1039 // over the switch blocks in topological order.
1040 let mut sorted_switch_blocks: BTreeSet<OrdBlockId> = BTreeSet::new();
1041 for bid in cfg.switch_blocks.iter() {
1042 sorted_switch_blocks.insert(make_ord_block_id(*bid, tsort_map));
1043 }
1044 trace!("sorted_switch_blocks: {:?}", sorted_switch_blocks);
1045
1046 // Debugging: print all the successors
1047 {
1048 trace!("Successors info:\n{}\n", {
1049 let mut out = vec![];
1050 for (bid, info) in &succs_info_map {
1051 out.push(
1052 format!(
1053 "{} -> {{succs: {:?}, flow: {:?}}}",
1054 bid, &info.succs, &info.flow
1055 )
1056 .to_string(),
1057 );
1058 }
1059 out.join("\n")
1060 });
1061 }
1062
1063 // For every node which is a switch, retrieve the exit.
1064 // As the set of intersection of successors is topologically sorted, the
1065 // exit should be the first node in the set (if the set is non empty).
1066 // Also, we need to explore the nodes in topological order, to give
1067 // precedence to the outer switches.
1068 let mut exits_set = HashSet::new();
1069 let mut ord_exits_set = BTreeSet::new();
1070 let mut exits = HashMap::new();
1071 for bid in sorted_switch_blocks {
1072 trace!("Finding exit candidate for: {bid:?}");
1073 let bid = bid.id;
1074 let info = succs_info_map.get(&bid).unwrap();
1075 let succs = &info.flow;
1076 // Find the best successor: this is the last one (with the highest flow,
1077 // and the highest reverse topological rank).
1078 if succs.is_empty() {
1079 trace!("{bid:?} has no successors");
1080 exits.insert(bid, None);
1081 } else {
1082 // We have an exit candidate: check that it was not already
1083 // taken by an external switch
1084 let exit = succs.last().unwrap();
1085 trace!("{bid:?} has an exit candidate: {exit:?}");
1086 if exits_set.contains(&exit.id) {
1087 trace!("Ignoring the exit candidate because already taken by an external switch");
1088 exits.insert(bid, None);
1089 } else {
1090 // It was not taken by an external switch.
1091 //
1092 // We must check that we can't reach the exit of an external
1093 // switch from one of the branches, without going through the
1094 // exit candidate.
1095 // We do this by simply checking that we can't reach any exits
1096 // (and use the fact that we explore the switch by using a
1097 // topological order to not discard valid exit candidates).
1098 //
1099 // The reason is that it can lead to code like the following:
1100 // ```
1101 // if ... { // if #1
1102 // if ... { // if #2
1103 // ...
1104 // // here, we have a `goto b1`, where b1 is the exit
1105 // // of if #2: we thus stop translating the blocks.
1106 // }
1107 // else {
1108 // ...
1109 // // here, we have a `goto b2`, where b2 is the exit
1110 // // of if #1: we thus stop translating the blocks.
1111 // }
1112 // // We insert code for the block b1 here (which is the exit of
1113 // // the exit of if #2). However, this block should only
1114 // // be executed in the branch "then" of the if #2, not in
1115 // // the branch "else".
1116 // ...
1117 // }
1118 // else {
1119 // ...
1120 // }
1121 // ```
1122
1123 // First: we do a quick check (does the set of all successors
1124 // intersect the set of exits for outer blocks?). If yes, we do
1125 // a more precise analysis: we check if we can reach the exit
1126 // *without going through* the exit candidate.
1127 if info.succs.intersection(&ord_exits_set).next().is_none()
1128 || !can_reach_outer_exit(cfg, &exits_set, bid, exit.id)
1129 {
1130 trace!("Keeping the exit candidate");
1131 // No intersection: ok
1132 exits_set.insert(exit.id);
1133 ord_exits_set.insert(make_ord_block_id(exit.id, tsort_map));
1134 exits.insert(bid, Some(exit.id));
1135 } else {
1136 trace!(
1137 "Ignoring the exit candidate because of an intersection with external switches"
1138 );
1139 exits.insert(bid, None);
1140 }
1141 }
1142 }
1143 }
1144
1145 exits
1146}
1147
1148/// The exits of a graph
1149#[derive(Debug, Clone)]
1150struct ExitInfo {
1151 /// The loop exits
1152 loop_exits: HashMap<src::BlockId, Option<src::BlockId>>,
1153 /// Some loop exits actually belong to outer switches. We still need
1154 /// to track them in the loop exits, in order to know when we should
1155 /// insert a break. However, we need to make sure we don't add the
1156 /// corresponding block in a sequence, after having translated the
1157 /// loop, like so:
1158 /// ```text
1159 /// loop {
1160 /// loop_body
1161 /// };
1162 /// exit_blocks // OK if the exit "belongs" to the loop
1163 /// ```
1164 ///
1165 /// In case the exit doesn't belong to the loop:
1166 /// ```text
1167 /// if b {
1168 /// loop {
1169 /// loop_body
1170 /// } // no exit blocks after the loop
1171 /// }
1172 /// else {
1173 /// ...
1174 /// };
1175 /// exit_blocks // the exit blocks are here
1176 /// ```
1177 owned_loop_exits: HashMap<src::BlockId, Option<src::BlockId>>,
1178 /// The switch exits.
1179 /// Note that the switch exits are always owned.
1180 owned_switch_exits: HashMap<src::BlockId, Option<src::BlockId>>,
1181}
1182
1183/// Compute the exits for the loops and the switches (switch on integer and
1184/// if ... then ... else ...). We need to do this because control-flow in MIR
1185/// is destructured: we have gotos everywhere.
1186///
1187/// Let's consider the following piece of code:
1188/// ```text
1189/// if cond1 { ... } else { ... };
1190/// if cond2 { ... } else { ... };
1191/// ```
1192/// Once converted to MIR, the control-flow is destructured, which means we
1193/// have gotos everywhere. When reconstructing the control-flow, we have
1194/// to be careful about the point where we should join the two branches of
1195/// the first if.
1196/// For instance, if we don't notice they should be joined at some point (i.e,
1197/// whatever the branch we take, there is a moment when we go to the exact
1198/// same place, just before the second if), we might generate code like
1199/// this, with some duplicata:
1200/// ```text
1201/// if cond1 { ...; if cond2 { ... } else { ...} }
1202/// else { ...; if cond2 { ... } else { ...} }
1203/// ```
1204///
1205/// Such a reconstructed program is valid, but it is definitely non-optimal:
1206/// it is very different from the original program (making it less clean and
1207/// clear), more bloated, and might involve duplicating the proof effort.
1208///
1209/// For this reason, we need to find the "exit" of the first loop, which is
1210/// the point where the two branches join. Note that this can be a bit tricky,
1211/// because there may be more than two branches (if we do `switch(x) { ... }`),
1212/// and some of them might not join (if they contain a `break`, `panic`,
1213/// `return`, etc.).
1214///
1215/// Finally, some similar issues arise for loops. For instance, let's consider
1216/// the following piece of code:
1217/// ```text
1218/// while cond1 {
1219/// e1;
1220/// if cond2 {
1221/// break;
1222/// }
1223/// e2;
1224/// }
1225/// e3;
1226/// return;
1227/// ```
1228///
1229/// Note that in MIR, the loop gets desugared to an if ... then ... else ....
1230/// From the MIR, We want to generate something like this:
1231/// ```text
1232/// loop {
1233/// if cond1 {
1234/// e1;
1235/// if cond2 {
1236/// break;
1237/// }
1238/// e2;
1239/// continue;
1240/// }
1241/// else {
1242/// break;
1243/// }
1244/// };
1245/// e3;
1246/// return;
1247/// ```
1248///
1249/// But if we don't pay attention, we might end up with that, once again with
1250/// duplications:
1251/// ```text
1252/// loop {
1253/// if cond1 {
1254/// e1;
1255/// if cond2 {
1256/// e3;
1257/// return;
1258/// }
1259/// e2;
1260/// continue;
1261/// }
1262/// else {
1263/// e3;
1264/// return;
1265/// }
1266/// }
1267/// ```
1268/// We thus have to notice that if the loop condition is false, we goto the same
1269/// block as when following the goto introduced by the break inside the loop, and
1270/// this block is dubbed the "loop exit".
1271///
1272/// The following function thus computes the "exits" for loops and switches, which
1273/// are basically the points where control-flow joins.
1274fn compute_loop_switch_exits(
1275 ctx: &mut TransformCtx,
1276 body: &src::ExprBody,
1277 cfg_info: &CfgInfo,
1278) -> ExitInfo {
1279 // Use the CFG without backward edges to topologically sort the nodes.
1280 // Note that `toposort` returns `Err` if and only if it finds cycles (which
1281 // can't happen).
1282 let tsorted: Vec<src::BlockId> = toposort(&cfg_info.cfg_no_be, None).unwrap();
1283
1284 // Build the map: block id -> topological sort rank
1285 let tsort_map: HashMap<src::BlockId, usize> = tsorted
1286 .into_iter()
1287 .enumerate()
1288 .map(|(i, block_id)| (block_id, i))
1289 .collect();
1290 trace!("tsort_map:\n{:?}", tsort_map);
1291
1292 // Compute the loop exits
1293 let loop_exits = compute_loop_exits(ctx, body, cfg_info);
1294 trace!("loop_exits:\n{:?}", loop_exits);
1295
1296 // Compute the switch exits
1297 let switch_exits = compute_switch_exits(cfg_info, &tsort_map);
1298 trace!("switch_exits:\n{:?}", switch_exits);
1299
1300 // Compute the exit info
1301 let mut exit_info = ExitInfo {
1302 loop_exits: HashMap::new(),
1303 owned_loop_exits: HashMap::new(),
1304 owned_switch_exits: HashMap::new(),
1305 };
1306
1307 // We need to give precedence to the outer switches and loops: we thus iterate
1308 // over the blocks in topological order.
1309 let mut sorted_blocks: BTreeSet<OrdBlockId> = BTreeSet::new();
1310 for bid in cfg_info
1311 .loop_entries
1312 .iter()
1313 .chain(cfg_info.switch_blocks.iter())
1314 {
1315 sorted_blocks.insert(make_ord_block_id(*bid, &tsort_map));
1316 }
1317
1318 // Keep track of the exits which were already attributed
1319 let mut all_exits = HashSet::new();
1320
1321 // Put all this together
1322 for bid in sorted_blocks {
1323 let bid = bid.id;
1324 // Check if loop or switch block
1325 if cfg_info.loop_entries.contains(&bid) {
1326 // This is a loop.
1327 //
1328 // For loops, we always register the exit (if there is one).
1329 // However, the exit may be owned by an outer switch (note
1330 // that we already took care of spreading the exits between
1331 // the inner/outer loops)
1332 let exit_id = loop_exits.get(&bid).unwrap();
1333 exit_info.loop_exits.insert(bid, *exit_id);
1334
1335 // Check if we "own" the exit
1336 match exit_id {
1337 None => {
1338 // No exit
1339 exit_info.owned_loop_exits.insert(bid, None);
1340 }
1341 Some(exit_id) => {
1342 if all_exits.contains(exit_id) {
1343 // We don't own it
1344 exit_info.owned_loop_exits.insert(bid, None);
1345 } else {
1346 // We own it
1347 exit_info.owned_loop_exits.insert(bid, Some(*exit_id));
1348 all_exits.insert(*exit_id);
1349 }
1350 }
1351 }
1352 } else {
1353 // For switches: check that the exit was not already given to a
1354 // loop
1355 let exit_id = switch_exits.get(&bid).unwrap();
1356
1357 match exit_id {
1358 None => {
1359 // No exit
1360 exit_info.owned_switch_exits.insert(bid, None);
1361 }
1362 Some(exit_id) => {
1363 if all_exits.contains(exit_id) {
1364 // We don't own it
1365 exit_info.owned_switch_exits.insert(bid, None);
1366 } else {
1367 // We own it
1368 exit_info.owned_switch_exits.insert(bid, Some(*exit_id));
1369 all_exits.insert(*exit_id);
1370 }
1371 }
1372 }
1373 }
1374 }
1375
1376 exit_info
1377}
1378
1379fn get_goto_kind(
1380 exits_info: &ExitInfo,
1381 parent_loops: &Vec<src::BlockId>,
1382 switch_exit_blocks: &HashSet<src::BlockId>,
1383 next_block_id: src::BlockId,
1384) -> GotoKind {
1385 // First explore the parent loops in revert order
1386 for (i, loop_id) in parent_loops.iter().rev().enumerate() {
1387 // If we goto a loop entry node: this is a 'continue'
1388 if next_block_id == *loop_id {
1389 return GotoKind::Continue(i);
1390 } else {
1391 // If we goto a loop exit node: this is a 'break'
1392 if let Some(exit_id) = exits_info.loop_exits.get(loop_id).unwrap() {
1393 if next_block_id == *exit_id {
1394 return GotoKind::Break(i);
1395 }
1396 }
1397 }
1398 }
1399
1400 // Check if the goto exits the current block
1401 if switch_exit_blocks.contains(&next_block_id) {
1402 return GotoKind::ExitBlock;
1403 }
1404
1405 // Default
1406 GotoKind::Goto
1407}
1408
1409enum GotoKind {
1410 Break(usize),
1411 Continue(usize),
1412 ExitBlock,
1413 Goto,
1414}
1415
1416/// `parent_span`: we need some span data for the new statement.
1417/// We use the one for the parent terminator.
1418fn translate_child_block(
1419 info: &mut BlockInfo<'_>,
1420 parent_loops: &Vec<src::BlockId>,
1421 switch_exit_blocks: &HashSet<src::BlockId>,
1422 parent_span: Span,
1423 child_id: src::BlockId,
1424) -> Option<tgt::Block> {
1425 // Check if this is a backward call
1426 match get_goto_kind(info.exits_info, parent_loops, switch_exit_blocks, child_id) {
1427 GotoKind::Break(index) => {
1428 let st = tgt::RawStatement::Break(index);
1429 Some(tgt::Statement::new(parent_span, st).into_block())
1430 }
1431 GotoKind::Continue(index) => {
1432 let st = tgt::RawStatement::Continue(index);
1433 Some(tgt::Statement::new(parent_span, st).into_block())
1434 }
1435 // If we are going to an exit block we simply ignore the goto
1436 GotoKind::ExitBlock => None,
1437 GotoKind::Goto => {
1438 // "Standard" goto: just recursively translate
1439 ensure_sufficient_stack(|| {
1440 Some(translate_block(
1441 info,
1442 parent_loops,
1443 switch_exit_blocks,
1444 child_id,
1445 ))
1446 })
1447 }
1448 }
1449}
1450
1451fn opt_block_unwrap_or_nop(span: Span, opt_block: Option<tgt::Block>) -> tgt::Block {
1452 opt_block.unwrap_or_else(|| tgt::Statement::new(span, tgt::RawStatement::Nop).into_block())
1453}
1454
1455fn translate_statement(st: &src::Statement) -> Option<tgt::Statement> {
1456 let src_span = st.span;
1457 let st = match st.content.clone() {
1458 src::RawStatement::Assign(place, rvalue) => tgt::RawStatement::Assign(place, rvalue),
1459 src::RawStatement::SetDiscriminant(place, variant_id) => {
1460 tgt::RawStatement::SetDiscriminant(place, variant_id)
1461 }
1462 src::RawStatement::CopyNonOverlapping(copy) => tgt::RawStatement::CopyNonOverlapping(copy),
1463 src::RawStatement::StorageLive(var_id) => tgt::RawStatement::StorageLive(var_id),
1464 src::RawStatement::StorageDead(var_id) => tgt::RawStatement::StorageDead(var_id),
1465 src::RawStatement::Deinit(place) => tgt::RawStatement::Deinit(place),
1466 src::RawStatement::Drop(place, tref) => tgt::RawStatement::Drop(place, tref),
1467 src::RawStatement::Assert(assert) => tgt::RawStatement::Assert(assert),
1468 src::RawStatement::Nop => tgt::RawStatement::Nop,
1469 src::RawStatement::Error(s) => tgt::RawStatement::Error(s),
1470 };
1471 Some(tgt::Statement::new(src_span, st))
1472}
1473
1474fn translate_terminator(
1475 info: &mut BlockInfo<'_>,
1476 parent_loops: &Vec<src::BlockId>,
1477 switch_exit_blocks: &HashSet<src::BlockId>,
1478 terminator: &src::Terminator,
1479) -> tgt::Block {
1480 let src_span = terminator.span;
1481
1482 match &terminator.content {
1483 src::RawTerminator::Abort(kind) => {
1484 tgt::Statement::new(src_span, tgt::RawStatement::Abort(kind.clone())).into_block()
1485 }
1486 src::RawTerminator::Return => {
1487 tgt::Statement::new(src_span, tgt::RawStatement::Return).into_block()
1488 }
1489 src::RawTerminator::UnwindResume => {
1490 tgt::Statement::new(src_span, tgt::RawStatement::Abort(AbortKind::Panic(None)))
1491 .into_block()
1492 }
1493 src::RawTerminator::Call {
1494 call,
1495 target,
1496 on_unwind: _,
1497 } => {
1498 // TODO: Have unwinds in the LLBC
1499 let target_block = translate_child_block(
1500 info,
1501 parent_loops,
1502 switch_exit_blocks,
1503 terminator.span,
1504 *target,
1505 );
1506 let mut block = opt_block_unwrap_or_nop(terminator.span, target_block);
1507 let st = tgt::Statement::new(src_span, tgt::RawStatement::Call(call.clone()));
1508 block.statements.insert(0, st);
1509 block
1510 }
1511 src::RawTerminator::Goto { target } => {
1512 let block = translate_child_block(
1513 info,
1514 parent_loops,
1515 switch_exit_blocks,
1516 terminator.span,
1517 *target,
1518 );
1519 let block = opt_block_unwrap_or_nop(terminator.span, block);
1520 block
1521 }
1522 src::RawTerminator::Switch { discr, targets } => {
1523 // Translate the target expressions
1524 let switch = match &targets {
1525 src::SwitchTargets::If(then_tgt, else_tgt) => {
1526 // Translate the children expressions
1527 let then_block = translate_child_block(
1528 info,
1529 parent_loops,
1530 switch_exit_blocks,
1531 terminator.span,
1532 *then_tgt,
1533 );
1534 // We use the terminator span information in case then
1535 // then statement is `None`
1536 let then_block = opt_block_unwrap_or_nop(terminator.span, then_block);
1537 let else_block = translate_child_block(
1538 info,
1539 parent_loops,
1540 switch_exit_blocks,
1541 terminator.span,
1542 *else_tgt,
1543 );
1544 let else_block = opt_block_unwrap_or_nop(terminator.span, else_block);
1545
1546 // Translate
1547 tgt::Switch::If(discr.clone(), then_block, else_block)
1548 }
1549 src::SwitchTargets::SwitchInt(int_ty, targets, otherwise) => {
1550 // Note that some branches can be grouped together, like
1551 // here:
1552 // ```
1553 // match e {
1554 // E::V1 | E::V2 => ..., // Grouped
1555 // E::V3 => ...
1556 // }
1557 // ```
1558 // We detect this by checking if a block has already been
1559 // translated as one of the branches of the switch.
1560 //
1561 // Rk.: note there may be intermediate gotos depending
1562 // on the MIR we use. Typically, we manage to detect the
1563 // grouped branches with Optimized MIR, but not with Promoted
1564 // MIR. See the comment in "tests/src/matches.rs".
1565
1566 // We link block ids to:
1567 // - vector of matched integer values
1568 // - translated blocks
1569 let mut branches: IndexMap<src::BlockId, (Vec<ScalarValue>, tgt::Block)> =
1570 IndexMap::new();
1571
1572 // Translate the children expressions
1573 for (v, bid) in targets.iter() {
1574 // Check if the block has already been translated:
1575 // if yes, it means we need to group branches
1576 if branches.contains_key(bid) {
1577 // Already translated: add the matched value to
1578 // the list of values
1579 let branch = branches.get_mut(bid).unwrap();
1580 branch.0.push(*v);
1581 } else {
1582 // Not translated: translate it
1583 let block = translate_child_block(
1584 info,
1585 parent_loops,
1586 switch_exit_blocks,
1587 terminator.span,
1588 *bid,
1589 );
1590 // We use the terminator span information in case then
1591 // then statement is `None`
1592 let block = opt_block_unwrap_or_nop(terminator.span, block);
1593 branches.insert(*bid, (vec![*v], block));
1594 }
1595 }
1596 let targets_blocks: Vec<(Vec<ScalarValue>, tgt::Block)> =
1597 branches.into_iter().map(|(_, x)| x).collect();
1598
1599 let otherwise_block = translate_child_block(
1600 info,
1601 parent_loops,
1602 switch_exit_blocks,
1603 terminator.span,
1604 *otherwise,
1605 );
1606 // We use the terminator span information in case then
1607 // then statement is `None`
1608 let otherwise_block = opt_block_unwrap_or_nop(terminator.span, otherwise_block);
1609
1610 // Translate
1611 tgt::Switch::SwitchInt(discr.clone(), *int_ty, targets_blocks, otherwise_block)
1612 }
1613 };
1614
1615 // Return
1616 let span = tgt::combine_switch_targets_span(&switch);
1617 let span = combine_span(&src_span, &span);
1618 let st = tgt::RawStatement::Switch(switch);
1619 tgt::Statement::new(span, st).into_block()
1620 }
1621 }
1622}
1623
1624/// Remark: some values are boxed (here, the returned statement) so that they
1625/// are allocated on the heap. This reduces stack usage (we had problems with
1626/// stack overflows in the past). A more efficient solution would be to use loops
1627/// to make this code constant space, but that would require a serious rewriting.
1628fn translate_block(
1629 info: &mut BlockInfo<'_>,
1630 parent_loops: &Vec<src::BlockId>,
1631 switch_exit_blocks: &HashSet<src::BlockId>,
1632 block_id: src::BlockId,
1633) -> tgt::Block {
1634 // If the user activated this check: check that we didn't already translate
1635 // this block, and insert the block id in the set of already translated blocks.
1636 trace!(
1637 "Parent loops: {:?}, Parent switch exits: {:?}, Block id: {}",
1638 parent_loops, switch_exit_blocks, block_id
1639 );
1640 info.explored.insert(block_id);
1641
1642 let block = info.body.body.get(block_id).unwrap();
1643
1644 // Check if we enter a loop: if so, update parent_loops and the current_exit_block
1645 let is_loop = info.cfg.loop_entries.contains(&block_id);
1646 let mut nparent_loops: Vec<src::BlockId>;
1647 let nparent_loops = if info.cfg.loop_entries.contains(&block_id) {
1648 nparent_loops = parent_loops.clone();
1649 nparent_loops.push(block_id);
1650 &nparent_loops
1651 } else {
1652 parent_loops
1653 };
1654
1655 // If we enter a switch or a loop, we need to check if we own the exit
1656 // block, in which case we need to append it to the loop/switch body
1657 // in a sequence
1658 let is_switch = block.terminator.content.is_switch();
1659 let next_block = if is_loop {
1660 *info.exits_info.owned_loop_exits.get(&block_id).unwrap()
1661 } else if is_switch {
1662 *info.exits_info.owned_switch_exits.get(&block_id).unwrap()
1663 } else {
1664 None
1665 };
1666
1667 // If we enter a switch, add the exit block to the set
1668 // of outer exit blocks
1669 let nswitch_exit_blocks = if is_switch {
1670 let mut nexit_blocks = switch_exit_blocks.clone();
1671 match next_block {
1672 None => nexit_blocks,
1673 Some(bid) => {
1674 nexit_blocks.insert(bid);
1675 nexit_blocks
1676 }
1677 }
1678 } else {
1679 switch_exit_blocks.clone()
1680 };
1681
1682 // Translate the terminator and the subsequent blocks.
1683 // Note that this terminator is an option: we might ignore it
1684 // (if it is an exit).
1685
1686 let terminator =
1687 translate_terminator(info, nparent_loops, &nswitch_exit_blocks, &block.terminator);
1688
1689 // Translate the statements inside the block
1690 let statements = block
1691 .statements
1692 .iter()
1693 .filter_map(|st| translate_statement(st))
1694 .collect_vec();
1695
1696 // Prepend the statements to the terminator.
1697 let mut block = if let Some(st) = tgt::Block::from_seq(statements) {
1698 st.merge(terminator)
1699 } else {
1700 terminator
1701 };
1702
1703 if is_loop {
1704 // Put the loop body inside a `Loop`.
1705 block = tgt::Statement::new(block.span, tgt::RawStatement::Loop(block)).into_block()
1706 } else if !is_switch {
1707 assert!(next_block.is_none());
1708 }
1709
1710 // Concatenate the exit expression, if needs be
1711 if let Some(exit_block_id) = next_block {
1712 let next_block = ensure_sufficient_stack(|| {
1713 translate_block(info, parent_loops, switch_exit_blocks, exit_block_id)
1714 });
1715 block = block.merge(next_block);
1716 }
1717
1718 block
1719}
1720
1721fn translate_body_aux(ctx: &mut TransformCtx, src_body: &src::ExprBody) -> tgt::ExprBody {
1722 // Explore the function body to create the control-flow graph without backward
1723 // edges, and identify the loop entries (which are destinations of backward edges).
1724 let cfg_info = build_cfg_info(src_body);
1725 trace!("cfg_info: {:?}", cfg_info);
1726
1727 // Find the exit block for all the loops and switches, if such an exit point
1728 // exists.
1729 let exits_info = compute_loop_switch_exits(ctx, src_body, &cfg_info);
1730
1731 // Debugging
1732 trace!("exits_info:\n{:?}", exits_info);
1733
1734 // Translate the body by reconstructing the loops and the
1735 // conditional branchings.
1736 // Note that we shouldn't get `None`.
1737 let mut explored = HashSet::new();
1738 let mut info = BlockInfo {
1739 cfg: &cfg_info,
1740 body: src_body,
1741 exits_info: &exits_info,
1742 explored: &mut explored,
1743 };
1744 let tgt_body = translate_block(&mut info, &Vec::new(), &HashSet::new(), src::BlockId::ZERO);
1745
1746 tgt::ExprBody {
1747 span: src_body.span,
1748 locals: src_body.locals.clone(),
1749 comments: src_body.comments.clone(),
1750 body: tgt_body,
1751 }
1752}
1753
1754fn translate_body(ctx: &mut TransformCtx, body: &mut gast::Body) {
1755 use gast::Body::{Structured, Unstructured};
1756 let Unstructured(src_body) = body else {
1757 panic!("Called `ullbc_to_llbc` on an already restructured body")
1758 };
1759 trace!("About to translate to ullbc: {:?}", src_body.span);
1760 let tgt_body = translate_body_aux(ctx, src_body);
1761 *body = Structured(tgt_body);
1762}
1763
1764pub struct Transform;
1765impl TransformPass for Transform {
1766 fn transform_ctx(&self, ctx: &mut TransformCtx) {
1767 // Translate the bodies one at a time.
1768 ctx.for_each_body(|ctx, body| {
1769 translate_body(ctx, body);
1770 });
1771
1772 // Print the functions
1773 let fmt_ctx = &ctx.into_fmt();
1774 for fun in &ctx.translated.fun_decls {
1775 trace!(
1776 "# Signature:\n{}\n\n# Function definition:\n{}\n",
1777 fun.signature.with_ctx(fmt_ctx),
1778 fun.with_ctx(fmt_ctx),
1779 );
1780 }
1781 // Print the global variables
1782 for global in &ctx.translated.global_decls {
1783 trace!(
1784 "# Type:\n{}\n\n# Global definition:\n{}\n",
1785 global.ty.with_ctx(fmt_ctx),
1786 global.with_ctx(fmt_ctx),
1787 );
1788 }
1789
1790 if ctx.options.print_built_llbc {
1791 info!("# LLBC resulting from control-flow reconstruction:\n\n{ctx}\n",);
1792 }
1793 }
1794}