charon_lib/transform/
ullbc_to_llbc.rs

1//! ULLBC to LLBC
2//!
3//! We reconstruct the control-flow in the Unstructured LLBC.
4//!
5//! The reconstruction algorithm is not written to be efficient (its complexity
6//! is probably very bad), but it was not written to be: this is still an early
7//! stage and we want the algorithm to generate the best reconstruction as
8//! possible. We still need to test the algorithm on more interesting examples,
9//! and will consider making it more efficient once it is a bit mature and well
10//! tested.
11//! Also note that we more importantly focus on making the algorithm sound: the
12//! reconstructed program must always be equivalent to the original MIR program,
13//! and the fact that the reconstruction preserves this property must be obvious.
14//!
15//! Finally, we conjecture the execution time shouldn't be too much a problem:
16//! we don't expect the translation mechanism to be applied on super huge functions
17//! (which will be difficult to formally analyze), and the MIR graphs are actually
18//! not that big because statements are grouped into code blocks (a block is made
19//! of a list of statements, followed by a terminator - branchings and jumps can
20//! only be performed by terminators -, meaning that MIR graphs don't have that
21//! many nodes and edges).
22
23use crate::ast::*;
24use crate::common::ensure_sufficient_stack;
25use crate::errors::sanity_check;
26use crate::formatter::{Formatter, IntoFormatter};
27use crate::llbc_ast as tgt;
28use crate::meta::{combine_span, Span};
29use crate::transform::TransformCtx;
30use crate::ullbc_ast::{self as src};
31use indexmap::IndexMap;
32use itertools::Itertools;
33use num_bigint::BigInt;
34use num_rational::BigRational;
35use petgraph::algo::toposort;
36use petgraph::graphmap::DiGraphMap;
37use petgraph::Direction;
38use std::collections::{BTreeSet, HashMap, HashSet, VecDeque};
39
40use super::ctx::TransformPass;
41
42/// Control-Flow Graph
43type Cfg = DiGraphMap<src::BlockId, ()>;
44
45/// Small utility
46struct BlockInfo<'a> {
47    cfg: &'a CfgInfo,
48    body: &'a src::ExprBody,
49    exits_info: &'a ExitInfo,
50    explored: &'a mut HashSet<src::BlockId>,
51}
52
53/// This structure contains various information about a function's CFG.
54#[derive(Debug)]
55struct CfgInfo {
56    /// The CFG
57    pub cfg: Cfg,
58    /// The CFG where all the backward edges have been removed
59    pub cfg_no_be: Cfg,
60    /// We consider the destination of the backward edges to be loop entries and
61    /// store them here.
62    pub loop_entries: HashSet<src::BlockId>,
63    /// The backward edges
64    pub backward_edges: HashSet<(src::BlockId, src::BlockId)>,
65    /// The blocks whose terminators are a switch are stored here.
66    pub switch_blocks: HashSet<src::BlockId>,
67    /// The set of nodes from where we can only reach error nodes (panic, etc.)
68    pub only_reach_error: HashSet<src::BlockId>,
69}
70
71/// Build the CFGs (the "regular" CFG and the CFG without backward edges) and
72/// compute some information like the loop entries and the switch blocks.
73fn build_cfg_info(body: &src::ExprBody) -> CfgInfo {
74    let mut cfg = CfgInfo {
75        cfg: Cfg::new(),
76        cfg_no_be: Cfg::new(),
77        loop_entries: HashSet::new(),
78        backward_edges: HashSet::new(),
79        switch_blocks: HashSet::new(),
80        only_reach_error: HashSet::new(),
81    };
82
83    // Add the nodes
84    for block_id in body.body.iter_indices() {
85        cfg.cfg.add_node(block_id);
86        cfg.cfg_no_be.add_node(block_id);
87    }
88
89    // Add the edges
90    let ancestors = HashSet::new();
91    let mut explored = HashSet::new();
92    build_cfg_partial_info_edges(
93        &mut cfg,
94        &ancestors,
95        &mut explored,
96        body,
97        src::BlockId::ZERO,
98    );
99
100    cfg
101}
102
103fn block_is_switch(body: &src::ExprBody, block_id: src::BlockId) -> bool {
104    let block = body.body.get(block_id).unwrap();
105    block.terminator.content.is_switch()
106}
107
108/// The terminator of the block is a panic, etc.
109fn block_is_error(body: &src::ExprBody, block_id: src::BlockId) -> bool {
110    let block = body.body.get(block_id).unwrap();
111    use src::RawTerminator::*;
112    match &block.terminator.content {
113        Abort(..) => true,
114        Goto { .. } | Switch { .. } | Return | Call { .. } => false,
115    }
116}
117
118fn build_cfg_partial_info_edges(
119    cfg: &mut CfgInfo,
120    ancestors: &HashSet<src::BlockId>,
121    explored: &mut HashSet<src::BlockId>,
122    body: &src::ExprBody,
123    block_id: src::BlockId,
124) {
125    // Check if we already explored the current node
126    if explored.contains(&block_id) {
127        return;
128    }
129    explored.insert(block_id);
130
131    // Insert the current block in the set of ancestors blocks
132    let mut ancestors = ancestors.clone();
133    ancestors.insert(block_id);
134
135    // Check if it is a switch
136    if block_is_switch(body, block_id) {
137        cfg.switch_blocks.insert(block_id);
138    }
139
140    // Retrieve the block targets
141    let targets = body.body.get(block_id).unwrap().targets();
142    let mut has_backward_edge = false;
143
144    // Add edges for all the targets and explore them, if they are not predecessors
145    for tgt in &targets {
146        // Insert the edge in the "regular" CFG
147        cfg.cfg.add_edge(block_id, *tgt, ());
148
149        // We need to check if it is a backward edge before inserting it in the
150        // CFG without backward edges and exploring it
151        if ancestors.contains(tgt) {
152            // This is a backward edge
153            has_backward_edge = true;
154            cfg.loop_entries.insert(*tgt);
155            cfg.backward_edges.insert((block_id, *tgt));
156        } else {
157            // Not a backward edge: insert the edge and explore
158            cfg.cfg_no_be.add_edge(block_id, *tgt, ());
159            ensure_sufficient_stack(|| {
160                build_cfg_partial_info_edges(cfg, &ancestors, explored, body, *tgt);
161            })
162        }
163    }
164
165    // Check if this node can only reach error nodes:
166    // - we check if the current node ends with an error terminator
167    // - or check that all the targets lead to error nodes
168    // Note that if there is a backward edge, we consider that we don't necessarily
169    // go to error.
170    if !has_backward_edge
171        && (block_is_error(body, block_id)
172            || (
173                // The targets are empty if this is an error node *or* a return node
174                !targets.is_empty() && targets.iter().all(|tgt| cfg.only_reach_error.contains(tgt))
175            ))
176    {
177        cfg.only_reach_error.insert(block_id);
178    }
179}
180
181#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
182struct BlockWithRank<T> {
183    /// A "rank" quantity that we use to order the nodes.
184    /// By placing the `rank` field before the `id`, we make sure that
185    /// we use the rank first in the lexicographic order.
186    rank: T,
187    id: src::BlockId,
188}
189
190type OrdBlockId = BlockWithRank<usize>;
191
192/// For the rank we use:
193/// - a "flow" quantity (see [BlocksInfo])
194/// - the *inverse* rank in the topological sort (i.e., `- topo_rank`)
195type FlowBlockId = BlockWithRank<(BigRational, isize)>;
196
197#[derive(Debug, Clone)]
198struct LoopExitCandidateInfo {
199    /// The occurrences going to this exit.
200    /// For every occurrence, we register the distance between the loop entry
201    /// and the node from which the edge going to the exit starts.
202    /// If later we have to choose between candidates with the same number
203    /// of occurrences, we choose the one with the smallest distances.
204    ///
205    /// Note that it *may* happen that we have several exit candidates referenced
206    /// more than once for one loop. This comes from the fact that whenever we
207    /// reach a node from which the loop entry is not reachable (without using a
208    /// backward edge leading to an outer loop entry), we register this node
209    /// as well as all its children as exit candidates.
210    /// Consider the following example:
211    /// ```text
212    /// while i < max {
213    ///     if cond {
214    ///         break;
215    ///     }
216    ///     s += i;
217    ///     i += 1
218    /// }
219    /// // All the below nodes are exit candidates (each of them is referenced twice)
220    /// s += 1;
221    /// return s;
222    /// ```
223    pub occurrences: Vec<usize>,
224}
225
226/// Check if a loop entry is reachable from a node, in a graph where we remove
227/// the backward edges going directly to the loop entry.
228///
229/// If the loop entry is not reachable, it means that:
230/// - the loop entry is not reachable at all
231/// - or it is only reachable through an outer loop
232///
233/// The starting node should be a (transitive) child of the loop entry.
234/// We use this to find candidates for loop exits.
235fn loop_entry_is_reachable_from_inner(
236    cfg: &CfgInfo,
237    loop_entry: src::BlockId,
238    block_id: src::BlockId,
239) -> bool {
240    // It is reachable in the complete graph. Check if it is reachable by not
241    // going through backward edges which go to outer loops. In practice, we
242    // just need to forbid the use of any backward edges at the exception of
243    // those which go directly to the current loop's entry. This means that we
244    // ignore backward edges to outer loops of course, but also backward edges
245    // to inner loops because we shouldn't need to follow those (there should be
246    // more direct paths).
247
248    // Explore the graph starting at block_id
249    let mut explored: HashSet<src::BlockId> = HashSet::new();
250    let mut stack: VecDeque<src::BlockId> = VecDeque::new();
251    stack.push_back(block_id);
252    while !stack.is_empty() {
253        let bid = stack.pop_front().unwrap();
254        if explored.contains(&bid) {
255            continue;
256        }
257        explored.insert(bid);
258
259        let next_ids = cfg.cfg.neighbors_directed(bid, Direction::Outgoing);
260        for next_id in next_ids {
261            // Check if this is a backward edge
262            if cfg.backward_edges.contains(&(bid, next_id)) {
263                // Backward edge: only allow those going directly to the current
264                // loop's entry
265                if next_id == loop_entry {
266                    // The loop entry is reachable
267                    return true;
268                } else {
269                    // Forbidden edge: ignore
270                    continue;
271                }
272            } else {
273                // Nothing special: add the node to the stack for further
274                // exploration
275                stack.push_back(next_id);
276            }
277        }
278    }
279
280    // The loop entry is not reachable
281    false
282}
283
284struct FilteredLoopParents {
285    remaining_parents: Vec<(src::BlockId, usize)>,
286    removed_parents: Vec<(src::BlockId, usize)>,
287}
288
289fn filter_loop_parents(
290    cfg: &CfgInfo,
291    parent_loops: &Vec<(src::BlockId, usize)>,
292    block_id: src::BlockId,
293) -> Option<FilteredLoopParents> {
294    let mut eliminate: usize = 0;
295    for (loop_id, _ldist) in parent_loops.iter().rev() {
296        if !loop_entry_is_reachable_from_inner(cfg, *loop_id, block_id) {
297            eliminate += 1;
298        } else {
299            break;
300        }
301    }
302
303    if eliminate > 0 {
304        // Split the vector of parents
305        let (remaining_parents, removed_parents) =
306            parent_loops.split_at(parent_loops.len() - eliminate);
307        let (mut remaining_parents, removed_parents) =
308            (remaining_parents.to_vec(), removed_parents.to_vec());
309
310        // Update the distance to the last loop - we just increment the distance
311        // by 1, because from the point of view of the parent loop, we just exited
312        // a block and go to the next sequence of instructions.
313        if !remaining_parents.is_empty() {
314            remaining_parents.last_mut().unwrap().1 += 1;
315        }
316
317        Some(FilteredLoopParents {
318            remaining_parents,
319            removed_parents,
320        })
321    } else {
322        None
323    }
324}
325
326/// List the nodes reachable from a starting point.
327/// We list the nodes and the depth (in the AST) at which they were found.
328fn list_reachable(cfg: &Cfg, start: src::BlockId) -> HashMap<src::BlockId, usize> {
329    let mut reachable: HashMap<src::BlockId, usize> = HashMap::new();
330    let mut stack: VecDeque<(src::BlockId, usize)> = VecDeque::new();
331    stack.push_back((start, 0));
332
333    while !stack.is_empty() {
334        let (bid, dist) = stack.pop_front().unwrap();
335
336        // Ignore this node if we already registered it with a better distance
337        match reachable.get(&bid) {
338            None => (),
339            Some(original_dist) => {
340                if *original_dist < dist {
341                    continue;
342                }
343            }
344        }
345
346        // Inset the node with its distance
347        reachable.insert(bid, dist);
348
349        // Add the children to the stack
350        for child in cfg.neighbors(bid) {
351            stack.push_back((child, dist + 1));
352        }
353    }
354
355    // Return
356    reachable
357}
358
359/// Register a node and its children as exit candidates for a list of
360/// parent loops.
361fn register_children_as_loop_exit_candidates(
362    cfg: &CfgInfo,
363    loop_exits: &mut HashMap<src::BlockId, IndexMap<src::BlockId, LoopExitCandidateInfo>>,
364    removed_parent_loops: &Vec<(src::BlockId, usize)>,
365    block_id: src::BlockId,
366) {
367    // List the reachable nodes
368    let reachable = list_reachable(&cfg.cfg_no_be, block_id);
369
370    let mut base_dist = 0;
371    // For every parent loop, in reverse order (we go from last to first in
372    // order to correctly compute the distances)
373    for (loop_id, loop_dist) in removed_parent_loops.iter().rev() {
374        // Update the distance to the loop entry
375        base_dist += *loop_dist;
376
377        // Retrieve the candidates
378        let candidates = loop_exits.get_mut(loop_id).unwrap();
379
380        // Update them
381        for (bid, dist) in reachable.iter() {
382            let distance = base_dist + *dist;
383            match candidates.get_mut(bid) {
384                None => {
385                    candidates.insert(
386                        *bid,
387                        LoopExitCandidateInfo {
388                            occurrences: vec![distance],
389                        },
390                    );
391                }
392                Some(c) => {
393                    c.occurrences.push(distance);
394                }
395            }
396        }
397    }
398}
399
400/// Compute the loop exit candidates.
401///
402/// There may be several candidates with the same "optimality" (same number of
403/// occurrences, etc.), in which case we choose the first one which was registered
404/// (the order in which we explore the graph is deterministic): this is why we
405/// store the candidates in a linked hash map.
406fn compute_loop_exit_candidates(
407    cfg: &CfgInfo,
408    explored: &mut HashSet<src::BlockId>,
409    ordered_loops: &mut Vec<src::BlockId>,
410    loop_exits: &mut HashMap<src::BlockId, IndexMap<src::BlockId, LoopExitCandidateInfo>>,
411    // List of parent loops, with the distance to the entry of the loop (the distance
412    // is the distance between the current node and the loop entry for the last parent,
413    // and the distance between the parents for the others).
414    mut parent_loops: Vec<(src::BlockId, usize)>,
415    block_id: src::BlockId,
416) {
417    if explored.contains(&block_id) {
418        return;
419    }
420    explored.insert(block_id);
421
422    // Check if we enter a loop - add the corresponding node if necessary
423    if cfg.loop_entries.contains(&block_id) {
424        parent_loops.push((block_id, 1));
425        ordered_loops.push(block_id);
426    } else {
427        // Increase the distance with the parent loop
428        if !parent_loops.is_empty() {
429            parent_loops.last_mut().unwrap().1 += 1;
430        }
431    };
432
433    // Retrieve the children - note that we ignore the back edges
434    let children = cfg.cfg_no_be.neighbors(block_id);
435    for child in children {
436        // If the parent loop entry is not reachable from the child without going
437        // through a backward edge which goes directly to the loop entry, consider
438        // this node a potential exit.
439        ensure_sufficient_stack(|| {
440            match filter_loop_parents(cfg, &parent_loops, child) {
441                None => {
442                    compute_loop_exit_candidates(
443                        cfg,
444                        explored,
445                        ordered_loops,
446                        loop_exits,
447                        parent_loops.clone(),
448                        child,
449                    );
450                }
451                Some(fparent_loops) => {
452                    // We filtered some parent loops: it means this child and its
453                    // children are loop exit candidates for all those loops: we must
454                    // thus register them.
455                    // Note that we register the child *and* its children: the reason
456                    // is that we might do something *then* actually jump to the exit.
457                    // For instance, the following block of code:
458                    // ```
459                    // if cond { break; } else { ... }
460                    // ```
461                    //
462                    // Gets translated in MIR to something like this:
463                    // ```
464                    // bb1: {
465                    //   if cond -> bb2 else -> bb3; // bb2 is not the real exit
466                    // }
467                    //
468                    // bb2: {
469                    //   goto bb4; // bb4 is the real exit
470                    // }
471                    // ```
472                    register_children_as_loop_exit_candidates(
473                        cfg,
474                        loop_exits,
475                        &fparent_loops.removed_parents,
476                        child,
477                    );
478
479                    // Explore, with the filtered parents
480                    compute_loop_exit_candidates(
481                        cfg,
482                        explored,
483                        ordered_loops,
484                        loop_exits,
485                        fparent_loops.remaining_parents,
486                        child,
487                    );
488                }
489            }
490        })
491    }
492}
493
494/// See [`compute_loop_switch_exits`](compute_loop_switch_exits) for
495/// explanations about what "exits" are.
496///
497/// The following function computes the loop exits. It acts as follows.
498///
499/// We keep track of a stack of the loops in which we entered.
500/// It is very easy to check when we enter a loop: loop entries are destinations
501/// of backward edges, which can be spotted with a simple graph exploration (see
502/// [`build_cfg_partial_info`](build_cfg_partial_info).
503/// The criteria to consider whether we exit a loop is the following:
504/// - we exit a loop if we go to a block from which we can't reach the loop
505///   entry at all
506/// - or if we can reach the loop entry, but must use a backward edge which goes
507///   to an outer loop
508///
509/// It is better explained on the following example:
510/// ```text
511/// 'outer while i < max {
512///     'inner while j < max {
513///        j += 1;
514///     }
515///     // (i)
516///     i += 1;
517/// }
518/// ```
519/// If we enter the inner loop then go to (i) from the inner loop, we consider
520/// that we exited the outer loop because:
521/// - we can reach the entry of the inner loop from (i) (by finishing then
522///   starting again an iteration of the outer loop)
523/// - but doing this requires taking a backward edge which goes to the outer loop
524///
525/// Whenever we exit a loop, we save the block we went to as an exit candidate
526/// for this loop. Note that there may by many exit candidates. For instance,
527/// in the below example:
528/// ```text
529/// while ... {
530///    ...
531///    if ... {
532///        // We can't reach the loop entry from here: this is an exit
533///        // candidate
534///        return;
535///    }
536/// }
537/// // This is another exit candidate - and this is the one we want to use
538/// // as the "real" exit...
539/// ...
540/// ```
541///
542/// Also note that it may happen that we go several times to the same exit (if
543/// we use breaks for instance): we record the number of times an exit candidate
544/// is used.
545///
546/// Once we listed all the exit candidates, we find the "best" one for every
547/// loop, starting with the outer loops. We start with outer loops because
548/// inner loops might use breaks to exit to the exit of outer loops: if we
549/// start with the inner loops, the exit which is "natural" for the outer loop
550/// might end up being used for one of the inner loops...
551///
552/// The best exit is the following one:
553/// - it is the one which is used the most times (note that there can be
554///   several candidates which are referenced strictly more than once: see the
555///   comment below)
556/// - if several exits have the same number of occurrences, we choose the one
557///   for which we goto the "earliest" (earliest meaning that the goto is close to
558///   the loop entry node in the AST). The reason is that all the loops should
559///   have an outer if ... then ... else ... which executes the loop body or goes
560///   to the exit (note that this is not necessarily the first
561///   if ... then ... else ... we find: loop conditions can be arbitrary
562///   expressions, containing branchings).
563///
564/// # Several candidates for a loop exit:
565/// =====================================
566/// There used to be a sanity check to ensure there are no two different
567/// candidates with exactly the same number of occurrences and distance from
568/// the entry of the loop, if the number of occurrences is > 1.
569///
570/// We removed it because it does happen, for instance here (the match
571/// introduces an `unreachable` node, and it has the same number of
572/// occurrences and the same distance to the loop entry as the `panic`
573/// node):
574///
575/// ```text
576/// pub fn list_nth_mut_loop_pair<'a, T>(
577///     mut ls: &'a mut List<T>,
578///     mut i: u32,
579/// ) -> &'a mut T {
580///     loop {
581///         match ls {
582///             List::Nil => {
583///                 panic!() // <-- best candidate
584///             }
585///             List::Cons(x, tl) => {
586///                 if i == 0 {
587///                     return x;
588///                 } else {
589///                     ls = tl;
590///                     i -= 1;
591///                 }
592///             }
593///             _ => {
594///               // Note that Rustc always introduces an unreachable branch after
595///               // desugaring matches.
596///               unreachable!(), // <-- best candidate
597///             }
598///         }
599///     }
600/// }
601/// ```
602///
603/// When this happens we choose an exit candidate whose edges don't necessarily
604/// lead to an error (above there are none, so we don't choose any exits). Note
605/// that this last condition is important to prevent loops from being unnecessarily
606/// nested:
607///
608/// ```text
609/// pub fn nested_loops_enum(step_out: usize, step_in: usize) -> usize {
610///     let mut s = 0;
611///
612///     for _ in 0..128 { // We don't want this loop to be nested with the loops below
613///         s += 1;
614///     }
615///
616///     for _ in 0..(step_out) {
617///         for _ in 0..(step_in) {
618///             s += 1;
619///         }
620///     }
621///
622///     s
623/// }
624/// ```
625fn compute_loop_exits(
626    ctx: &mut TransformCtx,
627    body: &src::ExprBody,
628    cfg: &CfgInfo,
629) -> HashMap<src::BlockId, Option<src::BlockId>> {
630    let mut explored = HashSet::new();
631    let mut ordered_loops = Vec::new();
632    let mut loop_exits = HashMap::new();
633
634    // Initialize the loop exits candidates
635    for loop_id in &cfg.loop_entries {
636        loop_exits.insert(*loop_id, IndexMap::new());
637    }
638
639    // Compute the candidates
640    compute_loop_exit_candidates(
641        cfg,
642        &mut explored,
643        &mut ordered_loops,
644        &mut loop_exits,
645        Vec::new(),
646        src::BlockId::ZERO,
647    );
648
649    {
650        // Debugging
651        let candidates: Vec<String> = loop_exits
652            .iter()
653            .map(|(loop_id, candidates)| format!("{loop_id} -> {candidates:?}"))
654            .collect();
655        trace!("Loop exit candidates:\n{}", candidates.join("\n"));
656    }
657
658    // Choose one candidate among the potential candidates.
659    let mut exits: HashSet<src::BlockId> = HashSet::new();
660    let mut chosen_loop_exits: HashMap<src::BlockId, Option<src::BlockId>> = HashMap::new();
661    // For every loop
662    for loop_id in ordered_loops {
663        // Check the candidates.
664        // Ignore the candidates which have already been chosen as exits for other
665        // loops (which should be outer loops).
666        // We choose the exit with:
667        // - the most occurrences
668        // - the least total distance (if there are several possibilities)
669        // - doesn't necessarily lead to an error (panic, unreachable)
670
671        // First:
672        // - filter the candidates
673        // - compute the number of occurrences
674        // - compute the sum of distances
675        // TODO: we could simply order by using a lexicographic order
676        let loop_exits = loop_exits
677            .get(&loop_id)
678            .unwrap()
679            .iter()
680            // If candidate already selected for another loop: ignore
681            .filter(|(candidate_id, _)| !exits.contains(candidate_id))
682            .map(|(candidate_id, candidate_info)| {
683                let num_occurrences = candidate_info.occurrences.len();
684                let dist_sum = candidate_info.occurrences.iter().sum();
685                (*candidate_id, num_occurrences, dist_sum)
686            })
687            .collect_vec();
688
689        trace!(
690            "Loop {}: possible exits:\n{}",
691            loop_id,
692            loop_exits
693                .iter()
694                .map(|(bid, occs, dsum)| format!(
695                    "{bid} -> {{ occurrences: {occs}, dist_sum: {dsum} }}",
696                ))
697                .collect::<Vec<String>>()
698                .join("\n")
699        );
700
701        // Second: actually select the proper candidate.
702
703        // We find the one with the highest occurrence and the smallest distance
704        // from the entry of the loop (note that we take care of listing the exit
705        // candidates in a deterministic order).
706        let mut best_exit: Option<src::BlockId> = None;
707        let mut best_occurrences = 0;
708        let mut best_dist_sum = std::usize::MAX;
709        for (candidate_id, occurrences, dist_sum) in &loop_exits {
710            if (*occurrences > best_occurrences)
711                || (*occurrences == best_occurrences && *dist_sum < best_dist_sum)
712            {
713                best_exit = Some(*candidate_id);
714                best_occurrences = *occurrences;
715                best_dist_sum = *dist_sum;
716            }
717        }
718
719        let possible_candidates: Vec<_> = loop_exits
720            .iter()
721            .filter_map(|(bid, occs, dsum)| {
722                if *occs == best_occurrences && *dsum == best_dist_sum {
723                    Some(*bid)
724                } else {
725                    None
726                }
727            })
728            .collect();
729        let num_possible_candidates = loop_exits.len();
730
731        // If there is exactly one one best candidate, it is easy.
732        // Otherwise we need to split further.
733        if num_possible_candidates > 1 {
734            trace!("Best candidates: {:?}", possible_candidates);
735            // TODO: if we use a lexicographic order we can merge this with the code
736            // above.
737            // Remove the candidates which only lead to errors (panic or unreachable).
738            let candidates: Vec<_> = possible_candidates
739                .iter()
740                .filter(|bid| !cfg.only_reach_error.contains(bid))
741                .collect();
742            // If there is exactly one candidate we select it
743            if candidates.len() == 1 {
744                let exit_id = *candidates[0];
745                exits.insert(exit_id);
746                trace!("Loop {loop_id}: selected the best exit candidate {exit_id}");
747                chosen_loop_exits.insert(loop_id, Some(exit_id));
748            } else {
749                // Otherwise we do not select any exit.
750                // We don't want to select any exit if we are in the below situation
751                // (all paths lead to errors). We added a sanity check below to
752                // catch the situations where there are several exits which don't
753                // lead to errors.
754                //
755                // Example:
756                // ========
757                // ```
758                // loop {
759                //     match ls {
760                //         List::Nil => {
761                //             panic!() // <-- best candidate
762                //         }
763                //         List::Cons(x, tl) => {
764                //             if i == 0 {
765                //                 return x;
766                //             } else {
767                //                 ls = tl;
768                //                 i -= 1;
769                //             }
770                //         }
771                //         _ => {
772                //           unreachable!(); // <-- best candidate (Rustc introduces an `unreachable` case)
773                //         }
774                //     }
775                // }
776                // ```
777                //
778                // Adding this sanity check so that we can see when there are
779                // several candidates.
780                let span = body.body[loop_id].terminator.span; // Taking *a* span from the block
781                sanity_check!(ctx, span, candidates.is_empty());
782                trace!("Loop {loop_id}: did not select an exit candidate because they all lead to panics");
783                chosen_loop_exits.insert(loop_id, None);
784            }
785        } else {
786            // Register the exit, if there is one
787            match best_exit {
788                None => {
789                    // No exit was found
790                    trace!("Loop {loop_id}: could not find an exit candidate");
791                    chosen_loop_exits.insert(loop_id, None);
792                }
793                Some(exit_id) => {
794                    exits.insert(exit_id);
795                    trace!("Loop {loop_id}: selected the unique exit candidate {exit_id}");
796                    chosen_loop_exits.insert(loop_id, Some(exit_id));
797                }
798            }
799        }
800    }
801
802    // Return the chosen exits
803    trace!("Chosen loop exits: {:?}", chosen_loop_exits);
804    chosen_loop_exits
805}
806
807/// Information used to compute the switch exits.
808/// We compute this information for every block in the graph.
809/// Note that we make sure to use immutable sets because we rely a lot
810/// on cloning.
811#[derive(Debug, Clone)]
812struct BlocksInfo {
813    id: src::BlockId,
814    /// All the successors of the block
815    succs: BTreeSet<OrdBlockId>,
816    /// Let's say we put a quantity of water equal to 1 on the block, and the
817    /// water flows downards. Whenever there is a branching, the quantity of
818    /// water gets equally divided between the branches. When the control flows
819    /// join, we put the water back together. The set below computes the amount
820    /// of water received by each descendant of the node.
821    ///
822    /// TODO: there must be a known algorithm which computes this, right?...
823    /// This is exactly this problems:
824    /// https://stackoverflow.com/questions/78221666/algorithm-for-total-flow-through-weighted-directed-acyclic-graph
825    /// TODO: the way I compute this is not efficient.
826    ///
827    /// Remark: in order to rank the nodes, we also use the negation of the
828    /// rank given by the topological order. The last elements of the set
829    /// have the highest flow, that is they are the nodes to which the maximum
830    /// number of paths converge. If several nodes have the same flow, we want
831    /// to take the highest one in the hierarchy: hence the use of the inverse
832    /// of the topological rank.
833    ///
834    /// Ex.:
835    /// ```text
836    /// A  -- we start here
837    /// |
838    /// |---------------------------------------
839    /// |            |            |            |
840    /// B:(0.25,-1)  C:(0.25,-2)  D:(0.25,-3)  E:(0.25,-4)
841    /// |            |            |
842    /// |--------------------------
843    /// |
844    /// F:(0.75,-5)
845    /// |
846    /// |
847    /// G:(0.75,-6)
848    /// ```
849    /// The "best" node (with the highest (flow, rank) in the graph above is F.
850    flow: BTreeSet<FlowBlockId>,
851}
852
853/// Create an [OrdBlockId] from a block id and a rank given by a map giving
854/// a sort (topological in our use cases) over the graph.
855fn make_ord_block_id(
856    block_id: src::BlockId,
857    sort_map: &HashMap<src::BlockId, usize>,
858) -> OrdBlockId {
859    let rank = *sort_map.get(&block_id).unwrap();
860    OrdBlockId { id: block_id, rank }
861}
862
863/// Compute [BlocksInfo] for every block in the graph.
864/// This information is then used to compute the switch exits.
865fn compute_switch_exits_explore(
866    cfg: &CfgInfo,
867    tsort_map: &HashMap<src::BlockId, usize>,
868    memoized: &mut HashMap<src::BlockId, BlocksInfo>,
869    block_id: src::BlockId,
870) {
871    // Check if we already computer the info
872    if memoized.get(&block_id).is_some() {
873        return;
874    }
875
876    // Compute the block information for the children
877    let children_ids: Vec<src::BlockId> = cfg.cfg_no_be.neighbors(block_id).collect_vec();
878    children_ids
879        .iter()
880        .for_each(|bid| compute_switch_exits_explore(cfg, tsort_map, memoized, *bid));
881
882    // Retrieve the information
883    let children: Vec<&BlocksInfo> = children_ids
884        .iter()
885        .map(|bid| memoized.get(bid).unwrap())
886        .collect();
887
888    // Compute the successors
889    // Add the children themselves in their sets of successors
890    let all_succs: BTreeSet<OrdBlockId> = children.iter().fold(BTreeSet::new(), |acc, s| {
891        // Add the successors to the accumulator
892        let mut acc: BTreeSet<_> = acc.union(&s.succs).copied().collect();
893        // Add the child itself
894        acc.insert(make_ord_block_id(s.id, tsort_map));
895        acc
896    });
897
898    // Compute the "flows" (if there are children)
899    // TODO: this is computationally expensive...
900    let mut flow: HashMap<src::BlockId, (BigRational, isize)> = HashMap::new();
901
902    if children.len() > 0 {
903        // We need to divide the initial flow equally between the children
904        let factor = BigRational::new(BigInt::from(1), BigInt::from(children.len()));
905
906        // Small helper
907        let mut add_to_flow = |(id, f0): (src::BlockId, (BigRational, isize))| match flow.get(&id) {
908            None => flow.insert(id, f0),
909            Some(f1) => {
910                let f = f0.0 + f1.0.clone();
911                assert!(f0.1 == f1.1);
912                flow.insert(id, (f, f0.1))
913            }
914        };
915
916        // For each child, multiply the flows of its own children by the ratio,
917        // and add.
918        for child in children {
919            // First, add the child itself
920            let rank = isize::try_from(*tsort_map.get(&child.id).unwrap()).unwrap();
921            add_to_flow((child.id, (factor.clone(), -rank)));
922
923            // Then add its successors
924            for child1 in &child.flow {
925                add_to_flow((
926                    child1.id,
927                    (factor.clone() * child1.rank.0.clone(), child1.rank.1),
928                ));
929            }
930        }
931    }
932
933    // Put everything in an ordered set: the first block id will be the one with
934    // the highest flow, and in case of equality it will be the one with the
935    // smallest block id.
936    let flow: BTreeSet<FlowBlockId> = flow
937        .into_iter()
938        .map(|(id, rank)| BlockWithRank { rank, id })
939        .collect();
940
941    trace!("block: {block_id}, all successors: {all_succs:?}, flow: {flow:?}");
942
943    // Memoize
944    let info = BlocksInfo {
945        id: block_id,
946        succs: all_succs,
947        flow,
948    };
949    memoized.insert(block_id, info.clone());
950}
951
952/// Auxiliary helper
953///
954/// Check if it is possible to reach the exit of an outer switch from [bid]
955/// without going through the [exit_candidate]. We use the graph without
956/// backward edges.
957fn can_reach_outer_exit(
958    cfg: &CfgInfo,
959    outer_exits: &HashSet<src::BlockId>,
960    start_bid: src::BlockId,
961    exit_candidate: src::BlockId,
962) -> bool {
963    // The stack of blocks
964    let mut stack: Vec<src::BlockId> = vec![start_bid];
965    let mut explored: HashSet<src::BlockId> = HashSet::new();
966
967    while let Some(bid) = stack.pop() {
968        // Check if already explored
969        if explored.contains(&bid) {
970            break;
971        }
972        explored.insert(bid);
973
974        // Check if this is the exit candidate
975        if bid == exit_candidate {
976            // Stop exploring
977            break;
978        }
979
980        // Check if this is an outer exit
981        if outer_exits.contains(&bid) {
982            return true;
983        }
984
985        // Add the children to the stack
986        for child in cfg.cfg_no_be.neighbors(bid) {
987            stack.push(child);
988        }
989    }
990
991    false
992}
993
994/// See [`compute_loop_switch_exits`](compute_loop_switch_exits) for
995/// explanations about what "exits" are.
996///
997/// In order to compute the switch exits, we simply recursively compute a
998/// topologically ordered set of "filtered successors" as follows (note
999/// that we work in the CFG *without* back edges):
1000/// - for a block which doesn't branch (only one successor), the filtered
1001///   successors is the set of reachable nodes.
1002/// - for a block which branches, we compute the nodes reachable from all
1003///   the children, and find the "best" intersection between those.
1004///   Note that we find the "best" intersection (a pair of branches which
1005///   maximize the intersection of filtered successors) because some branches
1006///   might never join the control-flow of the other branches, if they contain
1007///   a `break`, `return`, `panic`, etc., like here:
1008///   ```text
1009///   if b { x = 3; } { return; }
1010///   y += x;
1011///   ...
1012///   ```
1013/// Note that with nested switches, the branches of the inner switches might
1014/// goto the exits of the outer switches: for this reason, we give precedence
1015/// to the outer switches.
1016fn compute_switch_exits(
1017    cfg: &CfgInfo,
1018    tsort_map: &HashMap<src::BlockId, usize>,
1019) -> HashMap<src::BlockId, Option<src::BlockId>> {
1020    // Compute the successors info map, starting at the root node
1021    let mut succs_info_map = HashMap::new();
1022    trace!(
1023        "- cfg.cfg:\n{:?}\n- cfg.cfg_no_be:\n{:?}\n- cfg.switch_blocks:\n{:?}",
1024        cfg.cfg,
1025        cfg.cfg_no_be,
1026        cfg.switch_blocks
1027    );
1028    let _ = compute_switch_exits_explore(cfg, tsort_map, &mut succs_info_map, src::BlockId::ZERO);
1029
1030    // We need to give precedence to the outer switches: we thus iterate
1031    // over the switch blocks in topological order.
1032    let mut sorted_switch_blocks: BTreeSet<OrdBlockId> = BTreeSet::new();
1033    for bid in cfg.switch_blocks.iter() {
1034        sorted_switch_blocks.insert(make_ord_block_id(*bid, tsort_map));
1035    }
1036    trace!("sorted_switch_blocks: {:?}", sorted_switch_blocks);
1037
1038    // Debugging: print all the successors
1039    {
1040        trace!("Successors info:\n{}\n", {
1041            let mut out = vec![];
1042            for (bid, info) in &succs_info_map {
1043                out.push(
1044                    format!(
1045                        "{} -> {{succs: {:?}, flow: {:?}}}",
1046                        bid, &info.succs, &info.flow
1047                    )
1048                    .to_string(),
1049                );
1050            }
1051            out.join("\n")
1052        });
1053    }
1054
1055    // For every node which is a switch, retrieve the exit.
1056    // As the set of intersection of successors is topologically sorted, the
1057    // exit should be the first node in the set (if the set is non empty).
1058    // Also, we need to explore the nodes in topological order, to give
1059    // precedence to the outer switches.
1060    let mut exits_set = HashSet::new();
1061    let mut ord_exits_set = BTreeSet::new();
1062    let mut exits = HashMap::new();
1063    for bid in sorted_switch_blocks {
1064        trace!("Finding exit candidate for: {bid:?}");
1065        let bid = bid.id;
1066        let info = succs_info_map.get(&bid).unwrap();
1067        let succs = &info.flow;
1068        // Find the best successor: this is the last one (with the highest flow,
1069        // and the highest reverse topological rank).
1070        if succs.is_empty() {
1071            trace!("{bid:?} has no successors");
1072            exits.insert(bid, None);
1073        } else {
1074            // We have an exit candidate: check that it was not already
1075            // taken by an external switch
1076            let exit = succs.last().unwrap();
1077            trace!("{bid:?} has an exit candidate: {exit:?}");
1078            if exits_set.contains(&exit.id) {
1079                trace!("Ignoring the exit candidate because already taken by an external switch");
1080                exits.insert(bid, None);
1081            } else {
1082                // It was not taken by an external switch.
1083                //
1084                // We must check that we can't reach the exit of an external
1085                // switch from one of the branches, without going through the
1086                // exit candidate.
1087                // We do this by simply checking that we can't reach any exits
1088                // (and use the fact that we explore the switch by using a
1089                // topological order to not discard valid exit candidates).
1090                //
1091                // The reason is that it can lead to code like the following:
1092                // ```
1093                // if ... { // if #1
1094                //   if ... { // if #2
1095                //     ...
1096                //     // here, we have a `goto b1`, where b1 is the exit
1097                //     // of if #2: we thus stop translating the blocks.
1098                //   }
1099                //   else {
1100                //     ...
1101                //     // here, we have a `goto b2`, where b2 is the exit
1102                //     // of if #1: we thus stop translating the blocks.
1103                //   }
1104                //   // We insert code for the block b1 here (which is the exit of
1105                //   // the exit of if #2). However, this block should only
1106                //   // be executed in the branch "then" of the if #2, not in
1107                //   // the branch "else".
1108                //   ...
1109                // }
1110                // else {
1111                //   ...
1112                // }
1113                // ```
1114
1115                // First: we do a quick check (does the set of all successors
1116                // intersect the set of exits for outer blocks?). If yes, we do
1117                // a more precise analysis: we check if we can reach the exit
1118                // *without going through* the exit candidate.
1119                if info.succs.intersection(&ord_exits_set).next().is_none()
1120                    || !can_reach_outer_exit(cfg, &exits_set, bid, exit.id)
1121                {
1122                    trace!("Keeping the exit candidate");
1123                    // No intersection: ok
1124                    exits_set.insert(exit.id);
1125                    ord_exits_set.insert(make_ord_block_id(exit.id, tsort_map));
1126                    exits.insert(bid, Some(exit.id));
1127                } else {
1128                    trace!("Ignoring the exit candidate because of an intersection with external switches");
1129                    exits.insert(bid, None);
1130                }
1131            }
1132        }
1133    }
1134
1135    exits
1136}
1137
1138/// The exits of a graph
1139#[derive(Debug, Clone)]
1140struct ExitInfo {
1141    /// The loop exits
1142    loop_exits: HashMap<src::BlockId, Option<src::BlockId>>,
1143    /// Some loop exits actually belong to outer switches. We still need
1144    /// to track them in the loop exits, in order to know when we should
1145    /// insert a break. However, we need to make sure we don't add the
1146    /// corresponding block in a sequence, after having translated the
1147    /// loop, like so:
1148    /// ```text
1149    /// loop {
1150    ///   loop_body
1151    /// };
1152    /// exit_blocks // OK if the exit "belongs" to the loop
1153    /// ```
1154    ///
1155    /// In case the exit doesn't belong to the loop:
1156    /// ```text
1157    /// if b {
1158    ///   loop {
1159    ///     loop_body
1160    ///   } // no exit blocks after the loop
1161    /// }
1162    /// else {
1163    ///   ...
1164    /// };
1165    /// exit_blocks // the exit blocks are here
1166    /// ```
1167    owned_loop_exits: HashMap<src::BlockId, Option<src::BlockId>>,
1168    /// The switch exits.
1169    /// Note that the switch exits are always owned.
1170    owned_switch_exits: HashMap<src::BlockId, Option<src::BlockId>>,
1171}
1172
1173/// Compute the exits for the loops and the switches (switch on integer and
1174/// if ... then ... else ...). We need to do this because control-flow in MIR
1175/// is destructured: we have gotos everywhere.
1176///
1177/// Let's consider the following piece of code:
1178/// ```text
1179/// if cond1 { ... } else { ... };
1180/// if cond2 { ... } else { ... };
1181/// ```
1182/// Once converted to MIR, the control-flow is destructured, which means we
1183/// have gotos everywhere. When reconstructing the control-flow, we have
1184/// to be careful about the point where we should join the two branches of
1185/// the first if.
1186/// For instance, if we don't notice they should be joined at some point (i.e,
1187/// whatever the branch we take, there is a moment when we go to the exact
1188/// same place, just before the second if), we might generate code like
1189/// this, with some duplicata:
1190/// ```text
1191/// if cond1 { ...; if cond2 { ... } else { ...} }
1192/// else { ...; if cond2 { ... } else { ...} }
1193/// ```
1194///
1195/// Such a reconstructed program is valid, but it is definitely non-optimal:
1196/// it is very different from the original program (making it less clean and
1197/// clear), more bloated, and might involve duplicating the proof effort.
1198///
1199/// For this reason, we need to find the "exit" of the first loop, which is
1200/// the point where the two branches join. Note that this can be a bit tricky,
1201/// because there may be more than two branches (if we do `switch(x) { ... }`),
1202/// and some of them might not join (if they contain a `break`, `panic`,
1203/// `return`, etc.).
1204///
1205/// Finally, some similar issues arise for loops. For instance, let's consider
1206/// the following piece of code:
1207/// ```text
1208/// while cond1 {
1209///   e1;
1210///   if cond2 {
1211///     break;
1212///   }
1213///   e2;
1214/// }
1215/// e3;
1216/// return;
1217/// ```
1218///
1219/// Note that in MIR, the loop gets desugared to an if ... then ... else ....
1220/// From the MIR, We want to generate something like this:
1221/// ```text
1222/// loop {
1223///   if cond1 {
1224///     e1;
1225///     if cond2 {
1226///       break;
1227///     }
1228///     e2;
1229///     continue;
1230///   }
1231///   else {
1232///     break;
1233///   }
1234/// };
1235/// e3;
1236/// return;
1237/// ```
1238///
1239/// But if we don't pay attention, we might end up with that, once again with
1240/// duplications:
1241/// ```text
1242/// loop {
1243///   if cond1 {
1244///     e1;
1245///     if cond2 {
1246///       e3;
1247///       return;
1248///     }
1249///     e2;
1250///     continue;
1251///   }
1252///   else {
1253///     e3;
1254///     return;
1255///   }
1256/// }
1257/// ```
1258/// We thus have to notice that if the loop condition is false, we goto the same
1259/// block as when following the goto introduced by the break inside the loop, and
1260/// this block is dubbed the "loop exit".
1261///
1262/// The following function thus computes the "exits" for loops and switches, which
1263/// are basically the points where control-flow joins.
1264fn compute_loop_switch_exits(
1265    ctx: &mut TransformCtx,
1266    body: &src::ExprBody,
1267    cfg_info: &CfgInfo,
1268) -> ExitInfo {
1269    // Use the CFG without backward edges to topologically sort the nodes.
1270    // Note that `toposort` returns `Err` if and only if it finds cycles (which
1271    // can't happen).
1272    let tsorted: Vec<src::BlockId> = toposort(&cfg_info.cfg_no_be, None).unwrap();
1273
1274    // Build the map: block id -> topological sort rank
1275    let tsort_map: HashMap<src::BlockId, usize> = tsorted
1276        .into_iter()
1277        .enumerate()
1278        .map(|(i, block_id)| (block_id, i))
1279        .collect();
1280    trace!("tsort_map:\n{:?}", tsort_map);
1281
1282    // Compute the loop exits
1283    let loop_exits = compute_loop_exits(ctx, body, cfg_info);
1284    trace!("loop_exits:\n{:?}", loop_exits);
1285
1286    // Compute the switch exits
1287    let switch_exits = compute_switch_exits(cfg_info, &tsort_map);
1288    trace!("switch_exits:\n{:?}", switch_exits);
1289
1290    // Compute the exit info
1291    let mut exit_info = ExitInfo {
1292        loop_exits: HashMap::new(),
1293        owned_loop_exits: HashMap::new(),
1294        owned_switch_exits: HashMap::new(),
1295    };
1296
1297    // We need to give precedence to the outer switches and loops: we thus iterate
1298    // over the blocks in topological order.
1299    let mut sorted_blocks: BTreeSet<OrdBlockId> = BTreeSet::new();
1300    for bid in cfg_info
1301        .loop_entries
1302        .iter()
1303        .chain(cfg_info.switch_blocks.iter())
1304    {
1305        sorted_blocks.insert(make_ord_block_id(*bid, &tsort_map));
1306    }
1307
1308    // Keep track of the exits which were already attributed
1309    let mut all_exits = HashSet::new();
1310
1311    // Put all this together
1312    for bid in sorted_blocks {
1313        let bid = bid.id;
1314        // Check if loop or switch block
1315        if cfg_info.loop_entries.contains(&bid) {
1316            // This is a loop.
1317            //
1318            // For loops, we always register the exit (if there is one).
1319            // However, the exit may be owned by an outer switch (note
1320            // that we already took care of spreading the exits between
1321            // the inner/outer loops)
1322            let exit_id = loop_exits.get(&bid).unwrap();
1323            exit_info.loop_exits.insert(bid, *exit_id);
1324
1325            // Check if we "own" the exit
1326            match exit_id {
1327                None => {
1328                    // No exit
1329                    exit_info.owned_loop_exits.insert(bid, None);
1330                }
1331                Some(exit_id) => {
1332                    if all_exits.contains(exit_id) {
1333                        // We don't own it
1334                        exit_info.owned_loop_exits.insert(bid, None);
1335                    } else {
1336                        // We own it
1337                        exit_info.owned_loop_exits.insert(bid, Some(*exit_id));
1338                        all_exits.insert(*exit_id);
1339                    }
1340                }
1341            }
1342        } else {
1343            // For switches: check that the exit was not already given to a
1344            // loop
1345            let exit_id = switch_exits.get(&bid).unwrap();
1346
1347            match exit_id {
1348                None => {
1349                    // No exit
1350                    exit_info.owned_switch_exits.insert(bid, None);
1351                }
1352                Some(exit_id) => {
1353                    if all_exits.contains(exit_id) {
1354                        // We don't own it
1355                        exit_info.owned_switch_exits.insert(bid, None);
1356                    } else {
1357                        // We own it
1358                        exit_info.owned_switch_exits.insert(bid, Some(*exit_id));
1359                        all_exits.insert(*exit_id);
1360                    }
1361                }
1362            }
1363        }
1364    }
1365
1366    exit_info
1367}
1368
1369fn get_goto_kind(
1370    exits_info: &ExitInfo,
1371    parent_loops: &Vec<src::BlockId>,
1372    switch_exit_blocks: &HashSet<src::BlockId>,
1373    next_block_id: src::BlockId,
1374) -> GotoKind {
1375    // First explore the parent loops in revert order
1376    for (i, loop_id) in parent_loops.iter().rev().enumerate() {
1377        // If we goto a loop entry node: this is a 'continue'
1378        if next_block_id == *loop_id {
1379            return GotoKind::Continue(i);
1380        } else {
1381            // If we goto a loop exit node: this is a 'break'
1382            if let Some(exit_id) = exits_info.loop_exits.get(loop_id).unwrap() {
1383                if next_block_id == *exit_id {
1384                    return GotoKind::Break(i);
1385                }
1386            }
1387        }
1388    }
1389
1390    // Check if the goto exits the current block
1391    if switch_exit_blocks.contains(&next_block_id) {
1392        return GotoKind::ExitBlock;
1393    }
1394
1395    // Default
1396    GotoKind::Goto
1397}
1398
1399enum GotoKind {
1400    Break(usize),
1401    Continue(usize),
1402    ExitBlock,
1403    Goto,
1404}
1405
1406/// `parent_span`: we need some span data for the new statement.
1407/// We use the one for the parent terminator.
1408fn translate_child_block(
1409    info: &mut BlockInfo<'_>,
1410    parent_loops: &Vec<src::BlockId>,
1411    switch_exit_blocks: &HashSet<src::BlockId>,
1412    parent_span: Span,
1413    child_id: src::BlockId,
1414) -> Option<tgt::Block> {
1415    // Check if this is a backward call
1416    match get_goto_kind(info.exits_info, parent_loops, switch_exit_blocks, child_id) {
1417        GotoKind::Break(index) => {
1418            let st = tgt::RawStatement::Break(index);
1419            Some(tgt::Statement::new(parent_span, st).into_block())
1420        }
1421        GotoKind::Continue(index) => {
1422            let st = tgt::RawStatement::Continue(index);
1423            Some(tgt::Statement::new(parent_span, st).into_block())
1424        }
1425        // If we are going to an exit block we simply ignore the goto
1426        GotoKind::ExitBlock => None,
1427        GotoKind::Goto => {
1428            // "Standard" goto: just recursively translate
1429            ensure_sufficient_stack(|| {
1430                Some(translate_block(
1431                    info,
1432                    parent_loops,
1433                    switch_exit_blocks,
1434                    child_id,
1435                ))
1436            })
1437        }
1438    }
1439}
1440
1441fn opt_block_unwrap_or_nop(span: Span, opt_block: Option<tgt::Block>) -> tgt::Block {
1442    opt_block.unwrap_or_else(|| tgt::Statement::new(span, tgt::RawStatement::Nop).into_block())
1443}
1444
1445fn translate_statement(st: &src::Statement) -> Option<tgt::Statement> {
1446    let src_span = st.span;
1447    let st = match st.content.clone() {
1448        src::RawStatement::Assign(place, rvalue) => tgt::RawStatement::Assign(place, rvalue),
1449        src::RawStatement::SetDiscriminant(place, variant_id) => {
1450            tgt::RawStatement::SetDiscriminant(place, variant_id)
1451        }
1452        src::RawStatement::CopyNonOverlapping(copy) => tgt::RawStatement::CopyNonOverlapping(copy),
1453        src::RawStatement::StorageLive(var_id) => tgt::RawStatement::StorageLive(var_id),
1454        src::RawStatement::StorageDead(var_id) => tgt::RawStatement::StorageDead(var_id),
1455        src::RawStatement::Deinit(place) => tgt::RawStatement::Deinit(place),
1456        src::RawStatement::Drop(place) => tgt::RawStatement::Drop(place),
1457        src::RawStatement::Assert(assert) => tgt::RawStatement::Assert(assert),
1458        src::RawStatement::Nop => tgt::RawStatement::Nop,
1459        src::RawStatement::Error(s) => tgt::RawStatement::Error(s),
1460    };
1461    Some(tgt::Statement::new(src_span, st))
1462}
1463
1464fn translate_terminator(
1465    info: &mut BlockInfo<'_>,
1466    parent_loops: &Vec<src::BlockId>,
1467    switch_exit_blocks: &HashSet<src::BlockId>,
1468    terminator: &src::Terminator,
1469) -> tgt::Block {
1470    let src_span = terminator.span;
1471
1472    match &terminator.content {
1473        src::RawTerminator::Abort(kind) => {
1474            tgt::Statement::new(src_span, tgt::RawStatement::Abort(kind.clone())).into_block()
1475        }
1476        src::RawTerminator::Return => {
1477            tgt::Statement::new(src_span, tgt::RawStatement::Return).into_block()
1478        }
1479        src::RawTerminator::Call { call, target } => {
1480            let target_block = translate_child_block(
1481                info,
1482                parent_loops,
1483                switch_exit_blocks,
1484                terminator.span,
1485                *target,
1486            );
1487            let mut block = opt_block_unwrap_or_nop(terminator.span, target_block);
1488            let st = tgt::Statement::new(src_span, tgt::RawStatement::Call(call.clone()));
1489            block.statements.insert(0, st);
1490            block
1491        }
1492        src::RawTerminator::Goto { target } => {
1493            let block = translate_child_block(
1494                info,
1495                parent_loops,
1496                switch_exit_blocks,
1497                terminator.span,
1498                *target,
1499            );
1500            let block = opt_block_unwrap_or_nop(terminator.span, block);
1501            block
1502        }
1503        src::RawTerminator::Switch { discr, targets } => {
1504            // Translate the target expressions
1505            let switch = match &targets {
1506                src::SwitchTargets::If(then_tgt, else_tgt) => {
1507                    // Translate the children expressions
1508                    let then_block = translate_child_block(
1509                        info,
1510                        parent_loops,
1511                        switch_exit_blocks,
1512                        terminator.span,
1513                        *then_tgt,
1514                    );
1515                    // We use the terminator span information in case then
1516                    // then statement is `None`
1517                    let then_block = opt_block_unwrap_or_nop(terminator.span, then_block);
1518                    let else_block = translate_child_block(
1519                        info,
1520                        parent_loops,
1521                        switch_exit_blocks,
1522                        terminator.span,
1523                        *else_tgt,
1524                    );
1525                    let else_block = opt_block_unwrap_or_nop(terminator.span, else_block);
1526
1527                    // Translate
1528                    tgt::Switch::If(discr.clone(), then_block, else_block)
1529                }
1530                src::SwitchTargets::SwitchInt(int_ty, targets, otherwise) => {
1531                    // Note that some branches can be grouped together, like
1532                    // here:
1533                    // ```
1534                    // match e {
1535                    //   E::V1 | E::V2 => ..., // Grouped
1536                    //   E::V3 => ...
1537                    // }
1538                    // ```
1539                    // We detect this by checking if a block has already been
1540                    // translated as one of the branches of the switch.
1541                    //
1542                    // Rk.: note there may be intermediate gotos depending
1543                    // on the MIR we use. Typically, we manage to detect the
1544                    // grouped branches with Optimized MIR, but not with Promoted
1545                    // MIR. See the comment in "tests/src/matches.rs".
1546
1547                    // We link block ids to:
1548                    // - vector of matched integer values
1549                    // - translated blocks
1550                    let mut branches: IndexMap<src::BlockId, (Vec<ScalarValue>, tgt::Block)> =
1551                        IndexMap::new();
1552
1553                    // Translate the children expressions
1554                    for (v, bid) in targets.iter() {
1555                        // Check if the block has already been translated:
1556                        // if yes, it means we need to group branches
1557                        if branches.contains_key(bid) {
1558                            // Already translated: add the matched value to
1559                            // the list of values
1560                            let branch = branches.get_mut(bid).unwrap();
1561                            branch.0.push(*v);
1562                        } else {
1563                            // Not translated: translate it
1564                            let block = translate_child_block(
1565                                info,
1566                                parent_loops,
1567                                switch_exit_blocks,
1568                                terminator.span,
1569                                *bid,
1570                            );
1571                            // We use the terminator span information in case then
1572                            // then statement is `None`
1573                            let block = opt_block_unwrap_or_nop(terminator.span, block);
1574                            branches.insert(*bid, (vec![*v], block));
1575                        }
1576                    }
1577                    let targets_blocks: Vec<(Vec<ScalarValue>, tgt::Block)> =
1578                        branches.into_iter().map(|(_, x)| x).collect();
1579
1580                    let otherwise_block = translate_child_block(
1581                        info,
1582                        parent_loops,
1583                        switch_exit_blocks,
1584                        terminator.span,
1585                        *otherwise,
1586                    );
1587                    // We use the terminator span information in case then
1588                    // then statement is `None`
1589                    let otherwise_block = opt_block_unwrap_or_nop(terminator.span, otherwise_block);
1590
1591                    // Translate
1592                    tgt::Switch::SwitchInt(discr.clone(), *int_ty, targets_blocks, otherwise_block)
1593                }
1594            };
1595
1596            // Return
1597            let span = tgt::combine_switch_targets_span(&switch);
1598            let span = combine_span(&src_span, &span);
1599            let st = tgt::RawStatement::Switch(switch);
1600            tgt::Statement::new(span, st).into_block()
1601        }
1602    }
1603}
1604
1605/// Remark: some values are boxed (here, the returned statement) so that they
1606/// are allocated on the heap. This reduces stack usage (we had problems with
1607/// stack overflows in the past). A more efficient solution would be to use loops
1608/// to make this code constant space, but that would require a serious rewriting.
1609fn translate_block(
1610    info: &mut BlockInfo<'_>,
1611    parent_loops: &Vec<src::BlockId>,
1612    switch_exit_blocks: &HashSet<src::BlockId>,
1613    block_id: src::BlockId,
1614) -> tgt::Block {
1615    // If the user activated this check: check that we didn't already translate
1616    // this block, and insert the block id in the set of already translated blocks.
1617    trace!(
1618        "Parent loops: {:?}, Parent switch exits: {:?}, Block id: {}",
1619        parent_loops,
1620        switch_exit_blocks,
1621        block_id
1622    );
1623    info.explored.insert(block_id);
1624
1625    let block = info.body.body.get(block_id).unwrap();
1626
1627    // Check if we enter a loop: if so, update parent_loops and the current_exit_block
1628    let is_loop = info.cfg.loop_entries.contains(&block_id);
1629    let mut nparent_loops: Vec<src::BlockId>;
1630    let nparent_loops = if info.cfg.loop_entries.contains(&block_id) {
1631        nparent_loops = parent_loops.clone();
1632        nparent_loops.push(block_id);
1633        &nparent_loops
1634    } else {
1635        parent_loops
1636    };
1637
1638    // If we enter a switch or a loop, we need to check if we own the exit
1639    // block, in which case we need to append it to the loop/switch body
1640    // in a sequence
1641    let is_switch = block.terminator.content.is_switch();
1642    let next_block = if is_loop {
1643        *info.exits_info.owned_loop_exits.get(&block_id).unwrap()
1644    } else if is_switch {
1645        *info.exits_info.owned_switch_exits.get(&block_id).unwrap()
1646    } else {
1647        None
1648    };
1649
1650    // If we enter a switch, add the exit block to the set
1651    // of outer exit blocks
1652    let nswitch_exit_blocks = if is_switch {
1653        let mut nexit_blocks = switch_exit_blocks.clone();
1654        match next_block {
1655            None => nexit_blocks,
1656            Some(bid) => {
1657                nexit_blocks.insert(bid);
1658                nexit_blocks
1659            }
1660        }
1661    } else {
1662        switch_exit_blocks.clone()
1663    };
1664
1665    // Translate the terminator and the subsequent blocks.
1666    // Note that this terminator is an option: we might ignore it
1667    // (if it is an exit).
1668
1669    let terminator =
1670        translate_terminator(info, nparent_loops, &nswitch_exit_blocks, &block.terminator);
1671
1672    // Translate the statements inside the block
1673    let statements = block
1674        .statements
1675        .iter()
1676        .filter_map(|st| translate_statement(st))
1677        .collect_vec();
1678
1679    // Prepend the statements to the terminator.
1680    let mut block = if let Some(st) = tgt::Block::from_seq(statements) {
1681        st.merge(terminator)
1682    } else {
1683        terminator
1684    };
1685
1686    if is_loop {
1687        // Put the loop body inside a `Loop`.
1688        block = tgt::Statement::new(block.span, tgt::RawStatement::Loop(block)).into_block()
1689    } else if !is_switch {
1690        assert!(next_block.is_none());
1691    }
1692
1693    // Concatenate the exit expression, if needs be
1694    if let Some(exit_block_id) = next_block {
1695        let next_block = ensure_sufficient_stack(|| {
1696            translate_block(info, parent_loops, switch_exit_blocks, exit_block_id)
1697        });
1698        block = block.merge(next_block);
1699    }
1700
1701    block
1702}
1703
1704fn translate_body_aux(ctx: &mut TransformCtx, src_body: &src::ExprBody) -> tgt::ExprBody {
1705    // Explore the function body to create the control-flow graph without backward
1706    // edges, and identify the loop entries (which are destinations of backward edges).
1707    let cfg_info = build_cfg_info(src_body);
1708    trace!("cfg_info: {:?}", cfg_info);
1709
1710    // Find the exit block for all the loops and switches, if such an exit point
1711    // exists.
1712    let exits_info = compute_loop_switch_exits(ctx, src_body, &cfg_info);
1713
1714    // Debugging
1715    trace!("exits_info:\n{:?}", exits_info);
1716
1717    // Translate the body by reconstructing the loops and the
1718    // conditional branchings.
1719    // Note that we shouldn't get `None`.
1720    let mut explored = HashSet::new();
1721    let mut info = BlockInfo {
1722        cfg: &cfg_info,
1723        body: src_body,
1724        exits_info: &exits_info,
1725        explored: &mut explored,
1726    };
1727    let tgt_body = translate_block(&mut info, &Vec::new(), &HashSet::new(), src::BlockId::ZERO);
1728
1729    tgt::ExprBody {
1730        span: src_body.span,
1731        locals: src_body.locals.clone(),
1732        comments: src_body.comments.clone(),
1733        body: tgt_body,
1734    }
1735}
1736
1737fn translate_body(ctx: &mut TransformCtx, body: &mut gast::Body) {
1738    use gast::Body::{Structured, Unstructured};
1739    let Unstructured(src_body) = body else {
1740        panic!("Called `ullbc_to_llbc` on an already restructured body")
1741    };
1742    trace!("About to translate to ullbc: {:?}", src_body.span);
1743    let tgt_body = translate_body_aux(ctx, src_body);
1744    *body = Structured(tgt_body);
1745}
1746
1747pub struct Transform;
1748impl TransformPass for Transform {
1749    fn transform_ctx(&self, ctx: &mut TransformCtx) {
1750        // Translate the bodies one at a time.
1751        ctx.for_each_body(|ctx, body| {
1752            translate_body(ctx, body);
1753        });
1754
1755        // Print the functions
1756        let fmt_ctx = ctx.into_fmt();
1757        for fun in &ctx.translated.fun_decls {
1758            trace!(
1759                "# Signature:\n{}\n\n# Function definition:\n{}\n",
1760                fmt_ctx.format_object(&fun.signature),
1761                fmt_ctx.format_object(fun),
1762            );
1763        }
1764        // Print the global variables
1765        for global in &ctx.translated.global_decls {
1766            trace!(
1767                "# Type:\n{}\n\n# Global definition:\n{}\n",
1768                fmt_ctx.format_object(&global.ty),
1769                fmt_ctx.format_object(global)
1770            );
1771        }
1772
1773        if ctx.options.print_built_llbc {
1774            info!("# LLBC resulting from control-flow reconstruction:\n\n{ctx}\n",);
1775        }
1776    }
1777}