charon_lib/transform/
ullbc_to_llbc.rs

1//! ULLBC to LLBC
2//!
3//! We reconstruct the control-flow in the Unstructured LLBC.
4//!
5//! The reconstruction algorithm is not written to be efficient (its complexity
6//! is probably very bad), but it was not written to be: this is still an early
7//! stage and we want the algorithm to generate the best reconstruction as
8//! possible. We still need to test the algorithm on more interesting examples,
9//! and will consider making it more efficient once it is a bit mature and well
10//! tested.
11//! Also note that we more importantly focus on making the algorithm sound: the
12//! reconstructed program must always be equivalent to the original MIR program,
13//! and the fact that the reconstruction preserves this property must be obvious.
14//!
15//! Finally, we conjecture the execution time shouldn't be too much a problem:
16//! we don't expect the translation mechanism to be applied on super huge functions
17//! (which will be difficult to formally analyze), and the MIR graphs are actually
18//! not that big because statements are grouped into code blocks (a block is made
19//! of a list of statements, followed by a terminator - branchings and jumps can
20//! only be performed by terminators -, meaning that MIR graphs don't have that
21//! many nodes and edges).
22
23use crate::ast::*;
24use crate::common::ensure_sufficient_stack;
25use crate::errors::sanity_check;
26use crate::formatter::{Formatter, IntoFormatter};
27use crate::llbc_ast as tgt;
28use crate::meta::{combine_span, Span};
29use crate::transform::TransformCtx;
30use crate::ullbc_ast::{self as src};
31use indexmap::IndexMap;
32use itertools::Itertools;
33use num_bigint::BigInt;
34use num_rational::BigRational;
35use petgraph::algo::toposort;
36use petgraph::graphmap::DiGraphMap;
37use petgraph::Direction;
38use std::collections::{BTreeSet, HashMap, HashSet, VecDeque};
39
40use super::ctx::TransformPass;
41
42/// Control-Flow Graph
43type Cfg = DiGraphMap<src::BlockId, ()>;
44
45/// Small utility
46struct BlockInfo<'a> {
47    /// `no_code_duplication`: if true, check that no block is translated twice (this
48    /// can be a sign that the reconstruction is of poor quality, but sometimes
49    /// code duplication is necessary, in the presence of "fused" match branches for
50    /// instance, like in `match ... { Foo | Bar => { ... }}`).
51    no_code_duplication: bool,
52    cfg: &'a CfgInfo,
53    body: &'a src::ExprBody,
54    exits_info: &'a ExitInfo,
55    explored: &'a mut HashSet<src::BlockId>,
56}
57
58/// This structure contains various information about a function's CFG.
59#[derive(Debug)]
60struct CfgInfo {
61    /// The CFG
62    pub cfg: Cfg,
63    /// The CFG where all the backward edges have been removed
64    pub cfg_no_be: Cfg,
65    /// We consider the destination of the backward edges to be loop entries and
66    /// store them here.
67    pub loop_entries: HashSet<src::BlockId>,
68    /// The backward edges
69    pub backward_edges: HashSet<(src::BlockId, src::BlockId)>,
70    /// The blocks whose terminators are a switch are stored here.
71    pub switch_blocks: HashSet<src::BlockId>,
72    /// The set of nodes from where we can only reach error nodes (panic, etc.)
73    pub only_reach_error: HashSet<src::BlockId>,
74}
75
76/// Build the CFGs (the "regular" CFG and the CFG without backward edges) and
77/// compute some information like the loop entries and the switch blocks.
78fn build_cfg_info(body: &src::ExprBody) -> CfgInfo {
79    let mut cfg = CfgInfo {
80        cfg: Cfg::new(),
81        cfg_no_be: Cfg::new(),
82        loop_entries: HashSet::new(),
83        backward_edges: HashSet::new(),
84        switch_blocks: HashSet::new(),
85        only_reach_error: HashSet::new(),
86    };
87
88    // Add the nodes
89    for block_id in body.body.iter_indices() {
90        cfg.cfg.add_node(block_id);
91        cfg.cfg_no_be.add_node(block_id);
92    }
93
94    // Add the edges
95    let ancestors = HashSet::new();
96    let mut explored = HashSet::new();
97    build_cfg_partial_info_edges(
98        &mut cfg,
99        &ancestors,
100        &mut explored,
101        body,
102        src::BlockId::ZERO,
103    );
104
105    cfg
106}
107
108fn block_is_switch(body: &src::ExprBody, block_id: src::BlockId) -> bool {
109    let block = body.body.get(block_id).unwrap();
110    block.terminator.content.is_switch()
111}
112
113/// The terminator of the block is a panic, etc.
114fn block_is_error(body: &src::ExprBody, block_id: src::BlockId) -> bool {
115    let block = body.body.get(block_id).unwrap();
116    use src::RawTerminator::*;
117    match &block.terminator.content {
118        Abort(..) => true,
119        Goto { .. } | Switch { .. } | Return { .. } => false,
120    }
121}
122
123fn build_cfg_partial_info_edges(
124    cfg: &mut CfgInfo,
125    ancestors: &HashSet<src::BlockId>,
126    explored: &mut HashSet<src::BlockId>,
127    body: &src::ExprBody,
128    block_id: src::BlockId,
129) {
130    // Check if we already explored the current node
131    if explored.contains(&block_id) {
132        return;
133    }
134    explored.insert(block_id);
135
136    // Insert the current block in the set of ancestors blocks
137    let mut ancestors = ancestors.clone();
138    ancestors.insert(block_id);
139
140    // Check if it is a switch
141    if block_is_switch(body, block_id) {
142        cfg.switch_blocks.insert(block_id);
143    }
144
145    // Retrieve the block targets
146    let targets = body.body.get(block_id).unwrap().targets();
147    let mut has_backward_edge = false;
148
149    // Add edges for all the targets and explore them, if they are not predecessors
150    for tgt in &targets {
151        // Insert the edge in the "regular" CFG
152        cfg.cfg.add_edge(block_id, *tgt, ());
153
154        // We need to check if it is a backward edge before inserting it in the
155        // CFG without backward edges and exploring it
156        if ancestors.contains(tgt) {
157            // This is a backward edge
158            has_backward_edge = true;
159            cfg.loop_entries.insert(*tgt);
160            cfg.backward_edges.insert((block_id, *tgt));
161        } else {
162            // Not a backward edge: insert the edge and explore
163            cfg.cfg_no_be.add_edge(block_id, *tgt, ());
164            ensure_sufficient_stack(|| {
165                build_cfg_partial_info_edges(cfg, &ancestors, explored, body, *tgt);
166            })
167        }
168    }
169
170    // Check if this node can only reach error nodes:
171    // - we check if the current node ends with an error terminator
172    // - or check that all the targets lead to error nodes
173    // Note that if there is a backward edge, we consider that we don't necessarily
174    // go to error.
175    if !has_backward_edge
176        && (block_is_error(body, block_id)
177            || (
178                // The targets are empty if this is an error node *or* a return node
179                !targets.is_empty() && targets.iter().all(|tgt| cfg.only_reach_error.contains(tgt))
180            ))
181    {
182        cfg.only_reach_error.insert(block_id);
183    }
184}
185
186#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
187struct BlockWithRank<T> {
188    /// A "rank" quantity that we use to order the nodes.
189    /// By placing the `rank` field before the `id`, we make sure that
190    /// we use the rank first in the lexicographic order.
191    rank: T,
192    id: src::BlockId,
193}
194
195type OrdBlockId = BlockWithRank<usize>;
196
197/// For the rank we use:
198/// - a "flow" quantity (see [BlocksInfo])
199/// - the *inverse* rank in the topological sort (i.e., `- topo_rank`)
200type FlowBlockId = BlockWithRank<(BigRational, isize)>;
201
202#[derive(Debug, Clone)]
203struct LoopExitCandidateInfo {
204    /// The occurrences going to this exit.
205    /// For every occurrence, we register the distance between the loop entry
206    /// and the node from which the edge going to the exit starts.
207    /// If later we have to choose between candidates with the same number
208    /// of occurrences, we choose the one with the smallest distances.
209    ///
210    /// Note that it *may* happen that we have several exit candidates referenced
211    /// more than once for one loop. This comes from the fact that whenever we
212    /// reach a node from which the loop entry is not reachable (without using a
213    /// backward edge leading to an outer loop entry), we register this node
214    /// as well as all its children as exit candidates.
215    /// Consider the following example:
216    /// ```text
217    /// while i < max {
218    ///     if cond {
219    ///         break;
220    ///     }
221    ///     s += i;
222    ///     i += 1
223    /// }
224    /// // All the below nodes are exit candidates (each of them is referenced twice)
225    /// s += 1;
226    /// return s;
227    /// ```
228    pub occurrences: Vec<usize>,
229}
230
231/// Check if a loop entry is reachable from a node, in a graph where we remove
232/// the backward edges going directly to the loop entry.
233///
234/// If the loop entry is not reachable, it means that:
235/// - the loop entry is not reachable at all
236/// - or it is only reachable through an outer loop
237///
238/// The starting node should be a (transitive) child of the loop entry.
239/// We use this to find candidates for loop exits.
240fn loop_entry_is_reachable_from_inner(
241    cfg: &CfgInfo,
242    loop_entry: src::BlockId,
243    block_id: src::BlockId,
244) -> bool {
245    // It is reachable in the complete graph. Check if it is reachable by not
246    // going through backward edges which go to outer loops. In practice, we
247    // just need to forbid the use of any backward edges at the exception of
248    // those which go directly to the current loop's entry. This means that we
249    // ignore backward edges to outer loops of course, but also backward edges
250    // to inner loops because we shouldn't need to follow those (there should be
251    // more direct paths).
252
253    // Explore the graph starting at block_id
254    let mut explored: HashSet<src::BlockId> = HashSet::new();
255    let mut stack: VecDeque<src::BlockId> = VecDeque::new();
256    stack.push_back(block_id);
257    while !stack.is_empty() {
258        let bid = stack.pop_front().unwrap();
259        if explored.contains(&bid) {
260            continue;
261        }
262        explored.insert(bid);
263
264        let next_ids = cfg.cfg.neighbors_directed(bid, Direction::Outgoing);
265        for next_id in next_ids {
266            // Check if this is a backward edge
267            if cfg.backward_edges.contains(&(bid, next_id)) {
268                // Backward edge: only allow those going directly to the current
269                // loop's entry
270                if next_id == loop_entry {
271                    // The loop entry is reachable
272                    return true;
273                } else {
274                    // Forbidden edge: ignore
275                    continue;
276                }
277            } else {
278                // Nothing special: add the node to the stack for further
279                // exploration
280                stack.push_back(next_id);
281            }
282        }
283    }
284
285    // The loop entry is not reachable
286    false
287}
288
289struct FilteredLoopParents {
290    remaining_parents: Vec<(src::BlockId, usize)>,
291    removed_parents: Vec<(src::BlockId, usize)>,
292}
293
294fn filter_loop_parents(
295    cfg: &CfgInfo,
296    parent_loops: &Vec<(src::BlockId, usize)>,
297    block_id: src::BlockId,
298) -> Option<FilteredLoopParents> {
299    let mut eliminate: usize = 0;
300    for (loop_id, _ldist) in parent_loops.iter().rev() {
301        if !loop_entry_is_reachable_from_inner(cfg, *loop_id, block_id) {
302            eliminate += 1;
303        } else {
304            break;
305        }
306    }
307
308    if eliminate > 0 {
309        // Split the vector of parents
310        let (remaining_parents, removed_parents) =
311            parent_loops.split_at(parent_loops.len() - eliminate);
312        let (mut remaining_parents, removed_parents) =
313            (remaining_parents.to_vec(), removed_parents.to_vec());
314
315        // Update the distance to the last loop - we just increment the distance
316        // by 1, because from the point of view of the parent loop, we just exited
317        // a block and go to the next sequence of instructions.
318        if !remaining_parents.is_empty() {
319            remaining_parents.last_mut().unwrap().1 += 1;
320        }
321
322        Some(FilteredLoopParents {
323            remaining_parents,
324            removed_parents,
325        })
326    } else {
327        None
328    }
329}
330
331/// List the nodes reachable from a starting point.
332/// We list the nodes and the depth (in the AST) at which they were found.
333fn list_reachable(cfg: &Cfg, start: src::BlockId) -> HashMap<src::BlockId, usize> {
334    let mut reachable: HashMap<src::BlockId, usize> = HashMap::new();
335    let mut stack: VecDeque<(src::BlockId, usize)> = VecDeque::new();
336    stack.push_back((start, 0));
337
338    while !stack.is_empty() {
339        let (bid, dist) = stack.pop_front().unwrap();
340
341        // Ignore this node if we already registered it with a better distance
342        match reachable.get(&bid) {
343            None => (),
344            Some(original_dist) => {
345                if *original_dist < dist {
346                    continue;
347                }
348            }
349        }
350
351        // Inset the node with its distance
352        reachable.insert(bid, dist);
353
354        // Add the children to the stack
355        for child in cfg.neighbors(bid) {
356            stack.push_back((child, dist + 1));
357        }
358    }
359
360    // Return
361    reachable
362}
363
364/// Register a node and its children as exit candidates for a list of
365/// parent loops.
366fn register_children_as_loop_exit_candidates(
367    cfg: &CfgInfo,
368    loop_exits: &mut HashMap<src::BlockId, IndexMap<src::BlockId, LoopExitCandidateInfo>>,
369    removed_parent_loops: &Vec<(src::BlockId, usize)>,
370    block_id: src::BlockId,
371) {
372    // List the reachable nodes
373    let reachable = list_reachable(&cfg.cfg_no_be, block_id);
374
375    let mut base_dist = 0;
376    // For every parent loop, in reverse order (we go from last to first in
377    // order to correctly compute the distances)
378    for (loop_id, loop_dist) in removed_parent_loops.iter().rev() {
379        // Update the distance to the loop entry
380        base_dist += *loop_dist;
381
382        // Retrieve the candidates
383        let candidates = loop_exits.get_mut(loop_id).unwrap();
384
385        // Update them
386        for (bid, dist) in reachable.iter() {
387            let distance = base_dist + *dist;
388            match candidates.get_mut(bid) {
389                None => {
390                    candidates.insert(
391                        *bid,
392                        LoopExitCandidateInfo {
393                            occurrences: vec![distance],
394                        },
395                    );
396                }
397                Some(c) => {
398                    c.occurrences.push(distance);
399                }
400            }
401        }
402    }
403}
404
405/// Compute the loop exit candidates.
406///
407/// There may be several candidates with the same "optimality" (same number of
408/// occurrences, etc.), in which case we choose the first one which was registered
409/// (the order in which we explore the graph is deterministic): this is why we
410/// store the candidates in a linked hash map.
411fn compute_loop_exit_candidates(
412    cfg: &CfgInfo,
413    explored: &mut HashSet<src::BlockId>,
414    ordered_loops: &mut Vec<src::BlockId>,
415    loop_exits: &mut HashMap<src::BlockId, IndexMap<src::BlockId, LoopExitCandidateInfo>>,
416    // List of parent loops, with the distance to the entry of the loop (the distance
417    // is the distance between the current node and the loop entry for the last parent,
418    // and the distance between the parents for the others).
419    mut parent_loops: Vec<(src::BlockId, usize)>,
420    block_id: src::BlockId,
421) {
422    if explored.contains(&block_id) {
423        return;
424    }
425    explored.insert(block_id);
426
427    // Check if we enter a loop - add the corresponding node if necessary
428    if cfg.loop_entries.contains(&block_id) {
429        parent_loops.push((block_id, 1));
430        ordered_loops.push(block_id);
431    } else {
432        // Increase the distance with the parent loop
433        if !parent_loops.is_empty() {
434            parent_loops.last_mut().unwrap().1 += 1;
435        }
436    };
437
438    // Retrieve the children - note that we ignore the back edges
439    let children = cfg.cfg_no_be.neighbors(block_id);
440    for child in children {
441        // If the parent loop entry is not reachable from the child without going
442        // through a backward edge which goes directly to the loop entry, consider
443        // this node a potential exit.
444        ensure_sufficient_stack(|| {
445            match filter_loop_parents(cfg, &parent_loops, child) {
446                None => {
447                    compute_loop_exit_candidates(
448                        cfg,
449                        explored,
450                        ordered_loops,
451                        loop_exits,
452                        parent_loops.clone(),
453                        child,
454                    );
455                }
456                Some(fparent_loops) => {
457                    // We filtered some parent loops: it means this child and its
458                    // children are loop exit candidates for all those loops: we must
459                    // thus register them.
460                    // Note that we register the child *and* its children: the reason
461                    // is that we might do something *then* actually jump to the exit.
462                    // For instance, the following block of code:
463                    // ```
464                    // if cond { break; } else { ... }
465                    // ```
466                    //
467                    // Gets translated in MIR to something like this:
468                    // ```
469                    // bb1: {
470                    //   if cond -> bb2 else -> bb3; // bb2 is not the real exit
471                    // }
472                    //
473                    // bb2: {
474                    //   goto bb4; // bb4 is the real exit
475                    // }
476                    // ```
477                    register_children_as_loop_exit_candidates(
478                        cfg,
479                        loop_exits,
480                        &fparent_loops.removed_parents,
481                        child,
482                    );
483
484                    // Explore, with the filtered parents
485                    compute_loop_exit_candidates(
486                        cfg,
487                        explored,
488                        ordered_loops,
489                        loop_exits,
490                        fparent_loops.remaining_parents,
491                        child,
492                    );
493                }
494            }
495        })
496    }
497}
498
499/// See [`compute_loop_switch_exits`](compute_loop_switch_exits) for
500/// explanations about what "exits" are.
501///
502/// The following function computes the loop exits. It acts as follows.
503///
504/// We keep track of a stack of the loops in which we entered.
505/// It is very easy to check when we enter a loop: loop entries are destinations
506/// of backward edges, which can be spotted with a simple graph exploration (see
507/// [`build_cfg_partial_info`](build_cfg_partial_info).
508/// The criteria to consider whether we exit a loop is the following:
509/// - we exit a loop if we go to a block from which we can't reach the loop
510///   entry at all
511/// - or if we can reach the loop entry, but must use a backward edge which goes
512///   to an outer loop
513///
514/// It is better explained on the following example:
515/// ```text
516/// 'outer while i < max {
517///     'inner while j < max {
518///        j += 1;
519///     }
520///     // (i)
521///     i += 1;
522/// }
523/// ```
524/// If we enter the inner loop then go to (i) from the inner loop, we consider
525/// that we exited the outer loop because:
526/// - we can reach the entry of the inner loop from (i) (by finishing then
527///   starting again an iteration of the outer loop)
528/// - but doing this requires taking a backward edge which goes to the outer loop
529///
530/// Whenever we exit a loop, we save the block we went to as an exit candidate
531/// for this loop. Note that there may by many exit candidates. For instance,
532/// in the below example:
533/// ```text
534/// while ... {
535///    ...
536///    if ... {
537///        // We can't reach the loop entry from here: this is an exit
538///        // candidate
539///        return;
540///    }
541/// }
542/// // This is another exit candidate - and this is the one we want to use
543/// // as the "real" exit...
544/// ...
545/// ```
546///
547/// Also note that it may happen that we go several times to the same exit (if
548/// we use breaks for instance): we record the number of times an exit candidate
549/// is used.
550///
551/// Once we listed all the exit candidates, we find the "best" one for every
552/// loop, starting with the outer loops. We start with outer loops because
553/// inner loops might use breaks to exit to the exit of outer loops: if we
554/// start with the inner loops, the exit which is "natural" for the outer loop
555/// might end up being used for one of the inner loops...
556///
557/// The best exit is the following one:
558/// - it is the one which is used the most times (note that there can be
559///   several candidates which are referenced strictly more than once: see the
560///   comment below)
561/// - if several exits have the same number of occurrences, we choose the one
562///   for which we goto the "earliest" (earliest meaning that the goto is close to
563///   the loop entry node in the AST). The reason is that all the loops should
564///   have an outer if ... then ... else ... which executes the loop body or goes
565///   to the exit (note that this is not necessarily the first
566///   if ... then ... else ... we find: loop conditions can be arbitrary
567///   expressions, containing branchings).
568///
569/// # Several candidates for a loop exit:
570/// =====================================
571/// There used to be a sanity check to ensure there are no two different
572/// candidates with exactly the same number of occurrences and distance from
573/// the entry of the loop, if the number of occurrences is > 1.
574///
575/// We removed it because it does happen, for instance here (the match
576/// introduces an `unreachable` node, and it has the same number of
577/// occurrences and the same distance to the loop entry as the `panic`
578/// node):
579///
580/// ```text
581/// pub fn list_nth_mut_loop_pair<'a, T>(
582///     mut ls: &'a mut List<T>,
583///     mut i: u32,
584/// ) -> &'a mut T {
585///     loop {
586///         match ls {
587///             List::Nil => {
588///                 panic!() // <-- best candidate
589///             }
590///             List::Cons(x, tl) => {
591///                 if i == 0 {
592///                     return x;
593///                 } else {
594///                     ls = tl;
595///                     i -= 1;
596///                 }
597///             }
598///             _ => {
599///               // Note that Rustc always introduces an unreachable branch after
600///               // desugaring matches.
601///               unreachable!(), // <-- best candidate
602///             }
603///         }
604///     }
605/// }
606/// ```
607///
608/// When this happens we choose an exit candidate whose edges don't necessarily
609/// lead to an error (above there are none, so we don't choose any exits). Note
610/// that this last condition is important to prevent loops from being unnecessarily
611/// nested:
612///
613/// ```text
614/// pub fn nested_loops_enum(step_out: usize, step_in: usize) -> usize {
615///     let mut s = 0;
616///
617///     for _ in 0..128 { // We don't want this loop to be nested with the loops below
618///         s += 1;
619///     }
620///
621///     for _ in 0..(step_out) {
622///         for _ in 0..(step_in) {
623///             s += 1;
624///         }
625///     }
626///
627///     s
628/// }
629/// ```
630fn compute_loop_exits(
631    ctx: &mut TransformCtx,
632    body: &src::ExprBody,
633    cfg: &CfgInfo,
634) -> HashMap<src::BlockId, Option<src::BlockId>> {
635    let mut explored = HashSet::new();
636    let mut ordered_loops = Vec::new();
637    let mut loop_exits = HashMap::new();
638
639    // Initialize the loop exits candidates
640    for loop_id in &cfg.loop_entries {
641        loop_exits.insert(*loop_id, IndexMap::new());
642    }
643
644    // Compute the candidates
645    compute_loop_exit_candidates(
646        cfg,
647        &mut explored,
648        &mut ordered_loops,
649        &mut loop_exits,
650        Vec::new(),
651        src::BlockId::ZERO,
652    );
653
654    {
655        // Debugging
656        let candidates: Vec<String> = loop_exits
657            .iter()
658            .map(|(loop_id, candidates)| format!("{loop_id} -> {candidates:?}"))
659            .collect();
660        trace!("Loop exit candidates:\n{}", candidates.join("\n"));
661    }
662
663    // Choose one candidate among the potential candidates.
664    let mut exits: HashSet<src::BlockId> = HashSet::new();
665    let mut chosen_loop_exits: HashMap<src::BlockId, Option<src::BlockId>> = HashMap::new();
666    // For every loop
667    for loop_id in ordered_loops {
668        // Check the candidates.
669        // Ignore the candidates which have already been chosen as exits for other
670        // loops (which should be outer loops).
671        // We choose the exit with:
672        // - the most occurrences
673        // - the least total distance (if there are several possibilities)
674        // - doesn't necessarily lead to an error (panic, unreachable)
675
676        // First:
677        // - filter the candidates
678        // - compute the number of occurrences
679        // - compute the sum of distances
680        // TODO: we could simply order by using a lexicographic order
681        let loop_exits = loop_exits
682            .get(&loop_id)
683            .unwrap()
684            .iter()
685            // If candidate already selected for another loop: ignore
686            .filter(|(candidate_id, _)| !exits.contains(candidate_id))
687            .map(|(candidate_id, candidate_info)| {
688                let num_occurrences = candidate_info.occurrences.len();
689                let dist_sum = candidate_info.occurrences.iter().sum();
690                (*candidate_id, num_occurrences, dist_sum)
691            })
692            .collect_vec();
693
694        trace!(
695            "Loop {}: possible exits:\n{}",
696            loop_id,
697            loop_exits
698                .iter()
699                .map(|(bid, occs, dsum)| format!(
700                    "{bid} -> {{ occurrences: {occs}, dist_sum: {dsum} }}",
701                ))
702                .collect::<Vec<String>>()
703                .join("\n")
704        );
705
706        // Second: actually select the proper candidate.
707
708        // We find the one with the highest occurrence and the smallest distance
709        // from the entry of the loop (note that we take care of listing the exit
710        // candidates in a deterministic order).
711        let mut best_exit: Option<src::BlockId> = None;
712        let mut best_occurrences = 0;
713        let mut best_dist_sum = std::usize::MAX;
714        for (candidate_id, occurrences, dist_sum) in &loop_exits {
715            if (*occurrences > best_occurrences)
716                || (*occurrences == best_occurrences && *dist_sum < best_dist_sum)
717            {
718                best_exit = Some(*candidate_id);
719                best_occurrences = *occurrences;
720                best_dist_sum = *dist_sum;
721            }
722        }
723
724        let possible_candidates: Vec<_> = loop_exits
725            .iter()
726            .filter_map(|(bid, occs, dsum)| {
727                if *occs == best_occurrences && *dsum == best_dist_sum {
728                    Some(*bid)
729                } else {
730                    None
731                }
732            })
733            .collect();
734        let num_possible_candidates = loop_exits.len();
735
736        // If there is exactly one one best candidate, it is easy.
737        // Otherwise we need to split further.
738        if num_possible_candidates > 1 {
739            trace!("Best candidates: {:?}", possible_candidates);
740            // TODO: if we use a lexicographic order we can merge this with the code
741            // above.
742            // Remove the candidates which only lead to errors (panic or unreachable).
743            let candidates: Vec<_> = possible_candidates
744                .iter()
745                .filter(|bid| !cfg.only_reach_error.contains(bid))
746                .collect();
747            // If there is exactly one candidate we select it
748            if candidates.len() == 1 {
749                let exit_id = *candidates[0];
750                exits.insert(exit_id);
751                trace!("Loop {loop_id}: selected the best exit candidate {exit_id}");
752                chosen_loop_exits.insert(loop_id, Some(exit_id));
753            } else {
754                // Otherwise we do not select any exit.
755                // We don't want to select any exit if we are in the below situation
756                // (all paths lead to errors). We added a sanity check below to
757                // catch the situations where there are several exits which don't
758                // lead to errors.
759                //
760                // Example:
761                // ========
762                // ```
763                // loop {
764                //     match ls {
765                //         List::Nil => {
766                //             panic!() // <-- best candidate
767                //         }
768                //         List::Cons(x, tl) => {
769                //             if i == 0 {
770                //                 return x;
771                //             } else {
772                //                 ls = tl;
773                //                 i -= 1;
774                //             }
775                //         }
776                //         _ => {
777                //           unreachable!(); // <-- best candidate (Rustc introduces an `unreachable` case)
778                //         }
779                //     }
780                // }
781                // ```
782                //
783                // Adding this sanity check so that we can see when there are
784                // several candidates.
785                let span = body.body[loop_id].terminator.span; // Taking *a* span from the block
786                sanity_check!(ctx, span, candidates.is_empty());
787                trace!("Loop {loop_id}: did not select an exit candidate because they all lead to panics");
788                chosen_loop_exits.insert(loop_id, None);
789            }
790        } else {
791            // Register the exit, if there is one
792            match best_exit {
793                None => {
794                    // No exit was found
795                    trace!("Loop {loop_id}: could not find an exit candidate");
796                    chosen_loop_exits.insert(loop_id, None);
797                }
798                Some(exit_id) => {
799                    exits.insert(exit_id);
800                    trace!("Loop {loop_id}: selected the unique exit candidate {exit_id}");
801                    chosen_loop_exits.insert(loop_id, Some(exit_id));
802                }
803            }
804        }
805    }
806
807    // Return the chosen exits
808    trace!("Chosen loop exits: {:?}", chosen_loop_exits);
809    chosen_loop_exits
810}
811
812/// Information used to compute the switch exits.
813/// We compute this information for every block in the graph.
814/// Note that we make sure to use immutable sets because we rely a lot
815/// on cloning.
816#[derive(Debug, Clone)]
817struct BlocksInfo {
818    id: src::BlockId,
819    /// All the successors of the block
820    succs: BTreeSet<OrdBlockId>,
821    /// Let's say we put a quantity of water equal to 1 on the block, and the
822    /// water flows downards. Whenever there is a branching, the quantity of
823    /// water gets equally divided between the branches. When the control flows
824    /// join, we put the water back together. The set below computes the amount
825    /// of water received by each descendant of the node.
826    ///
827    /// TODO: there must be a known algorithm which computes this, right?...
828    /// This is exactly this problems:
829    /// https://stackoverflow.com/questions/78221666/algorithm-for-total-flow-through-weighted-directed-acyclic-graph
830    /// TODO: the way I compute this is not efficient.
831    ///
832    /// Remark: in order to rank the nodes, we also use the negation of the
833    /// rank given by the topological order. The last elements of the set
834    /// have the highest flow, that is they are the nodes to which the maximum
835    /// number of paths converge. If several nodes have the same flow, we want
836    /// to take the highest one in the hierarchy: hence the use of the inverse
837    /// of the topological rank.
838    ///
839    /// Ex.:
840    /// ```text
841    /// A  -- we start here
842    /// |
843    /// |---------------------------------------
844    /// |            |            |            |
845    /// B:(0.25,-1)  C:(0.25,-2)  D:(0.25,-3)  E:(0.25,-4)
846    /// |            |            |
847    /// |--------------------------
848    /// |
849    /// F:(0.75,-5)
850    /// |
851    /// |
852    /// G:(0.75,-6)
853    /// ```
854    /// The "best" node (with the highest (flow, rank) in the graph above is F.
855    flow: BTreeSet<FlowBlockId>,
856}
857
858/// Create an [OrdBlockId] from a block id and a rank given by a map giving
859/// a sort (topological in our use cases) over the graph.
860fn make_ord_block_id(
861    block_id: src::BlockId,
862    sort_map: &HashMap<src::BlockId, usize>,
863) -> OrdBlockId {
864    let rank = *sort_map.get(&block_id).unwrap();
865    OrdBlockId { id: block_id, rank }
866}
867
868/// Compute [BlocksInfo] for every block in the graph.
869/// This information is then used to compute the switch exits.
870fn compute_switch_exits_explore(
871    cfg: &CfgInfo,
872    tsort_map: &HashMap<src::BlockId, usize>,
873    memoized: &mut HashMap<src::BlockId, BlocksInfo>,
874    block_id: src::BlockId,
875) {
876    // Check if we already computer the info
877    if memoized.get(&block_id).is_some() {
878        return;
879    }
880
881    // Compute the block information for the children
882    let children_ids: Vec<src::BlockId> = cfg.cfg_no_be.neighbors(block_id).collect_vec();
883    children_ids
884        .iter()
885        .for_each(|bid| compute_switch_exits_explore(cfg, tsort_map, memoized, *bid));
886
887    // Retrieve the information
888    let children: Vec<&BlocksInfo> = children_ids
889        .iter()
890        .map(|bid| memoized.get(bid).unwrap())
891        .collect();
892
893    // Compute the successors
894    // Add the children themselves in their sets of successors
895    let all_succs: BTreeSet<OrdBlockId> = children.iter().fold(BTreeSet::new(), |acc, s| {
896        // Add the successors to the accumulator
897        let mut acc: BTreeSet<_> = acc.union(&s.succs).copied().collect();
898        // Add the child itself
899        acc.insert(make_ord_block_id(s.id, tsort_map));
900        acc
901    });
902
903    // Compute the "flows" (if there are children)
904    // TODO: this is computationally expensive...
905    let mut flow: HashMap<src::BlockId, (BigRational, isize)> = HashMap::new();
906
907    if children.len() > 0 {
908        // We need to divide the initial flow equally between the children
909        let factor = BigRational::new(BigInt::from(1), BigInt::from(children.len()));
910
911        // Small helper
912        let mut add_to_flow = |(id, f0): (src::BlockId, (BigRational, isize))| match flow.get(&id) {
913            None => flow.insert(id, f0),
914            Some(f1) => {
915                let f = f0.0 + f1.0.clone();
916                assert!(f0.1 == f1.1);
917                flow.insert(id, (f, f0.1))
918            }
919        };
920
921        // For each child, multiply the flows of its own children by the ratio,
922        // and add.
923        for child in children {
924            // First, add the child itself
925            let rank = isize::try_from(*tsort_map.get(&child.id).unwrap()).unwrap();
926            add_to_flow((child.id, (factor.clone(), -rank)));
927
928            // Then add its successors
929            for child1 in &child.flow {
930                add_to_flow((
931                    child1.id,
932                    (factor.clone() * child1.rank.0.clone(), child1.rank.1),
933                ));
934            }
935        }
936    }
937
938    // Put everything in an ordered set: the first block id will be the one with
939    // the highest flow, and in case of equality it will be the one with the
940    // smallest block id.
941    let flow: BTreeSet<FlowBlockId> = flow
942        .into_iter()
943        .map(|(id, rank)| BlockWithRank { rank, id })
944        .collect();
945
946    trace!("block: {block_id}, all successors: {all_succs:?}, flow: {flow:?}");
947
948    // Memoize
949    let info = BlocksInfo {
950        id: block_id,
951        succs: all_succs,
952        flow,
953    };
954    memoized.insert(block_id, info.clone());
955}
956
957/// Auxiliary helper
958///
959/// Check if it is possible to reach the exit of an outer switch from [bid]
960/// without going through the [exit_candidate]. We use the graph without
961/// backward edges.
962fn can_reach_outer_exit(
963    cfg: &CfgInfo,
964    outer_exits: &HashSet<src::BlockId>,
965    start_bid: src::BlockId,
966    exit_candidate: src::BlockId,
967) -> bool {
968    // The stack of blocks
969    let mut stack: Vec<src::BlockId> = vec![start_bid];
970    let mut explored: HashSet<src::BlockId> = HashSet::new();
971
972    while let Some(bid) = stack.pop() {
973        // Check if already explored
974        if explored.contains(&bid) {
975            break;
976        }
977        explored.insert(bid);
978
979        // Check if this is the exit candidate
980        if bid == exit_candidate {
981            // Stop exploring
982            break;
983        }
984
985        // Check if this is an outer exit
986        if outer_exits.contains(&bid) {
987            return true;
988        }
989
990        // Add the children to the stack
991        for child in cfg.cfg_no_be.neighbors(bid) {
992            stack.push(child);
993        }
994    }
995
996    false
997}
998
999/// See [`compute_loop_switch_exits`](compute_loop_switch_exits) for
1000/// explanations about what "exits" are.
1001///
1002/// In order to compute the switch exits, we simply recursively compute a
1003/// topologically ordered set of "filtered successors" as follows (note
1004/// that we work in the CFG *without* back edges):
1005/// - for a block which doesn't branch (only one successor), the filtered
1006///   successors is the set of reachable nodes.
1007/// - for a block which branches, we compute the nodes reachable from all
1008///   the children, and find the "best" intersection between those.
1009///   Note that we find the "best" intersection (a pair of branches which
1010///   maximize the intersection of filtered successors) because some branches
1011///   might never join the control-flow of the other branches, if they contain
1012///   a `break`, `return`, `panic`, etc., like here:
1013///   ```text
1014///   if b { x = 3; } { return; }
1015///   y += x;
1016///   ...
1017///   ```
1018/// Note that with nested switches, the branches of the inner switches might
1019/// goto the exits of the outer switches: for this reason, we give precedence
1020/// to the outer switches.
1021fn compute_switch_exits(
1022    cfg: &CfgInfo,
1023    tsort_map: &HashMap<src::BlockId, usize>,
1024) -> HashMap<src::BlockId, Option<src::BlockId>> {
1025    // Compute the successors info map, starting at the root node
1026    let mut succs_info_map = HashMap::new();
1027    trace!(
1028        "- cfg.cfg:\n{:?}\n- cfg.cfg_no_be:\n{:?}\n- cfg.switch_blocks:\n{:?}",
1029        cfg.cfg,
1030        cfg.cfg_no_be,
1031        cfg.switch_blocks
1032    );
1033    let _ = compute_switch_exits_explore(cfg, tsort_map, &mut succs_info_map, src::BlockId::ZERO);
1034
1035    // We need to give precedence to the outer switches: we thus iterate
1036    // over the switch blocks in topological order.
1037    let mut sorted_switch_blocks: BTreeSet<OrdBlockId> = BTreeSet::new();
1038    for bid in cfg.switch_blocks.iter() {
1039        sorted_switch_blocks.insert(make_ord_block_id(*bid, tsort_map));
1040    }
1041    trace!("sorted_switch_blocks: {:?}", sorted_switch_blocks);
1042
1043    // Debugging: print all the successors
1044    {
1045        trace!("Successors info:\n{}\n", {
1046            let mut out = vec![];
1047            for (bid, info) in &succs_info_map {
1048                out.push(
1049                    format!(
1050                        "{} -> {{succs: {:?}, flow: {:?}}}",
1051                        bid, &info.succs, &info.flow
1052                    )
1053                    .to_string(),
1054                );
1055            }
1056            out.join("\n")
1057        });
1058    }
1059
1060    // For every node which is a switch, retrieve the exit.
1061    // As the set of intersection of successors is topologically sorted, the
1062    // exit should be the first node in the set (if the set is non empty).
1063    // Also, we need to explore the nodes in topological order, to give
1064    // precedence to the outer switches.
1065    let mut exits_set = HashSet::new();
1066    let mut ord_exits_set = BTreeSet::new();
1067    let mut exits = HashMap::new();
1068    for bid in sorted_switch_blocks {
1069        trace!("Finding exit candidate for: {bid:?}");
1070        let bid = bid.id;
1071        let info = succs_info_map.get(&bid).unwrap();
1072        let succs = &info.flow;
1073        // Find the best successor: this is the last one (with the highest flow,
1074        // and the highest reverse topological rank).
1075        if succs.is_empty() {
1076            trace!("{bid:?} has no successors");
1077            exits.insert(bid, None);
1078        } else {
1079            // We have an exit candidate: check that it was not already
1080            // taken by an external switch
1081            let exit = succs.last().unwrap();
1082            trace!("{bid:?} has an exit candidate: {exit:?}");
1083            if exits_set.contains(&exit.id) {
1084                trace!("Ignoring the exit candidate because already taken by an external switch");
1085                exits.insert(bid, None);
1086            } else {
1087                // It was not taken by an external switch.
1088                //
1089                // We must check that we can't reach the exit of an external
1090                // switch from one of the branches, without going through the
1091                // exit candidate.
1092                // We do this by simply checking that we can't reach any exits
1093                // (and use the fact that we explore the switch by using a
1094                // topological order to not discard valid exit candidates).
1095                //
1096                // The reason is that it can lead to code like the following:
1097                // ```
1098                // if ... { // if #1
1099                //   if ... { // if #2
1100                //     ...
1101                //     // here, we have a `goto b1`, where b1 is the exit
1102                //     // of if #2: we thus stop translating the blocks.
1103                //   }
1104                //   else {
1105                //     ...
1106                //     // here, we have a `goto b2`, where b2 is the exit
1107                //     // of if #1: we thus stop translating the blocks.
1108                //   }
1109                //   // We insert code for the block b1 here (which is the exit of
1110                //   // the exit of if #2). However, this block should only
1111                //   // be executed in the branch "then" of the if #2, not in
1112                //   // the branch "else".
1113                //   ...
1114                // }
1115                // else {
1116                //   ...
1117                // }
1118                // ```
1119
1120                // First: we do a quick check (does the set of all successors
1121                // intersect the set of exits for outer blocks?). If yes, we do
1122                // a more precise analysis: we check if we can reach the exit
1123                // *without going through* the exit candidate.
1124                if info.succs.intersection(&ord_exits_set).next().is_none()
1125                    || !can_reach_outer_exit(cfg, &exits_set, bid, exit.id)
1126                {
1127                    trace!("Keeping the exit candidate");
1128                    // No intersection: ok
1129                    exits_set.insert(exit.id);
1130                    ord_exits_set.insert(make_ord_block_id(exit.id, tsort_map));
1131                    exits.insert(bid, Some(exit.id));
1132                } else {
1133                    trace!("Ignoring the exit candidate because of an intersection with external switches");
1134                    exits.insert(bid, None);
1135                }
1136            }
1137        }
1138    }
1139
1140    exits
1141}
1142
1143/// The exits of a graph
1144#[derive(Debug, Clone)]
1145struct ExitInfo {
1146    /// The loop exits
1147    loop_exits: HashMap<src::BlockId, Option<src::BlockId>>,
1148    /// Some loop exits actually belong to outer switches. We still need
1149    /// to track them in the loop exits, in order to know when we should
1150    /// insert a break. However, we need to make sure we don't add the
1151    /// corresponding block in a sequence, after having translated the
1152    /// loop, like so:
1153    /// ```text
1154    /// loop {
1155    ///   loop_body
1156    /// };
1157    /// exit_blocks // OK if the exit "belongs" to the loop
1158    /// ```
1159    ///
1160    /// In case the exit doesn't belong to the loop:
1161    /// ```text
1162    /// if b {
1163    ///   loop {
1164    ///     loop_body
1165    ///   } // no exit blocks after the loop
1166    /// }
1167    /// else {
1168    ///   ...
1169    /// };
1170    /// exit_blocks // the exit blocks are here
1171    /// ```
1172    owned_loop_exits: HashMap<src::BlockId, Option<src::BlockId>>,
1173    /// The switch exits.
1174    /// Note that the switch exits are always owned.
1175    owned_switch_exits: HashMap<src::BlockId, Option<src::BlockId>>,
1176}
1177
1178/// Compute the exits for the loops and the switches (switch on integer and
1179/// if ... then ... else ...). We need to do this because control-flow in MIR
1180/// is destructured: we have gotos everywhere.
1181///
1182/// Let's consider the following piece of code:
1183/// ```text
1184/// if cond1 { ... } else { ... };
1185/// if cond2 { ... } else { ... };
1186/// ```
1187/// Once converted to MIR, the control-flow is destructured, which means we
1188/// have gotos everywhere. When reconstructing the control-flow, we have
1189/// to be careful about the point where we should join the two branches of
1190/// the first if.
1191/// For instance, if we don't notice they should be joined at some point (i.e,
1192/// whatever the branch we take, there is a moment when we go to the exact
1193/// same place, just before the second if), we might generate code like
1194/// this, with some duplicata:
1195/// ```text
1196/// if cond1 { ...; if cond2 { ... } else { ...} }
1197/// else { ...; if cond2 { ... } else { ...} }
1198/// ```
1199///
1200/// Such a reconstructed program is valid, but it is definitely non-optimal:
1201/// it is very different from the original program (making it less clean and
1202/// clear), more bloated, and might involve duplicating the proof effort.
1203///
1204/// For this reason, we need to find the "exit" of the first loop, which is
1205/// the point where the two branches join. Note that this can be a bit tricky,
1206/// because there may be more than two branches (if we do `switch(x) { ... }`),
1207/// and some of them might not join (if they contain a `break`, `panic`,
1208/// `return`, etc.).
1209///
1210/// Finally, some similar issues arise for loops. For instance, let's consider
1211/// the following piece of code:
1212/// ```text
1213/// while cond1 {
1214///   e1;
1215///   if cond2 {
1216///     break;
1217///   }
1218///   e2;
1219/// }
1220/// e3;
1221/// return;
1222/// ```
1223///
1224/// Note that in MIR, the loop gets desugared to an if ... then ... else ....
1225/// From the MIR, We want to generate something like this:
1226/// ```text
1227/// loop {
1228///   if cond1 {
1229///     e1;
1230///     if cond2 {
1231///       break;
1232///     }
1233///     e2;
1234///     continue;
1235///   }
1236///   else {
1237///     break;
1238///   }
1239/// };
1240/// e3;
1241/// return;
1242/// ```
1243///
1244/// But if we don't pay attention, we might end up with that, once again with
1245/// duplications:
1246/// ```text
1247/// loop {
1248///   if cond1 {
1249///     e1;
1250///     if cond2 {
1251///       e3;
1252///       return;
1253///     }
1254///     e2;
1255///     continue;
1256///   }
1257///   else {
1258///     e3;
1259///     return;
1260///   }
1261/// }
1262/// ```
1263/// We thus have to notice that if the loop condition is false, we goto the same
1264/// block as when following the goto introduced by the break inside the loop, and
1265/// this block is dubbed the "loop exit".
1266///
1267/// The following function thus computes the "exits" for loops and switches, which
1268/// are basically the points where control-flow joins.
1269fn compute_loop_switch_exits(
1270    ctx: &mut TransformCtx,
1271    body: &src::ExprBody,
1272    cfg_info: &CfgInfo,
1273) -> ExitInfo {
1274    // Use the CFG without backward edges to topologically sort the nodes.
1275    // Note that `toposort` returns `Err` if and only if it finds cycles (which
1276    // can't happen).
1277    let tsorted: Vec<src::BlockId> = toposort(&cfg_info.cfg_no_be, None).unwrap();
1278
1279    // Build the map: block id -> topological sort rank
1280    let tsort_map: HashMap<src::BlockId, usize> = tsorted
1281        .into_iter()
1282        .enumerate()
1283        .map(|(i, block_id)| (block_id, i))
1284        .collect();
1285    trace!("tsort_map:\n{:?}", tsort_map);
1286
1287    // Compute the loop exits
1288    let loop_exits = compute_loop_exits(ctx, body, cfg_info);
1289    trace!("loop_exits:\n{:?}", loop_exits);
1290
1291    // Compute the switch exits
1292    let switch_exits = compute_switch_exits(cfg_info, &tsort_map);
1293    trace!("switch_exits:\n{:?}", switch_exits);
1294
1295    // Compute the exit info
1296    let mut exit_info = ExitInfo {
1297        loop_exits: HashMap::new(),
1298        owned_loop_exits: HashMap::new(),
1299        owned_switch_exits: HashMap::new(),
1300    };
1301
1302    // We need to give precedence to the outer switches and loops: we thus iterate
1303    // over the blocks in topological order.
1304    let mut sorted_blocks: BTreeSet<OrdBlockId> = BTreeSet::new();
1305    for bid in cfg_info
1306        .loop_entries
1307        .iter()
1308        .chain(cfg_info.switch_blocks.iter())
1309    {
1310        sorted_blocks.insert(make_ord_block_id(*bid, &tsort_map));
1311    }
1312
1313    // Keep track of the exits which were already attributed
1314    let mut all_exits = HashSet::new();
1315
1316    // Put all this together
1317    for bid in sorted_blocks {
1318        let bid = bid.id;
1319        // Check if loop or switch block
1320        if cfg_info.loop_entries.contains(&bid) {
1321            // This is a loop.
1322            //
1323            // For loops, we always register the exit (if there is one).
1324            // However, the exit may be owned by an outer switch (note
1325            // that we already took care of spreading the exits between
1326            // the inner/outer loops)
1327            let exit_id = loop_exits.get(&bid).unwrap();
1328            exit_info.loop_exits.insert(bid, *exit_id);
1329
1330            // Check if we "own" the exit
1331            match exit_id {
1332                None => {
1333                    // No exit
1334                    exit_info.owned_loop_exits.insert(bid, None);
1335                }
1336                Some(exit_id) => {
1337                    if all_exits.contains(exit_id) {
1338                        // We don't own it
1339                        exit_info.owned_loop_exits.insert(bid, None);
1340                    } else {
1341                        // We own it
1342                        exit_info.owned_loop_exits.insert(bid, Some(*exit_id));
1343                        all_exits.insert(*exit_id);
1344                    }
1345                }
1346            }
1347        } else {
1348            // For switches: check that the exit was not already given to a
1349            // loop
1350            let exit_id = switch_exits.get(&bid).unwrap();
1351
1352            match exit_id {
1353                None => {
1354                    // No exit
1355                    exit_info.owned_switch_exits.insert(bid, None);
1356                }
1357                Some(exit_id) => {
1358                    if all_exits.contains(exit_id) {
1359                        // We don't own it
1360                        exit_info.owned_switch_exits.insert(bid, None);
1361                    } else {
1362                        // We own it
1363                        exit_info.owned_switch_exits.insert(bid, Some(*exit_id));
1364                        all_exits.insert(*exit_id);
1365                    }
1366                }
1367            }
1368        }
1369    }
1370
1371    exit_info
1372}
1373
1374fn get_goto_kind(
1375    exits_info: &ExitInfo,
1376    parent_loops: &Vec<src::BlockId>,
1377    switch_exit_blocks: &HashSet<src::BlockId>,
1378    next_block_id: src::BlockId,
1379) -> GotoKind {
1380    // First explore the parent loops in revert order
1381    for (i, loop_id) in parent_loops.iter().rev().enumerate() {
1382        // If we goto a loop entry node: this is a 'continue'
1383        if next_block_id == *loop_id {
1384            return GotoKind::Continue(i);
1385        } else {
1386            // If we goto a loop exit node: this is a 'break'
1387            if let Some(exit_id) = exits_info.loop_exits.get(loop_id).unwrap() {
1388                if next_block_id == *exit_id {
1389                    return GotoKind::Break(i);
1390                }
1391            }
1392        }
1393    }
1394
1395    // Check if the goto exits the current block
1396    if switch_exit_blocks.contains(&next_block_id) {
1397        return GotoKind::ExitBlock;
1398    }
1399
1400    // Default
1401    GotoKind::Goto
1402}
1403
1404enum GotoKind {
1405    Break(usize),
1406    Continue(usize),
1407    ExitBlock,
1408    Goto,
1409}
1410
1411/// `parent_span`: we need some span data for the new statement.
1412/// We use the one for the parent terminator.
1413fn translate_child_block(
1414    info: &mut BlockInfo<'_>,
1415    parent_loops: &Vec<src::BlockId>,
1416    switch_exit_blocks: &HashSet<src::BlockId>,
1417    parent_span: Span,
1418    child_id: src::BlockId,
1419) -> Option<tgt::Block> {
1420    // Check if this is a backward call
1421    match get_goto_kind(info.exits_info, parent_loops, switch_exit_blocks, child_id) {
1422        GotoKind::Break(index) => {
1423            let st = tgt::RawStatement::Break(index);
1424            Some(tgt::Statement::new(parent_span, st).into_block())
1425        }
1426        GotoKind::Continue(index) => {
1427            let st = tgt::RawStatement::Continue(index);
1428            Some(tgt::Statement::new(parent_span, st).into_block())
1429        }
1430        // If we are going to an exit block we simply ignore the goto
1431        GotoKind::ExitBlock => None,
1432        GotoKind::Goto => {
1433            // "Standard" goto: just recursively translate
1434            ensure_sufficient_stack(|| {
1435                Some(translate_block(
1436                    info,
1437                    parent_loops,
1438                    switch_exit_blocks,
1439                    child_id,
1440                ))
1441            })
1442        }
1443    }
1444}
1445
1446fn opt_block_unwrap_or_nop(span: Span, opt_block: Option<tgt::Block>) -> tgt::Block {
1447    opt_block.unwrap_or_else(|| tgt::Statement::new(span, tgt::RawStatement::Nop).into_block())
1448}
1449
1450fn translate_statement(st: &src::Statement) -> Option<tgt::Statement> {
1451    let src_span = st.span;
1452    let st = match st.content.clone() {
1453        src::RawStatement::Assign(place, rvalue) => tgt::RawStatement::Assign(place, rvalue),
1454        src::RawStatement::Call(s) => tgt::RawStatement::Call(s),
1455        src::RawStatement::SetDiscriminant(place, variant_id) => {
1456            tgt::RawStatement::SetDiscriminant(place, variant_id)
1457        }
1458        src::RawStatement::StorageLive(var_id) => tgt::RawStatement::StorageLive(var_id),
1459        src::RawStatement::StorageDead(var_id) => tgt::RawStatement::StorageDead(var_id),
1460        src::RawStatement::Deinit(place) => tgt::RawStatement::Deinit(place),
1461        src::RawStatement::Drop(place) => tgt::RawStatement::Drop(place),
1462        src::RawStatement::Assert(assert) => tgt::RawStatement::Assert(assert),
1463        src::RawStatement::Nop => tgt::RawStatement::Nop,
1464        src::RawStatement::Error(s) => tgt::RawStatement::Error(s),
1465    };
1466    Some(tgt::Statement::new(src_span, st))
1467}
1468
1469fn translate_terminator(
1470    info: &mut BlockInfo<'_>,
1471    parent_loops: &Vec<src::BlockId>,
1472    switch_exit_blocks: &HashSet<src::BlockId>,
1473    terminator: &src::Terminator,
1474) -> tgt::Block {
1475    let src_span = terminator.span;
1476
1477    match &terminator.content {
1478        src::RawTerminator::Abort(kind) => {
1479            tgt::Statement::new(src_span, tgt::RawStatement::Abort(kind.clone())).into_block()
1480        }
1481        src::RawTerminator::Return => {
1482            tgt::Statement::new(src_span, tgt::RawStatement::Return).into_block()
1483        }
1484        src::RawTerminator::Goto { target } => {
1485            let block = translate_child_block(
1486                info,
1487                parent_loops,
1488                switch_exit_blocks,
1489                terminator.span,
1490                *target,
1491            );
1492            let block = opt_block_unwrap_or_nop(terminator.span, block);
1493            block
1494        }
1495        src::RawTerminator::Switch { discr, targets } => {
1496            // Translate the target expressions
1497            let switch = match &targets {
1498                src::SwitchTargets::If(then_tgt, else_tgt) => {
1499                    // Translate the children expressions
1500                    let then_block = translate_child_block(
1501                        info,
1502                        parent_loops,
1503                        switch_exit_blocks,
1504                        terminator.span,
1505                        *then_tgt,
1506                    );
1507                    // We use the terminator span information in case then
1508                    // then statement is `None`
1509                    let then_block = opt_block_unwrap_or_nop(terminator.span, then_block);
1510                    let else_block = translate_child_block(
1511                        info,
1512                        parent_loops,
1513                        switch_exit_blocks,
1514                        terminator.span,
1515                        *else_tgt,
1516                    );
1517                    let else_block = opt_block_unwrap_or_nop(terminator.span, else_block);
1518
1519                    // Translate
1520                    tgt::Switch::If(discr.clone(), then_block, else_block)
1521                }
1522                src::SwitchTargets::SwitchInt(int_ty, targets, otherwise) => {
1523                    // Note that some branches can be grouped together, like
1524                    // here:
1525                    // ```
1526                    // match e {
1527                    //   E::V1 | E::V2 => ..., // Grouped
1528                    //   E::V3 => ...
1529                    // }
1530                    // ```
1531                    // We detect this by checking if a block has already been
1532                    // translated as one of the branches of the switch.
1533                    //
1534                    // Rk.: note there may be intermediate gotos depending
1535                    // on the MIR we use. Typically, we manage to detect the
1536                    // grouped branches with Optimized MIR, but not with Promoted
1537                    // MIR. See the comment in "tests/src/matches.rs".
1538
1539                    // We link block ids to:
1540                    // - vector of matched integer values
1541                    // - translated blocks
1542                    let mut branches: IndexMap<src::BlockId, (Vec<ScalarValue>, tgt::Block)> =
1543                        IndexMap::new();
1544
1545                    // Translate the children expressions
1546                    for (v, bid) in targets.iter() {
1547                        // Check if the block has already been translated:
1548                        // if yes, it means we need to group branches
1549                        if branches.contains_key(bid) {
1550                            // Already translated: add the matched value to
1551                            // the list of values
1552                            let branch = branches.get_mut(bid).unwrap();
1553                            branch.0.push(*v);
1554                        } else {
1555                            // Not translated: translate it
1556                            let block = translate_child_block(
1557                                info,
1558                                parent_loops,
1559                                switch_exit_blocks,
1560                                terminator.span,
1561                                *bid,
1562                            );
1563                            // We use the terminator span information in case then
1564                            // then statement is `None`
1565                            let block = opt_block_unwrap_or_nop(terminator.span, block);
1566                            branches.insert(*bid, (vec![*v], block));
1567                        }
1568                    }
1569                    let targets_blocks: Vec<(Vec<ScalarValue>, tgt::Block)> =
1570                        branches.into_iter().map(|(_, x)| x).collect();
1571
1572                    let otherwise_block = translate_child_block(
1573                        info,
1574                        parent_loops,
1575                        switch_exit_blocks,
1576                        terminator.span,
1577                        *otherwise,
1578                    );
1579                    // We use the terminator span information in case then
1580                    // then statement is `None`
1581                    let otherwise_block = opt_block_unwrap_or_nop(terminator.span, otherwise_block);
1582
1583                    // Translate
1584                    tgt::Switch::SwitchInt(discr.clone(), *int_ty, targets_blocks, otherwise_block)
1585                }
1586            };
1587
1588            // Return
1589            let span = tgt::combine_switch_targets_span(&switch);
1590            let span = combine_span(&src_span, &span);
1591            let st = tgt::RawStatement::Switch(switch);
1592            tgt::Statement::new(span, st).into_block()
1593        }
1594    }
1595}
1596
1597/// Return `true` if whatever the path we take, evaluating the statement
1598/// necessarily leads to:
1599/// - a panic or return
1600/// - a break which goes to a loop outside the expression
1601/// - a continue statement
1602fn is_terminal(block: &tgt::Block) -> bool {
1603    is_terminal_explore_block(0, block)
1604}
1605
1606fn is_terminal_explore(num_loops: usize, st: &tgt::Statement) -> bool {
1607    match &st.content {
1608        tgt::RawStatement::Assign(_, _)
1609        | tgt::RawStatement::SetDiscriminant(_, _)
1610        | tgt::RawStatement::StorageLive(_)
1611        | tgt::RawStatement::StorageDead(_)
1612        | tgt::RawStatement::Deinit(_)
1613        | tgt::RawStatement::Drop(_)
1614        | tgt::RawStatement::Assert(_)
1615        | tgt::RawStatement::Call(_)
1616        | tgt::RawStatement::Nop
1617        | tgt::RawStatement::Error(_) => false,
1618        tgt::RawStatement::Abort(..) | tgt::RawStatement::Return => true,
1619        tgt::RawStatement::Break(index) => *index >= num_loops,
1620        tgt::RawStatement::Continue(_index) => true,
1621        tgt::RawStatement::Switch(switch) => switch
1622            .iter_targets()
1623            .all(|tgt_st| is_terminal_explore_block(num_loops, tgt_st)),
1624        tgt::RawStatement::Loop(loop_st) => is_terminal_explore_block(num_loops + 1, loop_st),
1625    }
1626}
1627fn is_terminal_explore_block(num_loops: usize, block: &tgt::Block) -> bool {
1628    block
1629        .statements
1630        .iter()
1631        .any(|st| is_terminal_explore(num_loops, st))
1632}
1633
1634/// Remark: some values are boxed (here, the returned statement) so that they
1635/// are allocated on the heap. This reduces stack usage (we had problems with
1636/// stack overflows in the past). A more efficient solution would be to use loops
1637/// to make this code constant space, but that would require a serious rewriting.
1638fn translate_block(
1639    info: &mut BlockInfo<'_>,
1640    parent_loops: &Vec<src::BlockId>,
1641    switch_exit_blocks: &HashSet<src::BlockId>,
1642    block_id: src::BlockId,
1643) -> tgt::Block {
1644    // If the user activated this check: check that we didn't already translate
1645    // this block, and insert the block id in the set of already translated blocks.
1646    trace!(
1647        "Parent loops: {:?}, Parent switch exits: {:?}, Block id: {}",
1648        parent_loops,
1649        switch_exit_blocks,
1650        block_id
1651    );
1652    if info.no_code_duplication {
1653        assert!(!info.explored.contains(&block_id));
1654    }
1655    info.explored.insert(block_id);
1656
1657    let block = info.body.body.get(block_id).unwrap();
1658
1659    // Check if we enter a loop: if so, update parent_loops and the current_exit_block
1660    let is_loop = info.cfg.loop_entries.contains(&block_id);
1661    let mut nparent_loops: Vec<src::BlockId>;
1662    let nparent_loops = if info.cfg.loop_entries.contains(&block_id) {
1663        nparent_loops = parent_loops.clone();
1664        nparent_loops.push(block_id);
1665        &nparent_loops
1666    } else {
1667        parent_loops
1668    };
1669
1670    // If we enter a switch or a loop, we need to check if we own the exit
1671    // block, in which case we need to append it to the loop/switch body
1672    // in a sequence
1673    let is_switch = block.terminator.content.is_switch();
1674    let next_block = if is_loop {
1675        *info.exits_info.owned_loop_exits.get(&block_id).unwrap()
1676    } else if is_switch {
1677        *info.exits_info.owned_switch_exits.get(&block_id).unwrap()
1678    } else {
1679        None
1680    };
1681
1682    // If we enter a switch, add the exit block to the set
1683    // of outer exit blocks
1684    let nswitch_exit_blocks = if is_switch {
1685        let mut nexit_blocks = switch_exit_blocks.clone();
1686        match next_block {
1687            None => nexit_blocks,
1688            Some(bid) => {
1689                nexit_blocks.insert(bid);
1690                nexit_blocks
1691            }
1692        }
1693    } else {
1694        switch_exit_blocks.clone()
1695    };
1696
1697    // Translate the terminator and the subsequent blocks.
1698    // Note that this terminator is an option: we might ignore it
1699    // (if it is an exit).
1700
1701    let terminator =
1702        translate_terminator(info, nparent_loops, &nswitch_exit_blocks, &block.terminator);
1703
1704    // Translate the statements inside the block
1705    let statements = block
1706        .statements
1707        .iter()
1708        .filter_map(|st| translate_statement(st))
1709        .collect_vec();
1710
1711    // Prepend the statements to the terminator.
1712    let mut block = if let Some(st) = tgt::Block::from_seq(statements) {
1713        st.merge(terminator)
1714    } else {
1715        terminator
1716    };
1717
1718    if is_loop {
1719        // Put the loop body inside a `Loop`.
1720        block = tgt::Statement::new(block.span, tgt::RawStatement::Loop(block)).into_block()
1721    } else if is_switch {
1722        if next_block.is_some() {
1723            // Sanity check: if there is an exit block, this block must be
1724            // reachable (i.e, there must exist a path in the switch which
1725            // doesn't end with `panic`, `return`, etc.).
1726            assert!(!is_terminal(&block));
1727        }
1728    } else {
1729        assert!(next_block.is_none());
1730    }
1731
1732    // Concatenate the exit expression, if needs be
1733    if let Some(exit_block_id) = next_block {
1734        let next_block = ensure_sufficient_stack(|| {
1735            translate_block(info, parent_loops, switch_exit_blocks, exit_block_id)
1736        });
1737        block = block.merge(next_block);
1738    }
1739
1740    block
1741}
1742
1743fn translate_body_aux(
1744    ctx: &mut TransformCtx,
1745    no_code_duplication: bool,
1746    src_body: &src::ExprBody,
1747) -> tgt::ExprBody {
1748    // Explore the function body to create the control-flow graph without backward
1749    // edges, and identify the loop entries (which are destinations of backward edges).
1750    let cfg_info = build_cfg_info(src_body);
1751    trace!("cfg_info: {:?}", cfg_info);
1752
1753    // Find the exit block for all the loops and switches, if such an exit point
1754    // exists.
1755    let exits_info = compute_loop_switch_exits(ctx, src_body, &cfg_info);
1756
1757    // Debugging
1758    trace!("exits_info:\n{:?}", exits_info);
1759
1760    // Translate the body by reconstructing the loops and the
1761    // conditional branchings.
1762    // Note that we shouldn't get `None`.
1763    let mut explored = HashSet::new();
1764    let mut info = BlockInfo {
1765        no_code_duplication,
1766        cfg: &cfg_info,
1767        body: src_body,
1768        exits_info: &exits_info,
1769        explored: &mut explored,
1770    };
1771    let tgt_body = translate_block(&mut info, &Vec::new(), &HashSet::new(), src::BlockId::ZERO);
1772
1773    // Sanity: check that we translated all the blocks
1774    for (bid, _) in src_body.body.iter_indexed_values() {
1775        assert!(explored.contains(&bid));
1776    }
1777
1778    tgt::ExprBody {
1779        span: src_body.span,
1780        locals: src_body.locals.clone(),
1781        comments: src_body.comments.clone(),
1782        body: tgt_body,
1783    }
1784}
1785
1786fn translate_body(ctx: &mut TransformCtx, no_code_duplication: bool, body: &mut gast::Body) {
1787    use gast::Body::{Structured, Unstructured};
1788    let Unstructured(src_body) = body else {
1789        panic!("Called `ullbc_to_llbc` on an already restructured body")
1790    };
1791    trace!("About to translate to ullbc: {:?}", src_body.span);
1792    let tgt_body = translate_body_aux(ctx, no_code_duplication, src_body);
1793    *body = Structured(tgt_body);
1794}
1795
1796pub struct Transform;
1797impl TransformPass for Transform {
1798    fn transform_ctx(&self, ctx: &mut TransformCtx) {
1799        // Translate the bodies one at a time.
1800        ctx.for_each_body(|ctx, body| {
1801            translate_body(ctx, ctx.options.no_code_duplication, body);
1802        });
1803
1804        // Print the functions
1805        let fmt_ctx = ctx.into_fmt();
1806        for fun in &ctx.translated.fun_decls {
1807            trace!(
1808                "# Signature:\n{}\n\n# Function definition:\n{}\n",
1809                fmt_ctx.format_object(&fun.signature),
1810                fmt_ctx.format_object(fun),
1811            );
1812        }
1813        // Print the global variables
1814        for global in &ctx.translated.global_decls {
1815            trace!(
1816                "# Type:\n{}\n\n# Global definition:\n{}\n",
1817                fmt_ctx.format_object(&global.ty),
1818                fmt_ctx.format_object(global)
1819            );
1820        }
1821
1822        if ctx.options.print_built_llbc {
1823            info!("# LLBC resulting from control-flow reconstruction:\n\n{ctx}\n",);
1824        }
1825    }
1826}