charon_lib/transform/
ullbc_to_llbc.rs

1//! ULLBC to LLBC
2//!
3//! We reconstruct the control-flow in the Unstructured LLBC.
4//!
5//! The reconstruction algorithm is not written to be efficient (its complexity
6//! is probably very bad), but it was not written to be: this is still an early
7//! stage and we want the algorithm to generate the best reconstruction as
8//! possible. We still need to test the algorithm on more interesting examples,
9//! and will consider making it more efficient once it is a bit mature and well
10//! tested.
11//! Also note that we more importantly focus on making the algorithm sound: the
12//! reconstructed program must always be equivalent to the original MIR program,
13//! and the fact that the reconstruction preserves this property must be obvious.
14//!
15//! Finally, we conjecture the execution time shouldn't be too much a problem:
16//! we don't expect the translation mechanism to be applied on super huge functions
17//! (which will be difficult to formally analyze), and the MIR graphs are actually
18//! not that big because statements are grouped into code blocks (a block is made
19//! of a list of statements, followed by a terminator - branchings and jumps can
20//! only be performed by terminators -, meaning that MIR graphs don't have that
21//! many nodes and edges).
22
23use crate::ast::*;
24use crate::common::ensure_sufficient_stack;
25use crate::errors::sanity_check;
26use crate::formatter::IntoFormatter;
27use crate::llbc_ast as tgt;
28use crate::meta::{combine_span, Span};
29use crate::pretty::FmtWithCtx;
30use crate::transform::TransformCtx;
31use crate::ullbc_ast::{self as src};
32use indexmap::IndexMap;
33use itertools::Itertools;
34use num_bigint::BigInt;
35use num_rational::BigRational;
36use petgraph::algo::toposort;
37use petgraph::graphmap::DiGraphMap;
38use petgraph::Direction;
39use std::collections::{BTreeSet, HashMap, HashSet, VecDeque};
40
41use super::ctx::TransformPass;
42
43/// Control-Flow Graph
44type Cfg = DiGraphMap<src::BlockId, ()>;
45
46/// Small utility
47struct BlockInfo<'a> {
48    cfg: &'a CfgInfo,
49    body: &'a src::ExprBody,
50    exits_info: &'a ExitInfo,
51    explored: &'a mut HashSet<src::BlockId>,
52}
53
54/// This structure contains various information about a function's CFG.
55#[derive(Debug)]
56struct CfgInfo {
57    /// The CFG
58    pub cfg: Cfg,
59    /// The CFG where all the backward edges have been removed
60    pub cfg_no_be: Cfg,
61    /// We consider the destination of the backward edges to be loop entries and
62    /// store them here.
63    pub loop_entries: HashSet<src::BlockId>,
64    /// The backward edges
65    pub backward_edges: HashSet<(src::BlockId, src::BlockId)>,
66    /// The blocks whose terminators are a switch are stored here.
67    pub switch_blocks: HashSet<src::BlockId>,
68    /// The set of nodes from where we can only reach error nodes (panic, etc.)
69    pub only_reach_error: HashSet<src::BlockId>,
70}
71
72/// Build the CFGs (the "regular" CFG and the CFG without backward edges) and
73/// compute some information like the loop entries and the switch blocks.
74fn build_cfg_info(body: &src::ExprBody) -> CfgInfo {
75    let mut cfg = CfgInfo {
76        cfg: Cfg::new(),
77        cfg_no_be: Cfg::new(),
78        loop_entries: HashSet::new(),
79        backward_edges: HashSet::new(),
80        switch_blocks: HashSet::new(),
81        only_reach_error: HashSet::new(),
82    };
83
84    // Add the nodes
85    for block_id in body.body.iter_indices() {
86        cfg.cfg.add_node(block_id);
87        cfg.cfg_no_be.add_node(block_id);
88    }
89
90    // Add the edges
91    let ancestors = HashSet::new();
92    let mut explored = HashSet::new();
93    build_cfg_partial_info_edges(
94        &mut cfg,
95        &ancestors,
96        &mut explored,
97        body,
98        src::BlockId::ZERO,
99    );
100
101    cfg
102}
103
104fn block_is_switch(body: &src::ExprBody, block_id: src::BlockId) -> bool {
105    let block = body.body.get(block_id).unwrap();
106    block.terminator.content.is_switch()
107}
108
109/// The terminator of the block is a panic, etc.
110fn block_is_error(body: &src::ExprBody, block_id: src::BlockId) -> bool {
111    let block = body.body.get(block_id).unwrap();
112    use src::RawTerminator::*;
113    match &block.terminator.content {
114        Abort(..) => true,
115        Goto { .. } | Switch { .. } | Return | Call { .. } | UnwindResume => false,
116    }
117}
118
119fn build_cfg_partial_info_edges(
120    cfg: &mut CfgInfo,
121    ancestors: &HashSet<src::BlockId>,
122    explored: &mut HashSet<src::BlockId>,
123    body: &src::ExprBody,
124    block_id: src::BlockId,
125) {
126    // Check if we already explored the current node
127    if explored.contains(&block_id) {
128        return;
129    }
130    explored.insert(block_id);
131
132    // Insert the current block in the set of ancestors blocks
133    let mut ancestors = ancestors.clone();
134    ancestors.insert(block_id);
135
136    // Check if it is a switch
137    if block_is_switch(body, block_id) {
138        cfg.switch_blocks.insert(block_id);
139    }
140
141    // Retrieve the block targets
142    let mut targets = body.body.get(block_id).unwrap().targets();
143    // Hack: we don't translate unwind paths in llbc so we ignore them here.
144    if let src::RawTerminator::Call { target, .. } =
145        body.body.get(block_id).unwrap().terminator.content
146    {
147        targets = vec![target];
148    }
149
150    let mut has_backward_edge = false;
151
152    // Add edges for all the targets and explore them, if they are not predecessors
153    for tgt in &targets {
154        // Insert the edge in the "regular" CFG
155        cfg.cfg.add_edge(block_id, *tgt, ());
156
157        // We need to check if it is a backward edge before inserting it in the
158        // CFG without backward edges and exploring it
159        if ancestors.contains(tgt) {
160            // This is a backward edge
161            has_backward_edge = true;
162            cfg.loop_entries.insert(*tgt);
163            cfg.backward_edges.insert((block_id, *tgt));
164        } else {
165            // Not a backward edge: insert the edge and explore
166            cfg.cfg_no_be.add_edge(block_id, *tgt, ());
167            ensure_sufficient_stack(|| {
168                build_cfg_partial_info_edges(cfg, &ancestors, explored, body, *tgt);
169            })
170        }
171    }
172
173    // Check if this node can only reach error nodes:
174    // - we check if the current node ends with an error terminator
175    // - or check that all the targets lead to error nodes
176    // Note that if there is a backward edge, we consider that we don't necessarily
177    // go to error.
178    if !has_backward_edge
179        && (block_is_error(body, block_id)
180            || (
181                // The targets are empty if this is an error node *or* a return node
182                !targets.is_empty() && targets.iter().all(|tgt| cfg.only_reach_error.contains(tgt))
183            ))
184    {
185        cfg.only_reach_error.insert(block_id);
186    }
187}
188
189#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
190struct BlockWithRank<T> {
191    /// A "rank" quantity that we use to order the nodes.
192    /// By placing the `rank` field before the `id`, we make sure that
193    /// we use the rank first in the lexicographic order.
194    rank: T,
195    id: src::BlockId,
196}
197
198type OrdBlockId = BlockWithRank<usize>;
199
200/// For the rank we use:
201/// - a "flow" quantity (see [BlocksInfo])
202/// - the *inverse* rank in the topological sort (i.e., `- topo_rank`)
203type FlowBlockId = BlockWithRank<(BigRational, isize)>;
204
205#[derive(Debug, Clone)]
206struct LoopExitCandidateInfo {
207    /// The occurrences going to this exit.
208    /// For every occurrence, we register the distance between the loop entry
209    /// and the node from which the edge going to the exit starts.
210    /// If later we have to choose between candidates with the same number
211    /// of occurrences, we choose the one with the smallest distances.
212    ///
213    /// Note that it *may* happen that we have several exit candidates referenced
214    /// more than once for one loop. This comes from the fact that whenever we
215    /// reach a node from which the loop entry is not reachable (without using a
216    /// backward edge leading to an outer loop entry), we register this node
217    /// as well as all its children as exit candidates.
218    /// Consider the following example:
219    /// ```text
220    /// while i < max {
221    ///     if cond {
222    ///         break;
223    ///     }
224    ///     s += i;
225    ///     i += 1
226    /// }
227    /// // All the below nodes are exit candidates (each of them is referenced twice)
228    /// s += 1;
229    /// return s;
230    /// ```
231    pub occurrences: Vec<usize>,
232}
233
234/// Check if a loop entry is reachable from a node, in a graph where we remove
235/// the backward edges going directly to the loop entry.
236///
237/// If the loop entry is not reachable, it means that:
238/// - the loop entry is not reachable at all
239/// - or it is only reachable through an outer loop
240///
241/// The starting node should be a (transitive) child of the loop entry.
242/// We use this to find candidates for loop exits.
243fn loop_entry_is_reachable_from_inner(
244    cfg: &CfgInfo,
245    loop_entry: src::BlockId,
246    block_id: src::BlockId,
247) -> bool {
248    // It is reachable in the complete graph. Check if it is reachable by not
249    // going through backward edges which go to outer loops. In practice, we
250    // just need to forbid the use of any backward edges at the exception of
251    // those which go directly to the current loop's entry. This means that we
252    // ignore backward edges to outer loops of course, but also backward edges
253    // to inner loops because we shouldn't need to follow those (there should be
254    // more direct paths).
255
256    // Explore the graph starting at block_id
257    let mut explored: HashSet<src::BlockId> = HashSet::new();
258    let mut stack: VecDeque<src::BlockId> = VecDeque::new();
259    stack.push_back(block_id);
260    while !stack.is_empty() {
261        let bid = stack.pop_front().unwrap();
262        if explored.contains(&bid) {
263            continue;
264        }
265        explored.insert(bid);
266
267        let next_ids = cfg.cfg.neighbors_directed(bid, Direction::Outgoing);
268        for next_id in next_ids {
269            // Check if this is a backward edge
270            if cfg.backward_edges.contains(&(bid, next_id)) {
271                // Backward edge: only allow those going directly to the current
272                // loop's entry
273                if next_id == loop_entry {
274                    // The loop entry is reachable
275                    return true;
276                } else {
277                    // Forbidden edge: ignore
278                    continue;
279                }
280            } else {
281                // Nothing special: add the node to the stack for further
282                // exploration
283                stack.push_back(next_id);
284            }
285        }
286    }
287
288    // The loop entry is not reachable
289    false
290}
291
292struct FilteredLoopParents {
293    remaining_parents: Vec<(src::BlockId, usize)>,
294    removed_parents: Vec<(src::BlockId, usize)>,
295}
296
297fn filter_loop_parents(
298    cfg: &CfgInfo,
299    parent_loops: &Vec<(src::BlockId, usize)>,
300    block_id: src::BlockId,
301) -> Option<FilteredLoopParents> {
302    let mut eliminate: usize = 0;
303    for (loop_id, _ldist) in parent_loops.iter().rev() {
304        if !loop_entry_is_reachable_from_inner(cfg, *loop_id, block_id) {
305            eliminate += 1;
306        } else {
307            break;
308        }
309    }
310
311    if eliminate > 0 {
312        // Split the vector of parents
313        let (remaining_parents, removed_parents) =
314            parent_loops.split_at(parent_loops.len() - eliminate);
315        let (mut remaining_parents, removed_parents) =
316            (remaining_parents.to_vec(), removed_parents.to_vec());
317
318        // Update the distance to the last loop - we just increment the distance
319        // by 1, because from the point of view of the parent loop, we just exited
320        // a block and go to the next sequence of instructions.
321        if !remaining_parents.is_empty() {
322            remaining_parents.last_mut().unwrap().1 += 1;
323        }
324
325        Some(FilteredLoopParents {
326            remaining_parents,
327            removed_parents,
328        })
329    } else {
330        None
331    }
332}
333
334/// List the nodes reachable from a starting point.
335/// We list the nodes and the depth (in the AST) at which they were found.
336fn list_reachable(cfg: &Cfg, start: src::BlockId) -> HashMap<src::BlockId, usize> {
337    let mut reachable: HashMap<src::BlockId, usize> = HashMap::new();
338    let mut stack: VecDeque<(src::BlockId, usize)> = VecDeque::new();
339    stack.push_back((start, 0));
340
341    while !stack.is_empty() {
342        let (bid, dist) = stack.pop_front().unwrap();
343
344        // Ignore this node if we already registered it with a better distance
345        match reachable.get(&bid) {
346            None => (),
347            Some(original_dist) => {
348                if *original_dist < dist {
349                    continue;
350                }
351            }
352        }
353
354        // Inset the node with its distance
355        reachable.insert(bid, dist);
356
357        // Add the children to the stack
358        for child in cfg.neighbors(bid) {
359            stack.push_back((child, dist + 1));
360        }
361    }
362
363    // Return
364    reachable
365}
366
367/// Register a node and its children as exit candidates for a list of
368/// parent loops.
369fn register_children_as_loop_exit_candidates(
370    cfg: &CfgInfo,
371    loop_exits: &mut HashMap<src::BlockId, IndexMap<src::BlockId, LoopExitCandidateInfo>>,
372    removed_parent_loops: &Vec<(src::BlockId, usize)>,
373    block_id: src::BlockId,
374) {
375    // List the reachable nodes
376    let reachable = list_reachable(&cfg.cfg_no_be, block_id);
377
378    let mut base_dist = 0;
379    // For every parent loop, in reverse order (we go from last to first in
380    // order to correctly compute the distances)
381    for (loop_id, loop_dist) in removed_parent_loops.iter().rev() {
382        // Update the distance to the loop entry
383        base_dist += *loop_dist;
384
385        // Retrieve the candidates
386        let candidates = loop_exits.get_mut(loop_id).unwrap();
387
388        // Update them
389        for (bid, dist) in reachable.iter() {
390            let distance = base_dist + *dist;
391            match candidates.get_mut(bid) {
392                None => {
393                    candidates.insert(
394                        *bid,
395                        LoopExitCandidateInfo {
396                            occurrences: vec![distance],
397                        },
398                    );
399                }
400                Some(c) => {
401                    c.occurrences.push(distance);
402                }
403            }
404        }
405    }
406}
407
408/// Compute the loop exit candidates.
409///
410/// There may be several candidates with the same "optimality" (same number of
411/// occurrences, etc.), in which case we choose the first one which was registered
412/// (the order in which we explore the graph is deterministic): this is why we
413/// store the candidates in a linked hash map.
414fn compute_loop_exit_candidates(
415    cfg: &CfgInfo,
416    explored: &mut HashSet<src::BlockId>,
417    ordered_loops: &mut Vec<src::BlockId>,
418    loop_exits: &mut HashMap<src::BlockId, IndexMap<src::BlockId, LoopExitCandidateInfo>>,
419    // List of parent loops, with the distance to the entry of the loop (the distance
420    // is the distance between the current node and the loop entry for the last parent,
421    // and the distance between the parents for the others).
422    mut parent_loops: Vec<(src::BlockId, usize)>,
423    block_id: src::BlockId,
424) {
425    if explored.contains(&block_id) {
426        return;
427    }
428    explored.insert(block_id);
429
430    // Check if we enter a loop - add the corresponding node if necessary
431    if cfg.loop_entries.contains(&block_id) {
432        parent_loops.push((block_id, 1));
433        ordered_loops.push(block_id);
434    } else {
435        // Increase the distance with the parent loop
436        if !parent_loops.is_empty() {
437            parent_loops.last_mut().unwrap().1 += 1;
438        }
439    };
440
441    // Retrieve the children - note that we ignore the back edges
442    let children = cfg.cfg_no_be.neighbors(block_id);
443    for child in children {
444        // If the parent loop entry is not reachable from the child without going
445        // through a backward edge which goes directly to the loop entry, consider
446        // this node a potential exit.
447        ensure_sufficient_stack(|| {
448            match filter_loop_parents(cfg, &parent_loops, child) {
449                None => {
450                    compute_loop_exit_candidates(
451                        cfg,
452                        explored,
453                        ordered_loops,
454                        loop_exits,
455                        parent_loops.clone(),
456                        child,
457                    );
458                }
459                Some(fparent_loops) => {
460                    // We filtered some parent loops: it means this child and its
461                    // children are loop exit candidates for all those loops: we must
462                    // thus register them.
463                    // Note that we register the child *and* its children: the reason
464                    // is that we might do something *then* actually jump to the exit.
465                    // For instance, the following block of code:
466                    // ```
467                    // if cond { break; } else { ... }
468                    // ```
469                    //
470                    // Gets translated in MIR to something like this:
471                    // ```
472                    // bb1: {
473                    //   if cond -> bb2 else -> bb3; // bb2 is not the real exit
474                    // }
475                    //
476                    // bb2: {
477                    //   goto bb4; // bb4 is the real exit
478                    // }
479                    // ```
480                    register_children_as_loop_exit_candidates(
481                        cfg,
482                        loop_exits,
483                        &fparent_loops.removed_parents,
484                        child,
485                    );
486
487                    // Explore, with the filtered parents
488                    compute_loop_exit_candidates(
489                        cfg,
490                        explored,
491                        ordered_loops,
492                        loop_exits,
493                        fparent_loops.remaining_parents,
494                        child,
495                    );
496                }
497            }
498        })
499    }
500}
501
502/// See [`compute_loop_switch_exits`] for
503/// explanations about what "exits" are.
504///
505/// The following function computes the loop exits. It acts as follows.
506///
507/// We keep track of a stack of the loops in which we entered.
508/// It is very easy to check when we enter a loop: loop entries are destinations
509/// of backward edges, which can be spotted with a simple graph exploration (see
510/// [`build_cfg_partial_info_edges`].
511/// The criteria to consider whether we exit a loop is the following:
512/// - we exit a loop if we go to a block from which we can't reach the loop
513///   entry at all
514/// - or if we can reach the loop entry, but must use a backward edge which goes
515///   to an outer loop
516///
517/// It is better explained on the following example:
518/// ```text
519/// 'outer while i < max {
520///     'inner while j < max {
521///        j += 1;
522///     }
523///     // (i)
524///     i += 1;
525/// }
526/// ```
527/// If we enter the inner loop then go to (i) from the inner loop, we consider
528/// that we exited the outer loop because:
529/// - we can reach the entry of the inner loop from (i) (by finishing then
530///   starting again an iteration of the outer loop)
531/// - but doing this requires taking a backward edge which goes to the outer loop
532///
533/// Whenever we exit a loop, we save the block we went to as an exit candidate
534/// for this loop. Note that there may by many exit candidates. For instance,
535/// in the below example:
536/// ```text
537/// while ... {
538///    ...
539///    if ... {
540///        // We can't reach the loop entry from here: this is an exit
541///        // candidate
542///        return;
543///    }
544/// }
545/// // This is another exit candidate - and this is the one we want to use
546/// // as the "real" exit...
547/// ...
548/// ```
549///
550/// Also note that it may happen that we go several times to the same exit (if
551/// we use breaks for instance): we record the number of times an exit candidate
552/// is used.
553///
554/// Once we listed all the exit candidates, we find the "best" one for every
555/// loop, starting with the outer loops. We start with outer loops because
556/// inner loops might use breaks to exit to the exit of outer loops: if we
557/// start with the inner loops, the exit which is "natural" for the outer loop
558/// might end up being used for one of the inner loops...
559///
560/// The best exit is the following one:
561/// - it is the one which is used the most times (note that there can be
562///   several candidates which are referenced strictly more than once: see the
563///   comment below)
564/// - if several exits have the same number of occurrences, we choose the one
565///   for which we goto the "earliest" (earliest meaning that the goto is close to
566///   the loop entry node in the AST). The reason is that all the loops should
567///   have an outer if ... then ... else ... which executes the loop body or goes
568///   to the exit (note that this is not necessarily the first
569///   if ... then ... else ... we find: loop conditions can be arbitrary
570///   expressions, containing branchings).
571///
572/// # Several candidates for a loop exit:
573/// =====================================
574/// There used to be a sanity check to ensure there are no two different
575/// candidates with exactly the same number of occurrences and distance from
576/// the entry of the loop, if the number of occurrences is > 1.
577///
578/// We removed it because it does happen, for instance here (the match
579/// introduces an `unreachable` node, and it has the same number of
580/// occurrences and the same distance to the loop entry as the `panic`
581/// node):
582///
583/// ```text
584/// pub fn list_nth_mut_loop_pair<'a, T>(
585///     mut ls: &'a mut List<T>,
586///     mut i: u32,
587/// ) -> &'a mut T {
588///     loop {
589///         match ls {
590///             List::Nil => {
591///                 panic!() // <-- best candidate
592///             }
593///             List::Cons(x, tl) => {
594///                 if i == 0 {
595///                     return x;
596///                 } else {
597///                     ls = tl;
598///                     i -= 1;
599///                 }
600///             }
601///             _ => {
602///               // Note that Rustc always introduces an unreachable branch after
603///               // desugaring matches.
604///               unreachable!(), // <-- best candidate
605///             }
606///         }
607///     }
608/// }
609/// ```
610///
611/// When this happens we choose an exit candidate whose edges don't necessarily
612/// lead to an error (above there are none, so we don't choose any exits). Note
613/// that this last condition is important to prevent loops from being unnecessarily
614/// nested:
615///
616/// ```text
617/// pub fn nested_loops_enum(step_out: usize, step_in: usize) -> usize {
618///     let mut s = 0;
619///
620///     for _ in 0..128 { // We don't want this loop to be nested with the loops below
621///         s += 1;
622///     }
623///
624///     for _ in 0..(step_out) {
625///         for _ in 0..(step_in) {
626///             s += 1;
627///         }
628///     }
629///
630///     s
631/// }
632/// ```
633fn compute_loop_exits(
634    ctx: &mut TransformCtx,
635    body: &src::ExprBody,
636    cfg: &CfgInfo,
637) -> HashMap<src::BlockId, Option<src::BlockId>> {
638    let mut explored = HashSet::new();
639    let mut ordered_loops = Vec::new();
640    let mut loop_exits = HashMap::new();
641
642    // Initialize the loop exits candidates
643    for loop_id in &cfg.loop_entries {
644        loop_exits.insert(*loop_id, IndexMap::new());
645    }
646
647    // Compute the candidates
648    compute_loop_exit_candidates(
649        cfg,
650        &mut explored,
651        &mut ordered_loops,
652        &mut loop_exits,
653        Vec::new(),
654        src::BlockId::ZERO,
655    );
656
657    {
658        // Debugging
659        let candidates: Vec<String> = loop_exits
660            .iter()
661            .map(|(loop_id, candidates)| format!("{loop_id} -> {candidates:?}"))
662            .collect();
663        trace!("Loop exit candidates:\n{}", candidates.join("\n"));
664    }
665
666    // Choose one candidate among the potential candidates.
667    let mut exits: HashSet<src::BlockId> = HashSet::new();
668    let mut chosen_loop_exits: HashMap<src::BlockId, Option<src::BlockId>> = HashMap::new();
669    // For every loop
670    for loop_id in ordered_loops {
671        // Check the candidates.
672        // Ignore the candidates which have already been chosen as exits for other
673        // loops (which should be outer loops).
674        // We choose the exit with:
675        // - the most occurrences
676        // - the least total distance (if there are several possibilities)
677        // - doesn't necessarily lead to an error (panic, unreachable)
678
679        // First:
680        // - filter the candidates
681        // - compute the number of occurrences
682        // - compute the sum of distances
683        // TODO: we could simply order by using a lexicographic order
684        let loop_exits = loop_exits
685            .get(&loop_id)
686            .unwrap()
687            .iter()
688            // If candidate already selected for another loop: ignore
689            .filter(|(candidate_id, _)| !exits.contains(candidate_id))
690            .map(|(candidate_id, candidate_info)| {
691                let num_occurrences = candidate_info.occurrences.len();
692                let dist_sum = candidate_info.occurrences.iter().sum();
693                (*candidate_id, num_occurrences, dist_sum)
694            })
695            .collect_vec();
696
697        trace!(
698            "Loop {}: possible exits:\n{}",
699            loop_id,
700            loop_exits
701                .iter()
702                .map(|(bid, occs, dsum)| format!(
703                    "{bid} -> {{ occurrences: {occs}, dist_sum: {dsum} }}",
704                ))
705                .collect::<Vec<String>>()
706                .join("\n")
707        );
708
709        // Second: actually select the proper candidate.
710
711        // We find the one with the highest occurrence and the smallest distance
712        // from the entry of the loop (note that we take care of listing the exit
713        // candidates in a deterministic order).
714        let mut best_exit: Option<src::BlockId> = None;
715        let mut best_occurrences = 0;
716        let mut best_dist_sum = std::usize::MAX;
717        for (candidate_id, occurrences, dist_sum) in &loop_exits {
718            if (*occurrences > best_occurrences)
719                || (*occurrences == best_occurrences && *dist_sum < best_dist_sum)
720            {
721                best_exit = Some(*candidate_id);
722                best_occurrences = *occurrences;
723                best_dist_sum = *dist_sum;
724            }
725        }
726
727        let possible_candidates: Vec<_> = loop_exits
728            .iter()
729            .filter_map(|(bid, occs, dsum)| {
730                if *occs == best_occurrences && *dsum == best_dist_sum {
731                    Some(*bid)
732                } else {
733                    None
734                }
735            })
736            .collect();
737        let num_possible_candidates = loop_exits.len();
738
739        // If there is exactly one one best candidate, it is easy.
740        // Otherwise we need to split further.
741        if num_possible_candidates > 1 {
742            trace!("Best candidates: {:?}", possible_candidates);
743            // TODO: if we use a lexicographic order we can merge this with the code
744            // above.
745            // Remove the candidates which only lead to errors (panic or unreachable).
746            let candidates: Vec<_> = possible_candidates
747                .iter()
748                .filter(|bid| !cfg.only_reach_error.contains(bid))
749                .collect();
750            // If there is exactly one candidate we select it
751            if candidates.len() == 1 {
752                let exit_id = *candidates[0];
753                exits.insert(exit_id);
754                trace!("Loop {loop_id}: selected the best exit candidate {exit_id}");
755                chosen_loop_exits.insert(loop_id, Some(exit_id));
756            } else {
757                // Otherwise we do not select any exit.
758                // We don't want to select any exit if we are in the below situation
759                // (all paths lead to errors). We added a sanity check below to
760                // catch the situations where there are several exits which don't
761                // lead to errors.
762                //
763                // Example:
764                // ========
765                // ```
766                // loop {
767                //     match ls {
768                //         List::Nil => {
769                //             panic!() // <-- best candidate
770                //         }
771                //         List::Cons(x, tl) => {
772                //             if i == 0 {
773                //                 return x;
774                //             } else {
775                //                 ls = tl;
776                //                 i -= 1;
777                //             }
778                //         }
779                //         _ => {
780                //           unreachable!(); // <-- best candidate (Rustc introduces an `unreachable` case)
781                //         }
782                //     }
783                // }
784                // ```
785                //
786                // Adding this sanity check so that we can see when there are
787                // several candidates.
788                let span = body.body[loop_id].terminator.span; // Taking *a* span from the block
789                sanity_check!(ctx, span, candidates.is_empty());
790                trace!("Loop {loop_id}: did not select an exit candidate because they all lead to panics");
791                chosen_loop_exits.insert(loop_id, None);
792            }
793        } else {
794            // Register the exit, if there is one
795            match best_exit {
796                None => {
797                    // No exit was found
798                    trace!("Loop {loop_id}: could not find an exit candidate");
799                    chosen_loop_exits.insert(loop_id, None);
800                }
801                Some(exit_id) => {
802                    exits.insert(exit_id);
803                    trace!("Loop {loop_id}: selected the unique exit candidate {exit_id}");
804                    chosen_loop_exits.insert(loop_id, Some(exit_id));
805                }
806            }
807        }
808    }
809
810    // Return the chosen exits
811    trace!("Chosen loop exits: {:?}", chosen_loop_exits);
812    chosen_loop_exits
813}
814
815/// Information used to compute the switch exits.
816/// We compute this information for every block in the graph.
817/// Note that we make sure to use immutable sets because we rely a lot
818/// on cloning.
819#[derive(Debug, Clone)]
820struct BlocksInfo {
821    id: src::BlockId,
822    /// All the successors of the block
823    succs: BTreeSet<OrdBlockId>,
824    /// Let's say we put a quantity of water equal to 1 on the block, and the
825    /// water flows downards. Whenever there is a branching, the quantity of
826    /// water gets equally divided between the branches. When the control flows
827    /// join, we put the water back together. The set below computes the amount
828    /// of water received by each descendant of the node.
829    ///
830    /// TODO: there must be a known algorithm which computes this, right?...
831    /// This is exactly this problems:
832    /// <https://stackoverflow.com/questions/78221666/algorithm-for-total-flow-through-weighted-directed-acyclic-graph>
833    /// TODO: the way I compute this is not efficient.
834    ///
835    /// Remark: in order to rank the nodes, we also use the negation of the
836    /// rank given by the topological order. The last elements of the set
837    /// have the highest flow, that is they are the nodes to which the maximum
838    /// number of paths converge. If several nodes have the same flow, we want
839    /// to take the highest one in the hierarchy: hence the use of the inverse
840    /// of the topological rank.
841    ///
842    /// Ex.:
843    /// ```text
844    /// A  -- we start here
845    /// |
846    /// |---------------------------------------
847    /// |            |            |            |
848    /// B:(0.25,-1)  C:(0.25,-2)  D:(0.25,-3)  E:(0.25,-4)
849    /// |            |            |
850    /// |--------------------------
851    /// |
852    /// F:(0.75,-5)
853    /// |
854    /// |
855    /// G:(0.75,-6)
856    /// ```
857    /// The "best" node (with the highest (flow, rank) in the graph above is F.
858    flow: BTreeSet<FlowBlockId>,
859}
860
861/// Create an [OrdBlockId] from a block id and a rank given by a map giving
862/// a sort (topological in our use cases) over the graph.
863fn make_ord_block_id(
864    block_id: src::BlockId,
865    sort_map: &HashMap<src::BlockId, usize>,
866) -> OrdBlockId {
867    let rank = *sort_map.get(&block_id).unwrap();
868    OrdBlockId { id: block_id, rank }
869}
870
871/// Compute [BlocksInfo] for every block in the graph.
872/// This information is then used to compute the switch exits.
873fn compute_switch_exits_explore(
874    cfg: &CfgInfo,
875    tsort_map: &HashMap<src::BlockId, usize>,
876    memoized: &mut HashMap<src::BlockId, BlocksInfo>,
877    block_id: src::BlockId,
878) {
879    // Check if we already computer the info
880    if memoized.get(&block_id).is_some() {
881        return;
882    }
883
884    // Compute the block information for the children
885    let children_ids: Vec<src::BlockId> = cfg.cfg_no_be.neighbors(block_id).collect_vec();
886    children_ids
887        .iter()
888        .for_each(|bid| compute_switch_exits_explore(cfg, tsort_map, memoized, *bid));
889
890    // Retrieve the information
891    let children: Vec<&BlocksInfo> = children_ids
892        .iter()
893        .map(|bid| memoized.get(bid).unwrap())
894        .collect();
895
896    // Compute the successors
897    // Add the children themselves in their sets of successors
898    let all_succs: BTreeSet<OrdBlockId> = children.iter().fold(BTreeSet::new(), |acc, s| {
899        // Add the successors to the accumulator
900        let mut acc: BTreeSet<_> = acc.union(&s.succs).copied().collect();
901        // Add the child itself
902        acc.insert(make_ord_block_id(s.id, tsort_map));
903        acc
904    });
905
906    // Compute the "flows" (if there are children)
907    // TODO: this is computationally expensive...
908    let mut flow: HashMap<src::BlockId, (BigRational, isize)> = HashMap::new();
909
910    if children.len() > 0 {
911        // We need to divide the initial flow equally between the children
912        let factor = BigRational::new(BigInt::from(1), BigInt::from(children.len()));
913
914        // Small helper
915        let mut add_to_flow = |(id, f0): (src::BlockId, (BigRational, isize))| match flow.get(&id) {
916            None => flow.insert(id, f0),
917            Some(f1) => {
918                let f = f0.0 + f1.0.clone();
919                assert!(f0.1 == f1.1);
920                flow.insert(id, (f, f0.1))
921            }
922        };
923
924        // For each child, multiply the flows of its own children by the ratio,
925        // and add.
926        for child in children {
927            // First, add the child itself
928            let rank = isize::try_from(*tsort_map.get(&child.id).unwrap()).unwrap();
929            add_to_flow((child.id, (factor.clone(), -rank)));
930
931            // Then add its successors
932            for child1 in &child.flow {
933                add_to_flow((
934                    child1.id,
935                    (factor.clone() * child1.rank.0.clone(), child1.rank.1),
936                ));
937            }
938        }
939    }
940
941    // Put everything in an ordered set: the first block id will be the one with
942    // the highest flow, and in case of equality it will be the one with the
943    // smallest block id.
944    let flow: BTreeSet<FlowBlockId> = flow
945        .into_iter()
946        .map(|(id, rank)| BlockWithRank { rank, id })
947        .collect();
948
949    trace!("block: {block_id}, all successors: {all_succs:?}, flow: {flow:?}");
950
951    // Memoize
952    let info = BlocksInfo {
953        id: block_id,
954        succs: all_succs,
955        flow,
956    };
957    memoized.insert(block_id, info.clone());
958}
959
960/// Auxiliary helper
961///
962/// Check if it is possible to reach the exit of an outer switch from `start_bid`
963/// without going through the `exit_candidate`. We use the graph without
964/// backward edges.
965fn can_reach_outer_exit(
966    cfg: &CfgInfo,
967    outer_exits: &HashSet<src::BlockId>,
968    start_bid: src::BlockId,
969    exit_candidate: src::BlockId,
970) -> bool {
971    // The stack of blocks
972    let mut stack: Vec<src::BlockId> = vec![start_bid];
973    let mut explored: HashSet<src::BlockId> = HashSet::new();
974
975    while let Some(bid) = stack.pop() {
976        // Check if already explored
977        if explored.contains(&bid) {
978            break;
979        }
980        explored.insert(bid);
981
982        // Check if this is the exit candidate
983        if bid == exit_candidate {
984            // Stop exploring
985            break;
986        }
987
988        // Check if this is an outer exit
989        if outer_exits.contains(&bid) {
990            return true;
991        }
992
993        // Add the children to the stack
994        for child in cfg.cfg_no_be.neighbors(bid) {
995            stack.push(child);
996        }
997    }
998
999    false
1000}
1001
1002/// See [`compute_loop_switch_exits`] for
1003/// explanations about what "exits" are.
1004///
1005/// In order to compute the switch exits, we simply recursively compute a
1006/// topologically ordered set of "filtered successors" as follows (note
1007/// that we work in the CFG *without* back edges):
1008/// - for a block which doesn't branch (only one successor), the filtered
1009///   successors is the set of reachable nodes.
1010/// - for a block which branches, we compute the nodes reachable from all
1011///   the children, and find the "best" intersection between those.
1012///   Note that we find the "best" intersection (a pair of branches which
1013///   maximize the intersection of filtered successors) because some branches
1014///   might never join the control-flow of the other branches, if they contain
1015///   a `break`, `return`, `panic`, etc., like here:
1016///   ```text
1017///   if b { x = 3; } { return; }
1018///   y += x;
1019///   ...
1020///   ```
1021/// Note that with nested switches, the branches of the inner switches might
1022/// goto the exits of the outer switches: for this reason, we give precedence
1023/// to the outer switches.
1024fn compute_switch_exits(
1025    cfg: &CfgInfo,
1026    tsort_map: &HashMap<src::BlockId, usize>,
1027) -> HashMap<src::BlockId, Option<src::BlockId>> {
1028    // Compute the successors info map, starting at the root node
1029    let mut succs_info_map = HashMap::new();
1030    trace!(
1031        "- cfg.cfg:\n{:?}\n- cfg.cfg_no_be:\n{:?}\n- cfg.switch_blocks:\n{:?}",
1032        cfg.cfg,
1033        cfg.cfg_no_be,
1034        cfg.switch_blocks
1035    );
1036    let _ = compute_switch_exits_explore(cfg, tsort_map, &mut succs_info_map, src::BlockId::ZERO);
1037
1038    // We need to give precedence to the outer switches: we thus iterate
1039    // over the switch blocks in topological order.
1040    let mut sorted_switch_blocks: BTreeSet<OrdBlockId> = BTreeSet::new();
1041    for bid in cfg.switch_blocks.iter() {
1042        sorted_switch_blocks.insert(make_ord_block_id(*bid, tsort_map));
1043    }
1044    trace!("sorted_switch_blocks: {:?}", sorted_switch_blocks);
1045
1046    // Debugging: print all the successors
1047    {
1048        trace!("Successors info:\n{}\n", {
1049            let mut out = vec![];
1050            for (bid, info) in &succs_info_map {
1051                out.push(
1052                    format!(
1053                        "{} -> {{succs: {:?}, flow: {:?}}}",
1054                        bid, &info.succs, &info.flow
1055                    )
1056                    .to_string(),
1057                );
1058            }
1059            out.join("\n")
1060        });
1061    }
1062
1063    // For every node which is a switch, retrieve the exit.
1064    // As the set of intersection of successors is topologically sorted, the
1065    // exit should be the first node in the set (if the set is non empty).
1066    // Also, we need to explore the nodes in topological order, to give
1067    // precedence to the outer switches.
1068    let mut exits_set = HashSet::new();
1069    let mut ord_exits_set = BTreeSet::new();
1070    let mut exits = HashMap::new();
1071    for bid in sorted_switch_blocks {
1072        trace!("Finding exit candidate for: {bid:?}");
1073        let bid = bid.id;
1074        let info = succs_info_map.get(&bid).unwrap();
1075        let succs = &info.flow;
1076        // Find the best successor: this is the last one (with the highest flow,
1077        // and the highest reverse topological rank).
1078        if succs.is_empty() {
1079            trace!("{bid:?} has no successors");
1080            exits.insert(bid, None);
1081        } else {
1082            // We have an exit candidate: check that it was not already
1083            // taken by an external switch
1084            let exit = succs.last().unwrap();
1085            trace!("{bid:?} has an exit candidate: {exit:?}");
1086            if exits_set.contains(&exit.id) {
1087                trace!("Ignoring the exit candidate because already taken by an external switch");
1088                exits.insert(bid, None);
1089            } else {
1090                // It was not taken by an external switch.
1091                //
1092                // We must check that we can't reach the exit of an external
1093                // switch from one of the branches, without going through the
1094                // exit candidate.
1095                // We do this by simply checking that we can't reach any exits
1096                // (and use the fact that we explore the switch by using a
1097                // topological order to not discard valid exit candidates).
1098                //
1099                // The reason is that it can lead to code like the following:
1100                // ```
1101                // if ... { // if #1
1102                //   if ... { // if #2
1103                //     ...
1104                //     // here, we have a `goto b1`, where b1 is the exit
1105                //     // of if #2: we thus stop translating the blocks.
1106                //   }
1107                //   else {
1108                //     ...
1109                //     // here, we have a `goto b2`, where b2 is the exit
1110                //     // of if #1: we thus stop translating the blocks.
1111                //   }
1112                //   // We insert code for the block b1 here (which is the exit of
1113                //   // the exit of if #2). However, this block should only
1114                //   // be executed in the branch "then" of the if #2, not in
1115                //   // the branch "else".
1116                //   ...
1117                // }
1118                // else {
1119                //   ...
1120                // }
1121                // ```
1122
1123                // First: we do a quick check (does the set of all successors
1124                // intersect the set of exits for outer blocks?). If yes, we do
1125                // a more precise analysis: we check if we can reach the exit
1126                // *without going through* the exit candidate.
1127                if info.succs.intersection(&ord_exits_set).next().is_none()
1128                    || !can_reach_outer_exit(cfg, &exits_set, bid, exit.id)
1129                {
1130                    trace!("Keeping the exit candidate");
1131                    // No intersection: ok
1132                    exits_set.insert(exit.id);
1133                    ord_exits_set.insert(make_ord_block_id(exit.id, tsort_map));
1134                    exits.insert(bid, Some(exit.id));
1135                } else {
1136                    trace!("Ignoring the exit candidate because of an intersection with external switches");
1137                    exits.insert(bid, None);
1138                }
1139            }
1140        }
1141    }
1142
1143    exits
1144}
1145
1146/// The exits of a graph
1147#[derive(Debug, Clone)]
1148struct ExitInfo {
1149    /// The loop exits
1150    loop_exits: HashMap<src::BlockId, Option<src::BlockId>>,
1151    /// Some loop exits actually belong to outer switches. We still need
1152    /// to track them in the loop exits, in order to know when we should
1153    /// insert a break. However, we need to make sure we don't add the
1154    /// corresponding block in a sequence, after having translated the
1155    /// loop, like so:
1156    /// ```text
1157    /// loop {
1158    ///   loop_body
1159    /// };
1160    /// exit_blocks // OK if the exit "belongs" to the loop
1161    /// ```
1162    ///
1163    /// In case the exit doesn't belong to the loop:
1164    /// ```text
1165    /// if b {
1166    ///   loop {
1167    ///     loop_body
1168    ///   } // no exit blocks after the loop
1169    /// }
1170    /// else {
1171    ///   ...
1172    /// };
1173    /// exit_blocks // the exit blocks are here
1174    /// ```
1175    owned_loop_exits: HashMap<src::BlockId, Option<src::BlockId>>,
1176    /// The switch exits.
1177    /// Note that the switch exits are always owned.
1178    owned_switch_exits: HashMap<src::BlockId, Option<src::BlockId>>,
1179}
1180
1181/// Compute the exits for the loops and the switches (switch on integer and
1182/// if ... then ... else ...). We need to do this because control-flow in MIR
1183/// is destructured: we have gotos everywhere.
1184///
1185/// Let's consider the following piece of code:
1186/// ```text
1187/// if cond1 { ... } else { ... };
1188/// if cond2 { ... } else { ... };
1189/// ```
1190/// Once converted to MIR, the control-flow is destructured, which means we
1191/// have gotos everywhere. When reconstructing the control-flow, we have
1192/// to be careful about the point where we should join the two branches of
1193/// the first if.
1194/// For instance, if we don't notice they should be joined at some point (i.e,
1195/// whatever the branch we take, there is a moment when we go to the exact
1196/// same place, just before the second if), we might generate code like
1197/// this, with some duplicata:
1198/// ```text
1199/// if cond1 { ...; if cond2 { ... } else { ...} }
1200/// else { ...; if cond2 { ... } else { ...} }
1201/// ```
1202///
1203/// Such a reconstructed program is valid, but it is definitely non-optimal:
1204/// it is very different from the original program (making it less clean and
1205/// clear), more bloated, and might involve duplicating the proof effort.
1206///
1207/// For this reason, we need to find the "exit" of the first loop, which is
1208/// the point where the two branches join. Note that this can be a bit tricky,
1209/// because there may be more than two branches (if we do `switch(x) { ... }`),
1210/// and some of them might not join (if they contain a `break`, `panic`,
1211/// `return`, etc.).
1212///
1213/// Finally, some similar issues arise for loops. For instance, let's consider
1214/// the following piece of code:
1215/// ```text
1216/// while cond1 {
1217///   e1;
1218///   if cond2 {
1219///     break;
1220///   }
1221///   e2;
1222/// }
1223/// e3;
1224/// return;
1225/// ```
1226///
1227/// Note that in MIR, the loop gets desugared to an if ... then ... else ....
1228/// From the MIR, We want to generate something like this:
1229/// ```text
1230/// loop {
1231///   if cond1 {
1232///     e1;
1233///     if cond2 {
1234///       break;
1235///     }
1236///     e2;
1237///     continue;
1238///   }
1239///   else {
1240///     break;
1241///   }
1242/// };
1243/// e3;
1244/// return;
1245/// ```
1246///
1247/// But if we don't pay attention, we might end up with that, once again with
1248/// duplications:
1249/// ```text
1250/// loop {
1251///   if cond1 {
1252///     e1;
1253///     if cond2 {
1254///       e3;
1255///       return;
1256///     }
1257///     e2;
1258///     continue;
1259///   }
1260///   else {
1261///     e3;
1262///     return;
1263///   }
1264/// }
1265/// ```
1266/// We thus have to notice that if the loop condition is false, we goto the same
1267/// block as when following the goto introduced by the break inside the loop, and
1268/// this block is dubbed the "loop exit".
1269///
1270/// The following function thus computes the "exits" for loops and switches, which
1271/// are basically the points where control-flow joins.
1272fn compute_loop_switch_exits(
1273    ctx: &mut TransformCtx,
1274    body: &src::ExprBody,
1275    cfg_info: &CfgInfo,
1276) -> ExitInfo {
1277    // Use the CFG without backward edges to topologically sort the nodes.
1278    // Note that `toposort` returns `Err` if and only if it finds cycles (which
1279    // can't happen).
1280    let tsorted: Vec<src::BlockId> = toposort(&cfg_info.cfg_no_be, None).unwrap();
1281
1282    // Build the map: block id -> topological sort rank
1283    let tsort_map: HashMap<src::BlockId, usize> = tsorted
1284        .into_iter()
1285        .enumerate()
1286        .map(|(i, block_id)| (block_id, i))
1287        .collect();
1288    trace!("tsort_map:\n{:?}", tsort_map);
1289
1290    // Compute the loop exits
1291    let loop_exits = compute_loop_exits(ctx, body, cfg_info);
1292    trace!("loop_exits:\n{:?}", loop_exits);
1293
1294    // Compute the switch exits
1295    let switch_exits = compute_switch_exits(cfg_info, &tsort_map);
1296    trace!("switch_exits:\n{:?}", switch_exits);
1297
1298    // Compute the exit info
1299    let mut exit_info = ExitInfo {
1300        loop_exits: HashMap::new(),
1301        owned_loop_exits: HashMap::new(),
1302        owned_switch_exits: HashMap::new(),
1303    };
1304
1305    // We need to give precedence to the outer switches and loops: we thus iterate
1306    // over the blocks in topological order.
1307    let mut sorted_blocks: BTreeSet<OrdBlockId> = BTreeSet::new();
1308    for bid in cfg_info
1309        .loop_entries
1310        .iter()
1311        .chain(cfg_info.switch_blocks.iter())
1312    {
1313        sorted_blocks.insert(make_ord_block_id(*bid, &tsort_map));
1314    }
1315
1316    // Keep track of the exits which were already attributed
1317    let mut all_exits = HashSet::new();
1318
1319    // Put all this together
1320    for bid in sorted_blocks {
1321        let bid = bid.id;
1322        // Check if loop or switch block
1323        if cfg_info.loop_entries.contains(&bid) {
1324            // This is a loop.
1325            //
1326            // For loops, we always register the exit (if there is one).
1327            // However, the exit may be owned by an outer switch (note
1328            // that we already took care of spreading the exits between
1329            // the inner/outer loops)
1330            let exit_id = loop_exits.get(&bid).unwrap();
1331            exit_info.loop_exits.insert(bid, *exit_id);
1332
1333            // Check if we "own" the exit
1334            match exit_id {
1335                None => {
1336                    // No exit
1337                    exit_info.owned_loop_exits.insert(bid, None);
1338                }
1339                Some(exit_id) => {
1340                    if all_exits.contains(exit_id) {
1341                        // We don't own it
1342                        exit_info.owned_loop_exits.insert(bid, None);
1343                    } else {
1344                        // We own it
1345                        exit_info.owned_loop_exits.insert(bid, Some(*exit_id));
1346                        all_exits.insert(*exit_id);
1347                    }
1348                }
1349            }
1350        } else {
1351            // For switches: check that the exit was not already given to a
1352            // loop
1353            let exit_id = switch_exits.get(&bid).unwrap();
1354
1355            match exit_id {
1356                None => {
1357                    // No exit
1358                    exit_info.owned_switch_exits.insert(bid, None);
1359                }
1360                Some(exit_id) => {
1361                    if all_exits.contains(exit_id) {
1362                        // We don't own it
1363                        exit_info.owned_switch_exits.insert(bid, None);
1364                    } else {
1365                        // We own it
1366                        exit_info.owned_switch_exits.insert(bid, Some(*exit_id));
1367                        all_exits.insert(*exit_id);
1368                    }
1369                }
1370            }
1371        }
1372    }
1373
1374    exit_info
1375}
1376
1377fn get_goto_kind(
1378    exits_info: &ExitInfo,
1379    parent_loops: &Vec<src::BlockId>,
1380    switch_exit_blocks: &HashSet<src::BlockId>,
1381    next_block_id: src::BlockId,
1382) -> GotoKind {
1383    // First explore the parent loops in revert order
1384    for (i, loop_id) in parent_loops.iter().rev().enumerate() {
1385        // If we goto a loop entry node: this is a 'continue'
1386        if next_block_id == *loop_id {
1387            return GotoKind::Continue(i);
1388        } else {
1389            // If we goto a loop exit node: this is a 'break'
1390            if let Some(exit_id) = exits_info.loop_exits.get(loop_id).unwrap() {
1391                if next_block_id == *exit_id {
1392                    return GotoKind::Break(i);
1393                }
1394            }
1395        }
1396    }
1397
1398    // Check if the goto exits the current block
1399    if switch_exit_blocks.contains(&next_block_id) {
1400        return GotoKind::ExitBlock;
1401    }
1402
1403    // Default
1404    GotoKind::Goto
1405}
1406
1407enum GotoKind {
1408    Break(usize),
1409    Continue(usize),
1410    ExitBlock,
1411    Goto,
1412}
1413
1414/// `parent_span`: we need some span data for the new statement.
1415/// We use the one for the parent terminator.
1416fn translate_child_block(
1417    info: &mut BlockInfo<'_>,
1418    parent_loops: &Vec<src::BlockId>,
1419    switch_exit_blocks: &HashSet<src::BlockId>,
1420    parent_span: Span,
1421    child_id: src::BlockId,
1422) -> Option<tgt::Block> {
1423    // Check if this is a backward call
1424    match get_goto_kind(info.exits_info, parent_loops, switch_exit_blocks, child_id) {
1425        GotoKind::Break(index) => {
1426            let st = tgt::RawStatement::Break(index);
1427            Some(tgt::Statement::new(parent_span, st).into_block())
1428        }
1429        GotoKind::Continue(index) => {
1430            let st = tgt::RawStatement::Continue(index);
1431            Some(tgt::Statement::new(parent_span, st).into_block())
1432        }
1433        // If we are going to an exit block we simply ignore the goto
1434        GotoKind::ExitBlock => None,
1435        GotoKind::Goto => {
1436            // "Standard" goto: just recursively translate
1437            ensure_sufficient_stack(|| {
1438                Some(translate_block(
1439                    info,
1440                    parent_loops,
1441                    switch_exit_blocks,
1442                    child_id,
1443                ))
1444            })
1445        }
1446    }
1447}
1448
1449fn opt_block_unwrap_or_nop(span: Span, opt_block: Option<tgt::Block>) -> tgt::Block {
1450    opt_block.unwrap_or_else(|| tgt::Statement::new(span, tgt::RawStatement::Nop).into_block())
1451}
1452
1453fn translate_statement(st: &src::Statement) -> Option<tgt::Statement> {
1454    let src_span = st.span;
1455    let st = match st.content.clone() {
1456        src::RawStatement::Assign(place, rvalue) => tgt::RawStatement::Assign(place, rvalue),
1457        src::RawStatement::SetDiscriminant(place, variant_id) => {
1458            tgt::RawStatement::SetDiscriminant(place, variant_id)
1459        }
1460        src::RawStatement::CopyNonOverlapping(copy) => tgt::RawStatement::CopyNonOverlapping(copy),
1461        src::RawStatement::StorageLive(var_id) => tgt::RawStatement::StorageLive(var_id),
1462        src::RawStatement::StorageDead(var_id) => tgt::RawStatement::StorageDead(var_id),
1463        src::RawStatement::Deinit(place) => tgt::RawStatement::Deinit(place),
1464        src::RawStatement::Drop(place) => tgt::RawStatement::Drop(place),
1465        src::RawStatement::Assert(assert) => tgt::RawStatement::Assert(assert),
1466        src::RawStatement::Nop => tgt::RawStatement::Nop,
1467        src::RawStatement::Error(s) => tgt::RawStatement::Error(s),
1468    };
1469    Some(tgt::Statement::new(src_span, st))
1470}
1471
1472fn translate_terminator(
1473    info: &mut BlockInfo<'_>,
1474    parent_loops: &Vec<src::BlockId>,
1475    switch_exit_blocks: &HashSet<src::BlockId>,
1476    terminator: &src::Terminator,
1477) -> tgt::Block {
1478    let src_span = terminator.span;
1479
1480    match &terminator.content {
1481        src::RawTerminator::Abort(kind) => {
1482            tgt::Statement::new(src_span, tgt::RawStatement::Abort(kind.clone())).into_block()
1483        }
1484        src::RawTerminator::Return => {
1485            tgt::Statement::new(src_span, tgt::RawStatement::Return).into_block()
1486        }
1487        src::RawTerminator::UnwindResume => {
1488            tgt::Statement::new(src_span, tgt::RawStatement::Abort(AbortKind::Panic(None)))
1489                .into_block()
1490        }
1491        src::RawTerminator::Call {
1492            call,
1493            target,
1494            on_unwind: _,
1495        } => {
1496            // TODO: Have unwinds in the LLBC
1497            let target_block = translate_child_block(
1498                info,
1499                parent_loops,
1500                switch_exit_blocks,
1501                terminator.span,
1502                *target,
1503            );
1504            let mut block = opt_block_unwrap_or_nop(terminator.span, target_block);
1505            let st = tgt::Statement::new(src_span, tgt::RawStatement::Call(call.clone()));
1506            block.statements.insert(0, st);
1507            block
1508        }
1509        src::RawTerminator::Goto { target } => {
1510            let block = translate_child_block(
1511                info,
1512                parent_loops,
1513                switch_exit_blocks,
1514                terminator.span,
1515                *target,
1516            );
1517            let block = opt_block_unwrap_or_nop(terminator.span, block);
1518            block
1519        }
1520        src::RawTerminator::Switch { discr, targets } => {
1521            // Translate the target expressions
1522            let switch = match &targets {
1523                src::SwitchTargets::If(then_tgt, else_tgt) => {
1524                    // Translate the children expressions
1525                    let then_block = translate_child_block(
1526                        info,
1527                        parent_loops,
1528                        switch_exit_blocks,
1529                        terminator.span,
1530                        *then_tgt,
1531                    );
1532                    // We use the terminator span information in case then
1533                    // then statement is `None`
1534                    let then_block = opt_block_unwrap_or_nop(terminator.span, then_block);
1535                    let else_block = translate_child_block(
1536                        info,
1537                        parent_loops,
1538                        switch_exit_blocks,
1539                        terminator.span,
1540                        *else_tgt,
1541                    );
1542                    let else_block = opt_block_unwrap_or_nop(terminator.span, else_block);
1543
1544                    // Translate
1545                    tgt::Switch::If(discr.clone(), then_block, else_block)
1546                }
1547                src::SwitchTargets::SwitchInt(int_ty, targets, otherwise) => {
1548                    // Note that some branches can be grouped together, like
1549                    // here:
1550                    // ```
1551                    // match e {
1552                    //   E::V1 | E::V2 => ..., // Grouped
1553                    //   E::V3 => ...
1554                    // }
1555                    // ```
1556                    // We detect this by checking if a block has already been
1557                    // translated as one of the branches of the switch.
1558                    //
1559                    // Rk.: note there may be intermediate gotos depending
1560                    // on the MIR we use. Typically, we manage to detect the
1561                    // grouped branches with Optimized MIR, but not with Promoted
1562                    // MIR. See the comment in "tests/src/matches.rs".
1563
1564                    // We link block ids to:
1565                    // - vector of matched integer values
1566                    // - translated blocks
1567                    let mut branches: IndexMap<src::BlockId, (Vec<ScalarValue>, tgt::Block)> =
1568                        IndexMap::new();
1569
1570                    // Translate the children expressions
1571                    for (v, bid) in targets.iter() {
1572                        // Check if the block has already been translated:
1573                        // if yes, it means we need to group branches
1574                        if branches.contains_key(bid) {
1575                            // Already translated: add the matched value to
1576                            // the list of values
1577                            let branch = branches.get_mut(bid).unwrap();
1578                            branch.0.push(*v);
1579                        } else {
1580                            // Not translated: translate it
1581                            let block = translate_child_block(
1582                                info,
1583                                parent_loops,
1584                                switch_exit_blocks,
1585                                terminator.span,
1586                                *bid,
1587                            );
1588                            // We use the terminator span information in case then
1589                            // then statement is `None`
1590                            let block = opt_block_unwrap_or_nop(terminator.span, block);
1591                            branches.insert(*bid, (vec![*v], block));
1592                        }
1593                    }
1594                    let targets_blocks: Vec<(Vec<ScalarValue>, tgt::Block)> =
1595                        branches.into_iter().map(|(_, x)| x).collect();
1596
1597                    let otherwise_block = translate_child_block(
1598                        info,
1599                        parent_loops,
1600                        switch_exit_blocks,
1601                        terminator.span,
1602                        *otherwise,
1603                    );
1604                    // We use the terminator span information in case then
1605                    // then statement is `None`
1606                    let otherwise_block = opt_block_unwrap_or_nop(terminator.span, otherwise_block);
1607
1608                    // Translate
1609                    tgt::Switch::SwitchInt(discr.clone(), *int_ty, targets_blocks, otherwise_block)
1610                }
1611            };
1612
1613            // Return
1614            let span = tgt::combine_switch_targets_span(&switch);
1615            let span = combine_span(&src_span, &span);
1616            let st = tgt::RawStatement::Switch(switch);
1617            tgt::Statement::new(span, st).into_block()
1618        }
1619    }
1620}
1621
1622/// Remark: some values are boxed (here, the returned statement) so that they
1623/// are allocated on the heap. This reduces stack usage (we had problems with
1624/// stack overflows in the past). A more efficient solution would be to use loops
1625/// to make this code constant space, but that would require a serious rewriting.
1626fn translate_block(
1627    info: &mut BlockInfo<'_>,
1628    parent_loops: &Vec<src::BlockId>,
1629    switch_exit_blocks: &HashSet<src::BlockId>,
1630    block_id: src::BlockId,
1631) -> tgt::Block {
1632    // If the user activated this check: check that we didn't already translate
1633    // this block, and insert the block id in the set of already translated blocks.
1634    trace!(
1635        "Parent loops: {:?}, Parent switch exits: {:?}, Block id: {}",
1636        parent_loops,
1637        switch_exit_blocks,
1638        block_id
1639    );
1640    info.explored.insert(block_id);
1641
1642    let block = info.body.body.get(block_id).unwrap();
1643
1644    // Check if we enter a loop: if so, update parent_loops and the current_exit_block
1645    let is_loop = info.cfg.loop_entries.contains(&block_id);
1646    let mut nparent_loops: Vec<src::BlockId>;
1647    let nparent_loops = if info.cfg.loop_entries.contains(&block_id) {
1648        nparent_loops = parent_loops.clone();
1649        nparent_loops.push(block_id);
1650        &nparent_loops
1651    } else {
1652        parent_loops
1653    };
1654
1655    // If we enter a switch or a loop, we need to check if we own the exit
1656    // block, in which case we need to append it to the loop/switch body
1657    // in a sequence
1658    let is_switch = block.terminator.content.is_switch();
1659    let next_block = if is_loop {
1660        *info.exits_info.owned_loop_exits.get(&block_id).unwrap()
1661    } else if is_switch {
1662        *info.exits_info.owned_switch_exits.get(&block_id).unwrap()
1663    } else {
1664        None
1665    };
1666
1667    // If we enter a switch, add the exit block to the set
1668    // of outer exit blocks
1669    let nswitch_exit_blocks = if is_switch {
1670        let mut nexit_blocks = switch_exit_blocks.clone();
1671        match next_block {
1672            None => nexit_blocks,
1673            Some(bid) => {
1674                nexit_blocks.insert(bid);
1675                nexit_blocks
1676            }
1677        }
1678    } else {
1679        switch_exit_blocks.clone()
1680    };
1681
1682    // Translate the terminator and the subsequent blocks.
1683    // Note that this terminator is an option: we might ignore it
1684    // (if it is an exit).
1685
1686    let terminator =
1687        translate_terminator(info, nparent_loops, &nswitch_exit_blocks, &block.terminator);
1688
1689    // Translate the statements inside the block
1690    let statements = block
1691        .statements
1692        .iter()
1693        .filter_map(|st| translate_statement(st))
1694        .collect_vec();
1695
1696    // Prepend the statements to the terminator.
1697    let mut block = if let Some(st) = tgt::Block::from_seq(statements) {
1698        st.merge(terminator)
1699    } else {
1700        terminator
1701    };
1702
1703    if is_loop {
1704        // Put the loop body inside a `Loop`.
1705        block = tgt::Statement::new(block.span, tgt::RawStatement::Loop(block)).into_block()
1706    } else if !is_switch {
1707        assert!(next_block.is_none());
1708    }
1709
1710    // Concatenate the exit expression, if needs be
1711    if let Some(exit_block_id) = next_block {
1712        let next_block = ensure_sufficient_stack(|| {
1713            translate_block(info, parent_loops, switch_exit_blocks, exit_block_id)
1714        });
1715        block = block.merge(next_block);
1716    }
1717
1718    block
1719}
1720
1721fn translate_body_aux(ctx: &mut TransformCtx, src_body: &src::ExprBody) -> tgt::ExprBody {
1722    // Explore the function body to create the control-flow graph without backward
1723    // edges, and identify the loop entries (which are destinations of backward edges).
1724    let cfg_info = build_cfg_info(src_body);
1725    trace!("cfg_info: {:?}", cfg_info);
1726
1727    // Find the exit block for all the loops and switches, if such an exit point
1728    // exists.
1729    let exits_info = compute_loop_switch_exits(ctx, src_body, &cfg_info);
1730
1731    // Debugging
1732    trace!("exits_info:\n{:?}", exits_info);
1733
1734    // Translate the body by reconstructing the loops and the
1735    // conditional branchings.
1736    // Note that we shouldn't get `None`.
1737    let mut explored = HashSet::new();
1738    let mut info = BlockInfo {
1739        cfg: &cfg_info,
1740        body: src_body,
1741        exits_info: &exits_info,
1742        explored: &mut explored,
1743    };
1744    let tgt_body = translate_block(&mut info, &Vec::new(), &HashSet::new(), src::BlockId::ZERO);
1745
1746    tgt::ExprBody {
1747        span: src_body.span,
1748        locals: src_body.locals.clone(),
1749        comments: src_body.comments.clone(),
1750        body: tgt_body,
1751    }
1752}
1753
1754fn translate_body(ctx: &mut TransformCtx, body: &mut gast::Body) {
1755    use gast::Body::{Structured, Unstructured};
1756    let Unstructured(src_body) = body else {
1757        panic!("Called `ullbc_to_llbc` on an already restructured body")
1758    };
1759    trace!("About to translate to ullbc: {:?}", src_body.span);
1760    let tgt_body = translate_body_aux(ctx, src_body);
1761    *body = Structured(tgt_body);
1762}
1763
1764pub struct Transform;
1765impl TransformPass for Transform {
1766    fn transform_ctx(&self, ctx: &mut TransformCtx) {
1767        // Translate the bodies one at a time.
1768        ctx.for_each_body(|ctx, body| {
1769            translate_body(ctx, body);
1770        });
1771
1772        // Print the functions
1773        let fmt_ctx = &ctx.into_fmt();
1774        for fun in &ctx.translated.fun_decls {
1775            trace!(
1776                "# Signature:\n{}\n\n# Function definition:\n{}\n",
1777                fun.signature.with_ctx(fmt_ctx),
1778                fun.with_ctx(fmt_ctx),
1779            );
1780        }
1781        // Print the global variables
1782        for global in &ctx.translated.global_decls {
1783            trace!(
1784                "# Type:\n{}\n\n# Global definition:\n{}\n",
1785                global.ty.with_ctx(fmt_ctx),
1786                global.with_ctx(fmt_ctx),
1787            );
1788        }
1789
1790        if ctx.options.print_built_llbc {
1791            info!("# LLBC resulting from control-flow reconstruction:\n\n{ctx}\n",);
1792        }
1793    }
1794}