charon_lib/transform/
ullbc_to_llbc.rs

1//! ULLBC to LLBC
2//!
3//! We reconstruct the control-flow in the Unstructured LLBC.
4//!
5//! The reconstruction algorithm is not written to be efficient (its complexity
6//! is probably very bad), but it was not written to be: this is still an early
7//! stage and we want the algorithm to generate the best reconstruction as
8//! possible. We still need to test the algorithm on more interesting examples,
9//! and will consider making it more efficient once it is a bit mature and well
10//! tested.
11//! Also note that we more importantly focus on making the algorithm sound: the
12//! reconstructed program must always be equivalent to the original MIR program,
13//! and the fact that the reconstruction preserves this property must be obvious.
14//!
15//! Finally, we conjecture the execution time shouldn't be too much a problem:
16//! we don't expect the translation mechanism to be applied on super huge functions
17//! (which will be difficult to formally analyze), and the MIR graphs are actually
18//! not that big because statements are grouped into code blocks (a block is made
19//! of a list of statements, followed by a terminator - branchings and jumps can
20//! only be performed by terminators -, meaning that MIR graphs don't have that
21//! many nodes and edges).
22
23use crate::ast::*;
24use crate::common::ensure_sufficient_stack;
25use crate::errors::sanity_check;
26use crate::formatter::IntoFormatter;
27use crate::llbc_ast as tgt;
28use crate::meta::{Span, combine_span};
29use crate::pretty::FmtWithCtx;
30use crate::transform::TransformCtx;
31use crate::ullbc_ast::{self as src};
32use indexmap::IndexMap;
33use itertools::Itertools;
34use num_bigint::BigInt;
35use num_rational::BigRational;
36use petgraph::Direction;
37use petgraph::algo::toposort;
38use petgraph::graphmap::DiGraphMap;
39use std::collections::{BTreeSet, HashMap, HashSet, VecDeque};
40
41use super::ctx::TransformPass;
42
43/// Control-Flow Graph
44type Cfg = DiGraphMap<src::BlockId, ()>;
45
46/// Small utility
47struct BlockInfo<'a> {
48    cfg: &'a CfgInfo,
49    body: &'a src::ExprBody,
50    exits_info: &'a ExitInfo,
51    explored: &'a mut HashSet<src::BlockId>,
52}
53
54/// This structure contains various information about a function's CFG.
55#[derive(Debug)]
56struct CfgInfo {
57    /// The CFG
58    pub cfg: Cfg,
59    /// The CFG where all the backward edges have been removed
60    pub cfg_no_be: Cfg,
61    /// We consider the destination of the backward edges to be loop entries and
62    /// store them here.
63    pub loop_entries: HashSet<src::BlockId>,
64    /// The backward edges
65    pub backward_edges: HashSet<(src::BlockId, src::BlockId)>,
66    /// The blocks whose terminators are a switch are stored here.
67    pub switch_blocks: HashSet<src::BlockId>,
68    /// The set of nodes from where we can only reach error nodes (panic, etc.)
69    pub only_reach_error: HashSet<src::BlockId>,
70}
71
72/// Build the CFGs (the "regular" CFG and the CFG without backward edges) and
73/// compute some information like the loop entries and the switch blocks.
74fn build_cfg_info(body: &src::ExprBody) -> CfgInfo {
75    let mut cfg = CfgInfo {
76        cfg: Cfg::new(),
77        cfg_no_be: Cfg::new(),
78        loop_entries: HashSet::new(),
79        backward_edges: HashSet::new(),
80        switch_blocks: HashSet::new(),
81        only_reach_error: HashSet::new(),
82    };
83
84    // Add the nodes
85    for block_id in body.body.iter_indices() {
86        cfg.cfg.add_node(block_id);
87        cfg.cfg_no_be.add_node(block_id);
88    }
89
90    // Add the edges
91    let ancestors = HashSet::new();
92    let mut explored = HashSet::new();
93    build_cfg_partial_info_edges(
94        &mut cfg,
95        &ancestors,
96        &mut explored,
97        body,
98        src::BlockId::ZERO,
99    );
100
101    cfg
102}
103
104fn block_is_switch(body: &src::ExprBody, block_id: src::BlockId) -> bool {
105    let block = body.body.get(block_id).unwrap();
106    block.terminator.content.is_switch()
107}
108
109/// The terminator of the block is a panic, etc.
110fn block_is_error(body: &src::ExprBody, block_id: src::BlockId) -> bool {
111    let block = body.body.get(block_id).unwrap();
112    use src::RawTerminator::*;
113    match &block.terminator.content {
114        Abort(..) => true,
115        Goto { .. } | Switch { .. } | Return | Call { .. } | UnwindResume => false,
116    }
117}
118
119fn build_cfg_partial_info_edges(
120    cfg: &mut CfgInfo,
121    ancestors: &HashSet<src::BlockId>,
122    explored: &mut HashSet<src::BlockId>,
123    body: &src::ExprBody,
124    block_id: src::BlockId,
125) {
126    // Check if we already explored the current node
127    if explored.contains(&block_id) {
128        return;
129    }
130    explored.insert(block_id);
131
132    // Insert the current block in the set of ancestors blocks
133    let mut ancestors = ancestors.clone();
134    ancestors.insert(block_id);
135
136    // Check if it is a switch
137    if block_is_switch(body, block_id) {
138        cfg.switch_blocks.insert(block_id);
139    }
140
141    // Retrieve the block targets
142    let mut targets = body.body.get(block_id).unwrap().targets();
143    // Hack: we don't translate unwind paths in llbc so we ignore them here.
144    if let src::RawTerminator::Call { target, .. } =
145        body.body.get(block_id).unwrap().terminator.content
146    {
147        targets = vec![target];
148    }
149
150    let mut has_backward_edge = false;
151
152    // Add edges for all the targets and explore them, if they are not predecessors
153    for tgt in &targets {
154        // Insert the edge in the "regular" CFG
155        cfg.cfg.add_edge(block_id, *tgt, ());
156
157        // We need to check if it is a backward edge before inserting it in the
158        // CFG without backward edges and exploring it
159        if ancestors.contains(tgt) {
160            // This is a backward edge
161            has_backward_edge = true;
162            cfg.loop_entries.insert(*tgt);
163            cfg.backward_edges.insert((block_id, *tgt));
164        } else {
165            // Not a backward edge: insert the edge and explore
166            cfg.cfg_no_be.add_edge(block_id, *tgt, ());
167            ensure_sufficient_stack(|| {
168                build_cfg_partial_info_edges(cfg, &ancestors, explored, body, *tgt);
169            })
170        }
171    }
172
173    // Check if this node can only reach error nodes:
174    // - we check if the current node ends with an error terminator
175    // - or check that all the targets lead to error nodes
176    // Note that if there is a backward edge, we consider that we don't necessarily
177    // go to error.
178    if !has_backward_edge
179        && (block_is_error(body, block_id)
180            || (
181                // The targets are empty if this is an error node *or* a return node
182                !targets.is_empty() && targets.iter().all(|tgt| cfg.only_reach_error.contains(tgt))
183            ))
184    {
185        cfg.only_reach_error.insert(block_id);
186    }
187}
188
189#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
190struct BlockWithRank<T> {
191    /// A "rank" quantity that we use to order the nodes.
192    /// By placing the `rank` field before the `id`, we make sure that
193    /// we use the rank first in the lexicographic order.
194    rank: T,
195    id: src::BlockId,
196}
197
198type OrdBlockId = BlockWithRank<usize>;
199
200/// For the rank we use:
201/// - a "flow" quantity (see [BlocksInfo])
202/// - the *inverse* rank in the topological sort (i.e., `- topo_rank`)
203type FlowBlockId = BlockWithRank<(BigRational, isize)>;
204
205#[derive(Debug, Clone)]
206struct LoopExitCandidateInfo {
207    /// The occurrences going to this exit.
208    /// For every occurrence, we register the distance between the loop entry
209    /// and the node from which the edge going to the exit starts.
210    /// If later we have to choose between candidates with the same number
211    /// of occurrences, we choose the one with the smallest distances.
212    ///
213    /// Note that it *may* happen that we have several exit candidates referenced
214    /// more than once for one loop. This comes from the fact that whenever we
215    /// reach a node from which the loop entry is not reachable (without using a
216    /// backward edge leading to an outer loop entry), we register this node
217    /// as well as all its children as exit candidates.
218    /// Consider the following example:
219    /// ```text
220    /// while i < max {
221    ///     if cond {
222    ///         break;
223    ///     }
224    ///     s += i;
225    ///     i += 1
226    /// }
227    /// // All the below nodes are exit candidates (each of them is referenced twice)
228    /// s += 1;
229    /// return s;
230    /// ```
231    pub occurrences: Vec<usize>,
232}
233
234/// Check if a loop entry is reachable from a node, in a graph where we remove
235/// the backward edges going directly to the loop entry.
236///
237/// If the loop entry is not reachable, it means that:
238/// - the loop entry is not reachable at all
239/// - or it is only reachable through an outer loop
240///
241/// The starting node should be a (transitive) child of the loop entry.
242/// We use this to find candidates for loop exits.
243fn loop_entry_is_reachable_from_inner(
244    cfg: &CfgInfo,
245    loop_entry: src::BlockId,
246    block_id: src::BlockId,
247) -> bool {
248    // It is reachable in the complete graph. Check if it is reachable by not
249    // going through backward edges which go to outer loops. In practice, we
250    // just need to forbid the use of any backward edges at the exception of
251    // those which go directly to the current loop's entry. This means that we
252    // ignore backward edges to outer loops of course, but also backward edges
253    // to inner loops because we shouldn't need to follow those (there should be
254    // more direct paths).
255
256    // Explore the graph starting at block_id
257    let mut explored: HashSet<src::BlockId> = HashSet::new();
258    let mut stack: VecDeque<src::BlockId> = VecDeque::new();
259    stack.push_back(block_id);
260    while !stack.is_empty() {
261        let bid = stack.pop_front().unwrap();
262        if explored.contains(&bid) {
263            continue;
264        }
265        explored.insert(bid);
266
267        let next_ids = cfg.cfg.neighbors_directed(bid, Direction::Outgoing);
268        for next_id in next_ids {
269            // Check if this is a backward edge
270            if cfg.backward_edges.contains(&(bid, next_id)) {
271                // Backward edge: only allow those going directly to the current
272                // loop's entry
273                if next_id == loop_entry {
274                    // The loop entry is reachable
275                    return true;
276                } else {
277                    // Forbidden edge: ignore
278                    continue;
279                }
280            } else {
281                // Nothing special: add the node to the stack for further
282                // exploration
283                stack.push_back(next_id);
284            }
285        }
286    }
287
288    // The loop entry is not reachable
289    false
290}
291
292struct FilteredLoopParents {
293    remaining_parents: Vec<(src::BlockId, usize)>,
294    removed_parents: Vec<(src::BlockId, usize)>,
295}
296
297fn filter_loop_parents(
298    cfg: &CfgInfo,
299    parent_loops: &Vec<(src::BlockId, usize)>,
300    block_id: src::BlockId,
301) -> Option<FilteredLoopParents> {
302    let mut eliminate: usize = 0;
303    for (loop_id, _ldist) in parent_loops.iter().rev() {
304        if !loop_entry_is_reachable_from_inner(cfg, *loop_id, block_id) {
305            eliminate += 1;
306        } else {
307            break;
308        }
309    }
310
311    if eliminate > 0 {
312        // Split the vector of parents
313        let (remaining_parents, removed_parents) =
314            parent_loops.split_at(parent_loops.len() - eliminate);
315        let (mut remaining_parents, removed_parents) =
316            (remaining_parents.to_vec(), removed_parents.to_vec());
317
318        // Update the distance to the last loop - we just increment the distance
319        // by 1, because from the point of view of the parent loop, we just exited
320        // a block and go to the next sequence of instructions.
321        if !remaining_parents.is_empty() {
322            remaining_parents.last_mut().unwrap().1 += 1;
323        }
324
325        Some(FilteredLoopParents {
326            remaining_parents,
327            removed_parents,
328        })
329    } else {
330        None
331    }
332}
333
334/// List the nodes reachable from a starting point.
335/// We list the nodes and the depth (in the AST) at which they were found.
336fn list_reachable(cfg: &Cfg, start: src::BlockId) -> HashMap<src::BlockId, usize> {
337    let mut reachable: HashMap<src::BlockId, usize> = HashMap::new();
338    let mut stack: VecDeque<(src::BlockId, usize)> = VecDeque::new();
339    stack.push_back((start, 0));
340
341    while !stack.is_empty() {
342        let (bid, dist) = stack.pop_front().unwrap();
343
344        // Ignore this node if we already registered it with a better distance
345        match reachable.get(&bid) {
346            None => (),
347            Some(original_dist) => {
348                if *original_dist < dist {
349                    continue;
350                }
351            }
352        }
353
354        // Inset the node with its distance
355        reachable.insert(bid, dist);
356
357        // Add the children to the stack
358        for child in cfg.neighbors(bid) {
359            stack.push_back((child, dist + 1));
360        }
361    }
362
363    // Return
364    reachable
365}
366
367/// Register a node and its children as exit candidates for a list of
368/// parent loops.
369fn register_children_as_loop_exit_candidates(
370    cfg: &CfgInfo,
371    loop_exits: &mut HashMap<src::BlockId, IndexMap<src::BlockId, LoopExitCandidateInfo>>,
372    removed_parent_loops: &Vec<(src::BlockId, usize)>,
373    block_id: src::BlockId,
374) {
375    // List the reachable nodes
376    let reachable = list_reachable(&cfg.cfg_no_be, block_id);
377
378    let mut base_dist = 0;
379    // For every parent loop, in reverse order (we go from last to first in
380    // order to correctly compute the distances)
381    for (loop_id, loop_dist) in removed_parent_loops.iter().rev() {
382        // Update the distance to the loop entry
383        base_dist += *loop_dist;
384
385        // Retrieve the candidates
386        let candidates = loop_exits.get_mut(loop_id).unwrap();
387
388        // Update them
389        for (bid, dist) in reachable.iter() {
390            let distance = base_dist + *dist;
391            match candidates.get_mut(bid) {
392                None => {
393                    candidates.insert(
394                        *bid,
395                        LoopExitCandidateInfo {
396                            occurrences: vec![distance],
397                        },
398                    );
399                }
400                Some(c) => {
401                    c.occurrences.push(distance);
402                }
403            }
404        }
405    }
406}
407
408/// Compute the loop exit candidates.
409///
410/// There may be several candidates with the same "optimality" (same number of
411/// occurrences, etc.), in which case we choose the first one which was registered
412/// (the order in which we explore the graph is deterministic): this is why we
413/// store the candidates in a linked hash map.
414fn compute_loop_exit_candidates(
415    cfg: &CfgInfo,
416    explored: &mut HashSet<src::BlockId>,
417    ordered_loops: &mut Vec<src::BlockId>,
418    loop_exits: &mut HashMap<src::BlockId, IndexMap<src::BlockId, LoopExitCandidateInfo>>,
419    // List of parent loops, with the distance to the entry of the loop (the distance
420    // is the distance between the current node and the loop entry for the last parent,
421    // and the distance between the parents for the others).
422    mut parent_loops: Vec<(src::BlockId, usize)>,
423    block_id: src::BlockId,
424) {
425    if explored.contains(&block_id) {
426        return;
427    }
428    explored.insert(block_id);
429
430    // Check if we enter a loop - add the corresponding node if necessary
431    if cfg.loop_entries.contains(&block_id) {
432        parent_loops.push((block_id, 1));
433        ordered_loops.push(block_id);
434    } else {
435        // Increase the distance with the parent loop
436        if !parent_loops.is_empty() {
437            parent_loops.last_mut().unwrap().1 += 1;
438        }
439    };
440
441    // Retrieve the children - note that we ignore the back edges
442    let children = cfg.cfg_no_be.neighbors(block_id);
443    for child in children {
444        // If the parent loop entry is not reachable from the child without going
445        // through a backward edge which goes directly to the loop entry, consider
446        // this node a potential exit.
447        ensure_sufficient_stack(|| {
448            match filter_loop_parents(cfg, &parent_loops, child) {
449                None => {
450                    compute_loop_exit_candidates(
451                        cfg,
452                        explored,
453                        ordered_loops,
454                        loop_exits,
455                        parent_loops.clone(),
456                        child,
457                    );
458                }
459                Some(fparent_loops) => {
460                    // We filtered some parent loops: it means this child and its
461                    // children are loop exit candidates for all those loops: we must
462                    // thus register them.
463                    // Note that we register the child *and* its children: the reason
464                    // is that we might do something *then* actually jump to the exit.
465                    // For instance, the following block of code:
466                    // ```
467                    // if cond { break; } else { ... }
468                    // ```
469                    //
470                    // Gets translated in MIR to something like this:
471                    // ```
472                    // bb1: {
473                    //   if cond -> bb2 else -> bb3; // bb2 is not the real exit
474                    // }
475                    //
476                    // bb2: {
477                    //   goto bb4; // bb4 is the real exit
478                    // }
479                    // ```
480                    register_children_as_loop_exit_candidates(
481                        cfg,
482                        loop_exits,
483                        &fparent_loops.removed_parents,
484                        child,
485                    );
486
487                    // Explore, with the filtered parents
488                    compute_loop_exit_candidates(
489                        cfg,
490                        explored,
491                        ordered_loops,
492                        loop_exits,
493                        fparent_loops.remaining_parents,
494                        child,
495                    );
496                }
497            }
498        })
499    }
500}
501
502/// See [`compute_loop_switch_exits`] for
503/// explanations about what "exits" are.
504///
505/// The following function computes the loop exits. It acts as follows.
506///
507/// We keep track of a stack of the loops in which we entered.
508/// It is very easy to check when we enter a loop: loop entries are destinations
509/// of backward edges, which can be spotted with a simple graph exploration (see
510/// [`build_cfg_partial_info_edges`].
511/// The criteria to consider whether we exit a loop is the following:
512/// - we exit a loop if we go to a block from which we can't reach the loop
513///   entry at all
514/// - or if we can reach the loop entry, but must use a backward edge which goes
515///   to an outer loop
516///
517/// It is better explained on the following example:
518/// ```text
519/// 'outer while i < max {
520///     'inner while j < max {
521///        j += 1;
522///     }
523///     // (i)
524///     i += 1;
525/// }
526/// ```
527/// If we enter the inner loop then go to (i) from the inner loop, we consider
528/// that we exited the outer loop because:
529/// - we can reach the entry of the inner loop from (i) (by finishing then
530///   starting again an iteration of the outer loop)
531/// - but doing this requires taking a backward edge which goes to the outer loop
532///
533/// Whenever we exit a loop, we save the block we went to as an exit candidate
534/// for this loop. Note that there may by many exit candidates. For instance,
535/// in the below example:
536/// ```text
537/// while ... {
538///    ...
539///    if ... {
540///        // We can't reach the loop entry from here: this is an exit
541///        // candidate
542///        return;
543///    }
544/// }
545/// // This is another exit candidate - and this is the one we want to use
546/// // as the "real" exit...
547/// ...
548/// ```
549///
550/// Also note that it may happen that we go several times to the same exit (if
551/// we use breaks for instance): we record the number of times an exit candidate
552/// is used.
553///
554/// Once we listed all the exit candidates, we find the "best" one for every
555/// loop, starting with the outer loops. We start with outer loops because
556/// inner loops might use breaks to exit to the exit of outer loops: if we
557/// start with the inner loops, the exit which is "natural" for the outer loop
558/// might end up being used for one of the inner loops...
559///
560/// The best exit is the following one:
561/// - it is the one which is used the most times (note that there can be
562///   several candidates which are referenced strictly more than once: see the
563///   comment below)
564/// - if several exits have the same number of occurrences, we choose the one
565///   for which we goto the "earliest" (earliest meaning that the goto is close to
566///   the loop entry node in the AST). The reason is that all the loops should
567///   have an outer if ... then ... else ... which executes the loop body or goes
568///   to the exit (note that this is not necessarily the first
569///   if ... then ... else ... we find: loop conditions can be arbitrary
570///   expressions, containing branchings).
571///
572/// # Several candidates for a loop exit:
573/// =====================================
574/// There used to be a sanity check to ensure there are no two different
575/// candidates with exactly the same number of occurrences and distance from
576/// the entry of the loop, if the number of occurrences is > 1.
577///
578/// We removed it because it does happen, for instance here (the match
579/// introduces an `unreachable` node, and it has the same number of
580/// occurrences and the same distance to the loop entry as the `panic`
581/// node):
582///
583/// ```text
584/// pub fn list_nth_mut_loop_pair<'a, T>(
585///     mut ls: &'a mut List<T>,
586///     mut i: u32,
587/// ) -> &'a mut T {
588///     loop {
589///         match ls {
590///             List::Nil => {
591///                 panic!() // <-- best candidate
592///             }
593///             List::Cons(x, tl) => {
594///                 if i == 0 {
595///                     return x;
596///                 } else {
597///                     ls = tl;
598///                     i -= 1;
599///                 }
600///             }
601///             _ => {
602///               // Note that Rustc always introduces an unreachable branch after
603///               // desugaring matches.
604///               unreachable!(), // <-- best candidate
605///             }
606///         }
607///     }
608/// }
609/// ```
610///
611/// When this happens we choose an exit candidate whose edges don't necessarily
612/// lead to an error (above there are none, so we don't choose any exits). Note
613/// that this last condition is important to prevent loops from being unnecessarily
614/// nested:
615///
616/// ```text
617/// pub fn nested_loops_enum(step_out: usize, step_in: usize) -> usize {
618///     let mut s = 0;
619///
620///     for _ in 0..128 { // We don't want this loop to be nested with the loops below
621///         s += 1;
622///     }
623///
624///     for _ in 0..(step_out) {
625///         for _ in 0..(step_in) {
626///             s += 1;
627///         }
628///     }
629///
630///     s
631/// }
632/// ```
633fn compute_loop_exits(
634    ctx: &mut TransformCtx,
635    body: &src::ExprBody,
636    cfg: &CfgInfo,
637) -> HashMap<src::BlockId, Option<src::BlockId>> {
638    let mut explored = HashSet::new();
639    let mut ordered_loops = Vec::new();
640    let mut loop_exits = HashMap::new();
641
642    // Initialize the loop exits candidates
643    for loop_id in &cfg.loop_entries {
644        loop_exits.insert(*loop_id, IndexMap::new());
645    }
646
647    // Compute the candidates
648    compute_loop_exit_candidates(
649        cfg,
650        &mut explored,
651        &mut ordered_loops,
652        &mut loop_exits,
653        Vec::new(),
654        src::BlockId::ZERO,
655    );
656
657    {
658        // Debugging
659        let candidates: Vec<String> = loop_exits
660            .iter()
661            .map(|(loop_id, candidates)| format!("{loop_id} -> {candidates:?}"))
662            .collect();
663        trace!("Loop exit candidates:\n{}", candidates.join("\n"));
664    }
665
666    // Choose one candidate among the potential candidates.
667    let mut exits: HashSet<src::BlockId> = HashSet::new();
668    let mut chosen_loop_exits: HashMap<src::BlockId, Option<src::BlockId>> = HashMap::new();
669    // For every loop
670    for loop_id in ordered_loops {
671        // Check the candidates.
672        // Ignore the candidates which have already been chosen as exits for other
673        // loops (which should be outer loops).
674        // We choose the exit with:
675        // - the most occurrences
676        // - the least total distance (if there are several possibilities)
677        // - doesn't necessarily lead to an error (panic, unreachable)
678
679        // First:
680        // - filter the candidates
681        // - compute the number of occurrences
682        // - compute the sum of distances
683        // TODO: we could simply order by using a lexicographic order
684        let loop_exits = loop_exits
685            .get(&loop_id)
686            .unwrap()
687            .iter()
688            // If candidate already selected for another loop: ignore
689            .filter(|(candidate_id, _)| !exits.contains(candidate_id))
690            .map(|(candidate_id, candidate_info)| {
691                let num_occurrences = candidate_info.occurrences.len();
692                let dist_sum = candidate_info.occurrences.iter().sum();
693                (*candidate_id, num_occurrences, dist_sum)
694            })
695            .collect_vec();
696
697        trace!(
698            "Loop {}: possible exits:\n{}",
699            loop_id,
700            loop_exits
701                .iter()
702                .map(|(bid, occs, dsum)| format!(
703                    "{bid} -> {{ occurrences: {occs}, dist_sum: {dsum} }}",
704                ))
705                .collect::<Vec<String>>()
706                .join("\n")
707        );
708
709        // Second: actually select the proper candidate.
710
711        // We find the one with the highest occurrence and the smallest distance
712        // from the entry of the loop (note that we take care of listing the exit
713        // candidates in a deterministic order).
714        let mut best_exit: Option<src::BlockId> = None;
715        let mut best_occurrences = 0;
716        let mut best_dist_sum = std::usize::MAX;
717        for (candidate_id, occurrences, dist_sum) in &loop_exits {
718            if (*occurrences > best_occurrences)
719                || (*occurrences == best_occurrences && *dist_sum < best_dist_sum)
720            {
721                best_exit = Some(*candidate_id);
722                best_occurrences = *occurrences;
723                best_dist_sum = *dist_sum;
724            }
725        }
726
727        let possible_candidates: Vec<_> = loop_exits
728            .iter()
729            .filter_map(|(bid, occs, dsum)| {
730                if *occs == best_occurrences && *dsum == best_dist_sum {
731                    Some(*bid)
732                } else {
733                    None
734                }
735            })
736            .collect();
737        let num_possible_candidates = loop_exits.len();
738
739        // If there is exactly one one best candidate, it is easy.
740        // Otherwise we need to split further.
741        if num_possible_candidates > 1 {
742            trace!("Best candidates: {:?}", possible_candidates);
743            // TODO: if we use a lexicographic order we can merge this with the code
744            // above.
745            // Remove the candidates which only lead to errors (panic or unreachable).
746            let candidates: Vec<_> = possible_candidates
747                .iter()
748                .filter(|bid| !cfg.only_reach_error.contains(bid))
749                .collect();
750            // If there is exactly one candidate we select it
751            if candidates.len() == 1 {
752                let exit_id = *candidates[0];
753                exits.insert(exit_id);
754                trace!("Loop {loop_id}: selected the best exit candidate {exit_id}");
755                chosen_loop_exits.insert(loop_id, Some(exit_id));
756            } else {
757                // Otherwise we do not select any exit.
758                // We don't want to select any exit if we are in the below situation
759                // (all paths lead to errors). We added a sanity check below to
760                // catch the situations where there are several exits which don't
761                // lead to errors.
762                //
763                // Example:
764                // ========
765                // ```
766                // loop {
767                //     match ls {
768                //         List::Nil => {
769                //             panic!() // <-- best candidate
770                //         }
771                //         List::Cons(x, tl) => {
772                //             if i == 0 {
773                //                 return x;
774                //             } else {
775                //                 ls = tl;
776                //                 i -= 1;
777                //             }
778                //         }
779                //         _ => {
780                //           unreachable!(); // <-- best candidate (Rustc introduces an `unreachable` case)
781                //         }
782                //     }
783                // }
784                // ```
785                //
786                // Adding this sanity check so that we can see when there are
787                // several candidates.
788                let span = body.body[loop_id].terminator.span; // Taking *a* span from the block
789                sanity_check!(ctx, span, candidates.is_empty());
790                trace!(
791                    "Loop {loop_id}: did not select an exit candidate because they all lead to panics"
792                );
793                chosen_loop_exits.insert(loop_id, None);
794            }
795        } else {
796            // Register the exit, if there is one
797            match best_exit {
798                None => {
799                    // No exit was found
800                    trace!("Loop {loop_id}: could not find an exit candidate");
801                    chosen_loop_exits.insert(loop_id, None);
802                }
803                Some(exit_id) => {
804                    exits.insert(exit_id);
805                    trace!("Loop {loop_id}: selected the unique exit candidate {exit_id}");
806                    chosen_loop_exits.insert(loop_id, Some(exit_id));
807                }
808            }
809        }
810    }
811
812    // Return the chosen exits
813    trace!("Chosen loop exits: {:?}", chosen_loop_exits);
814    chosen_loop_exits
815}
816
817/// Information used to compute the switch exits.
818/// We compute this information for every block in the graph.
819/// Note that we make sure to use immutable sets because we rely a lot
820/// on cloning.
821#[derive(Debug, Clone)]
822struct BlocksInfo {
823    id: src::BlockId,
824    /// All the successors of the block
825    succs: BTreeSet<OrdBlockId>,
826    /// Let's say we put a quantity of water equal to 1 on the block, and the
827    /// water flows downards. Whenever there is a branching, the quantity of
828    /// water gets equally divided between the branches. When the control flows
829    /// join, we put the water back together. The set below computes the amount
830    /// of water received by each descendant of the node.
831    ///
832    /// TODO: there must be a known algorithm which computes this, right?...
833    /// This is exactly this problems:
834    /// <https://stackoverflow.com/questions/78221666/algorithm-for-total-flow-through-weighted-directed-acyclic-graph>
835    /// TODO: the way I compute this is not efficient.
836    ///
837    /// Remark: in order to rank the nodes, we also use the negation of the
838    /// rank given by the topological order. The last elements of the set
839    /// have the highest flow, that is they are the nodes to which the maximum
840    /// number of paths converge. If several nodes have the same flow, we want
841    /// to take the highest one in the hierarchy: hence the use of the inverse
842    /// of the topological rank.
843    ///
844    /// Ex.:
845    /// ```text
846    /// A  -- we start here
847    /// |
848    /// |---------------------------------------
849    /// |            |            |            |
850    /// B:(0.25,-1)  C:(0.25,-2)  D:(0.25,-3)  E:(0.25,-4)
851    /// |            |            |
852    /// |--------------------------
853    /// |
854    /// F:(0.75,-5)
855    /// |
856    /// |
857    /// G:(0.75,-6)
858    /// ```
859    /// The "best" node (with the highest (flow, rank) in the graph above is F.
860    flow: BTreeSet<FlowBlockId>,
861}
862
863/// Create an [OrdBlockId] from a block id and a rank given by a map giving
864/// a sort (topological in our use cases) over the graph.
865fn make_ord_block_id(
866    block_id: src::BlockId,
867    sort_map: &HashMap<src::BlockId, usize>,
868) -> OrdBlockId {
869    let rank = *sort_map.get(&block_id).unwrap();
870    OrdBlockId { id: block_id, rank }
871}
872
873/// Compute [BlocksInfo] for every block in the graph.
874/// This information is then used to compute the switch exits.
875fn compute_switch_exits_explore(
876    cfg: &CfgInfo,
877    tsort_map: &HashMap<src::BlockId, usize>,
878    memoized: &mut HashMap<src::BlockId, BlocksInfo>,
879    block_id: src::BlockId,
880) {
881    // Check if we already computer the info
882    if memoized.get(&block_id).is_some() {
883        return;
884    }
885
886    // Compute the block information for the children
887    let children_ids: Vec<src::BlockId> = cfg.cfg_no_be.neighbors(block_id).collect_vec();
888    children_ids
889        .iter()
890        .for_each(|bid| compute_switch_exits_explore(cfg, tsort_map, memoized, *bid));
891
892    // Retrieve the information
893    let children: Vec<&BlocksInfo> = children_ids
894        .iter()
895        .map(|bid| memoized.get(bid).unwrap())
896        .collect();
897
898    // Compute the successors
899    // Add the children themselves in their sets of successors
900    let all_succs: BTreeSet<OrdBlockId> = children.iter().fold(BTreeSet::new(), |acc, s| {
901        // Add the successors to the accumulator
902        let mut acc: BTreeSet<_> = acc.union(&s.succs).copied().collect();
903        // Add the child itself
904        acc.insert(make_ord_block_id(s.id, tsort_map));
905        acc
906    });
907
908    // Compute the "flows" (if there are children)
909    // TODO: this is computationally expensive...
910    let mut flow: HashMap<src::BlockId, (BigRational, isize)> = HashMap::new();
911
912    if children.len() > 0 {
913        // We need to divide the initial flow equally between the children
914        let factor = BigRational::new(BigInt::from(1), BigInt::from(children.len()));
915
916        // Small helper
917        let mut add_to_flow = |(id, f0): (src::BlockId, (BigRational, isize))| match flow.get(&id) {
918            None => flow.insert(id, f0),
919            Some(f1) => {
920                let f = f0.0 + f1.0.clone();
921                assert!(f0.1 == f1.1);
922                flow.insert(id, (f, f0.1))
923            }
924        };
925
926        // For each child, multiply the flows of its own children by the ratio,
927        // and add.
928        for child in children {
929            // First, add the child itself
930            let rank = isize::try_from(*tsort_map.get(&child.id).unwrap()).unwrap();
931            add_to_flow((child.id, (factor.clone(), -rank)));
932
933            // Then add its successors
934            for child1 in &child.flow {
935                add_to_flow((
936                    child1.id,
937                    (factor.clone() * child1.rank.0.clone(), child1.rank.1),
938                ));
939            }
940        }
941    }
942
943    // Put everything in an ordered set: the first block id will be the one with
944    // the highest flow, and in case of equality it will be the one with the
945    // smallest block id.
946    let flow: BTreeSet<FlowBlockId> = flow
947        .into_iter()
948        .map(|(id, rank)| BlockWithRank { rank, id })
949        .collect();
950
951    trace!("block: {block_id}, all successors: {all_succs:?}, flow: {flow:?}");
952
953    // Memoize
954    let info = BlocksInfo {
955        id: block_id,
956        succs: all_succs,
957        flow,
958    };
959    memoized.insert(block_id, info.clone());
960}
961
962/// Auxiliary helper
963///
964/// Check if it is possible to reach the exit of an outer switch from `start_bid`
965/// without going through the `exit_candidate`. We use the graph without
966/// backward edges.
967fn can_reach_outer_exit(
968    cfg: &CfgInfo,
969    outer_exits: &HashSet<src::BlockId>,
970    start_bid: src::BlockId,
971    exit_candidate: src::BlockId,
972) -> bool {
973    // The stack of blocks
974    let mut stack: Vec<src::BlockId> = vec![start_bid];
975    let mut explored: HashSet<src::BlockId> = HashSet::new();
976
977    while let Some(bid) = stack.pop() {
978        // Check if already explored
979        if explored.contains(&bid) {
980            break;
981        }
982        explored.insert(bid);
983
984        // Check if this is the exit candidate
985        if bid == exit_candidate {
986            // Stop exploring
987            break;
988        }
989
990        // Check if this is an outer exit
991        if outer_exits.contains(&bid) {
992            return true;
993        }
994
995        // Add the children to the stack
996        for child in cfg.cfg_no_be.neighbors(bid) {
997            stack.push(child);
998        }
999    }
1000
1001    false
1002}
1003
1004/// See [`compute_loop_switch_exits`] for
1005/// explanations about what "exits" are.
1006///
1007/// In order to compute the switch exits, we simply recursively compute a
1008/// topologically ordered set of "filtered successors" as follows (note
1009/// that we work in the CFG *without* back edges):
1010/// - for a block which doesn't branch (only one successor), the filtered
1011///   successors is the set of reachable nodes.
1012/// - for a block which branches, we compute the nodes reachable from all
1013///   the children, and find the "best" intersection between those.
1014///   Note that we find the "best" intersection (a pair of branches which
1015///   maximize the intersection of filtered successors) because some branches
1016///   might never join the control-flow of the other branches, if they contain
1017///   a `break`, `return`, `panic`, etc., like here:
1018///   ```text
1019///   if b { x = 3; } { return; }
1020///   y += x;
1021///   ...
1022///   ```
1023/// Note that with nested switches, the branches of the inner switches might
1024/// goto the exits of the outer switches: for this reason, we give precedence
1025/// to the outer switches.
1026fn compute_switch_exits(
1027    cfg: &CfgInfo,
1028    tsort_map: &HashMap<src::BlockId, usize>,
1029) -> HashMap<src::BlockId, Option<src::BlockId>> {
1030    // Compute the successors info map, starting at the root node
1031    let mut succs_info_map = HashMap::new();
1032    trace!(
1033        "- cfg.cfg:\n{:?}\n- cfg.cfg_no_be:\n{:?}\n- cfg.switch_blocks:\n{:?}",
1034        cfg.cfg, cfg.cfg_no_be, cfg.switch_blocks
1035    );
1036    let _ = compute_switch_exits_explore(cfg, tsort_map, &mut succs_info_map, src::BlockId::ZERO);
1037
1038    // We need to give precedence to the outer switches: we thus iterate
1039    // over the switch blocks in topological order.
1040    let mut sorted_switch_blocks: BTreeSet<OrdBlockId> = BTreeSet::new();
1041    for bid in cfg.switch_blocks.iter() {
1042        sorted_switch_blocks.insert(make_ord_block_id(*bid, tsort_map));
1043    }
1044    trace!("sorted_switch_blocks: {:?}", sorted_switch_blocks);
1045
1046    // Debugging: print all the successors
1047    {
1048        trace!("Successors info:\n{}\n", {
1049            let mut out = vec![];
1050            for (bid, info) in &succs_info_map {
1051                out.push(
1052                    format!(
1053                        "{} -> {{succs: {:?}, flow: {:?}}}",
1054                        bid, &info.succs, &info.flow
1055                    )
1056                    .to_string(),
1057                );
1058            }
1059            out.join("\n")
1060        });
1061    }
1062
1063    // For every node which is a switch, retrieve the exit.
1064    // As the set of intersection of successors is topologically sorted, the
1065    // exit should be the first node in the set (if the set is non empty).
1066    // Also, we need to explore the nodes in topological order, to give
1067    // precedence to the outer switches.
1068    let mut exits_set = HashSet::new();
1069    let mut ord_exits_set = BTreeSet::new();
1070    let mut exits = HashMap::new();
1071    for bid in sorted_switch_blocks {
1072        trace!("Finding exit candidate for: {bid:?}");
1073        let bid = bid.id;
1074        let info = succs_info_map.get(&bid).unwrap();
1075        let succs = &info.flow;
1076        // Find the best successor: this is the last one (with the highest flow,
1077        // and the highest reverse topological rank).
1078        if succs.is_empty() {
1079            trace!("{bid:?} has no successors");
1080            exits.insert(bid, None);
1081        } else {
1082            // We have an exit candidate: check that it was not already
1083            // taken by an external switch
1084            let exit = succs.last().unwrap();
1085            trace!("{bid:?} has an exit candidate: {exit:?}");
1086            if exits_set.contains(&exit.id) {
1087                trace!("Ignoring the exit candidate because already taken by an external switch");
1088                exits.insert(bid, None);
1089            } else {
1090                // It was not taken by an external switch.
1091                //
1092                // We must check that we can't reach the exit of an external
1093                // switch from one of the branches, without going through the
1094                // exit candidate.
1095                // We do this by simply checking that we can't reach any exits
1096                // (and use the fact that we explore the switch by using a
1097                // topological order to not discard valid exit candidates).
1098                //
1099                // The reason is that it can lead to code like the following:
1100                // ```
1101                // if ... { // if #1
1102                //   if ... { // if #2
1103                //     ...
1104                //     // here, we have a `goto b1`, where b1 is the exit
1105                //     // of if #2: we thus stop translating the blocks.
1106                //   }
1107                //   else {
1108                //     ...
1109                //     // here, we have a `goto b2`, where b2 is the exit
1110                //     // of if #1: we thus stop translating the blocks.
1111                //   }
1112                //   // We insert code for the block b1 here (which is the exit of
1113                //   // the exit of if #2). However, this block should only
1114                //   // be executed in the branch "then" of the if #2, not in
1115                //   // the branch "else".
1116                //   ...
1117                // }
1118                // else {
1119                //   ...
1120                // }
1121                // ```
1122
1123                // First: we do a quick check (does the set of all successors
1124                // intersect the set of exits for outer blocks?). If yes, we do
1125                // a more precise analysis: we check if we can reach the exit
1126                // *without going through* the exit candidate.
1127                if info.succs.intersection(&ord_exits_set).next().is_none()
1128                    || !can_reach_outer_exit(cfg, &exits_set, bid, exit.id)
1129                {
1130                    trace!("Keeping the exit candidate");
1131                    // No intersection: ok
1132                    exits_set.insert(exit.id);
1133                    ord_exits_set.insert(make_ord_block_id(exit.id, tsort_map));
1134                    exits.insert(bid, Some(exit.id));
1135                } else {
1136                    trace!(
1137                        "Ignoring the exit candidate because of an intersection with external switches"
1138                    );
1139                    exits.insert(bid, None);
1140                }
1141            }
1142        }
1143    }
1144
1145    exits
1146}
1147
1148/// The exits of a graph
1149#[derive(Debug, Clone)]
1150struct ExitInfo {
1151    /// The loop exits
1152    loop_exits: HashMap<src::BlockId, Option<src::BlockId>>,
1153    /// Some loop exits actually belong to outer switches. We still need
1154    /// to track them in the loop exits, in order to know when we should
1155    /// insert a break. However, we need to make sure we don't add the
1156    /// corresponding block in a sequence, after having translated the
1157    /// loop, like so:
1158    /// ```text
1159    /// loop {
1160    ///   loop_body
1161    /// };
1162    /// exit_blocks // OK if the exit "belongs" to the loop
1163    /// ```
1164    ///
1165    /// In case the exit doesn't belong to the loop:
1166    /// ```text
1167    /// if b {
1168    ///   loop {
1169    ///     loop_body
1170    ///   } // no exit blocks after the loop
1171    /// }
1172    /// else {
1173    ///   ...
1174    /// };
1175    /// exit_blocks // the exit blocks are here
1176    /// ```
1177    owned_loop_exits: HashMap<src::BlockId, Option<src::BlockId>>,
1178    /// The switch exits.
1179    /// Note that the switch exits are always owned.
1180    owned_switch_exits: HashMap<src::BlockId, Option<src::BlockId>>,
1181}
1182
1183/// Compute the exits for the loops and the switches (switch on integer and
1184/// if ... then ... else ...). We need to do this because control-flow in MIR
1185/// is destructured: we have gotos everywhere.
1186///
1187/// Let's consider the following piece of code:
1188/// ```text
1189/// if cond1 { ... } else { ... };
1190/// if cond2 { ... } else { ... };
1191/// ```
1192/// Once converted to MIR, the control-flow is destructured, which means we
1193/// have gotos everywhere. When reconstructing the control-flow, we have
1194/// to be careful about the point where we should join the two branches of
1195/// the first if.
1196/// For instance, if we don't notice they should be joined at some point (i.e,
1197/// whatever the branch we take, there is a moment when we go to the exact
1198/// same place, just before the second if), we might generate code like
1199/// this, with some duplicata:
1200/// ```text
1201/// if cond1 { ...; if cond2 { ... } else { ...} }
1202/// else { ...; if cond2 { ... } else { ...} }
1203/// ```
1204///
1205/// Such a reconstructed program is valid, but it is definitely non-optimal:
1206/// it is very different from the original program (making it less clean and
1207/// clear), more bloated, and might involve duplicating the proof effort.
1208///
1209/// For this reason, we need to find the "exit" of the first loop, which is
1210/// the point where the two branches join. Note that this can be a bit tricky,
1211/// because there may be more than two branches (if we do `switch(x) { ... }`),
1212/// and some of them might not join (if they contain a `break`, `panic`,
1213/// `return`, etc.).
1214///
1215/// Finally, some similar issues arise for loops. For instance, let's consider
1216/// the following piece of code:
1217/// ```text
1218/// while cond1 {
1219///   e1;
1220///   if cond2 {
1221///     break;
1222///   }
1223///   e2;
1224/// }
1225/// e3;
1226/// return;
1227/// ```
1228///
1229/// Note that in MIR, the loop gets desugared to an if ... then ... else ....
1230/// From the MIR, We want to generate something like this:
1231/// ```text
1232/// loop {
1233///   if cond1 {
1234///     e1;
1235///     if cond2 {
1236///       break;
1237///     }
1238///     e2;
1239///     continue;
1240///   }
1241///   else {
1242///     break;
1243///   }
1244/// };
1245/// e3;
1246/// return;
1247/// ```
1248///
1249/// But if we don't pay attention, we might end up with that, once again with
1250/// duplications:
1251/// ```text
1252/// loop {
1253///   if cond1 {
1254///     e1;
1255///     if cond2 {
1256///       e3;
1257///       return;
1258///     }
1259///     e2;
1260///     continue;
1261///   }
1262///   else {
1263///     e3;
1264///     return;
1265///   }
1266/// }
1267/// ```
1268/// We thus have to notice that if the loop condition is false, we goto the same
1269/// block as when following the goto introduced by the break inside the loop, and
1270/// this block is dubbed the "loop exit".
1271///
1272/// The following function thus computes the "exits" for loops and switches, which
1273/// are basically the points where control-flow joins.
1274fn compute_loop_switch_exits(
1275    ctx: &mut TransformCtx,
1276    body: &src::ExprBody,
1277    cfg_info: &CfgInfo,
1278) -> ExitInfo {
1279    // Use the CFG without backward edges to topologically sort the nodes.
1280    // Note that `toposort` returns `Err` if and only if it finds cycles (which
1281    // can't happen).
1282    let tsorted: Vec<src::BlockId> = toposort(&cfg_info.cfg_no_be, None).unwrap();
1283
1284    // Build the map: block id -> topological sort rank
1285    let tsort_map: HashMap<src::BlockId, usize> = tsorted
1286        .into_iter()
1287        .enumerate()
1288        .map(|(i, block_id)| (block_id, i))
1289        .collect();
1290    trace!("tsort_map:\n{:?}", tsort_map);
1291
1292    // Compute the loop exits
1293    let loop_exits = compute_loop_exits(ctx, body, cfg_info);
1294    trace!("loop_exits:\n{:?}", loop_exits);
1295
1296    // Compute the switch exits
1297    let switch_exits = compute_switch_exits(cfg_info, &tsort_map);
1298    trace!("switch_exits:\n{:?}", switch_exits);
1299
1300    // Compute the exit info
1301    let mut exit_info = ExitInfo {
1302        loop_exits: HashMap::new(),
1303        owned_loop_exits: HashMap::new(),
1304        owned_switch_exits: HashMap::new(),
1305    };
1306
1307    // We need to give precedence to the outer switches and loops: we thus iterate
1308    // over the blocks in topological order.
1309    let mut sorted_blocks: BTreeSet<OrdBlockId> = BTreeSet::new();
1310    for bid in cfg_info
1311        .loop_entries
1312        .iter()
1313        .chain(cfg_info.switch_blocks.iter())
1314    {
1315        sorted_blocks.insert(make_ord_block_id(*bid, &tsort_map));
1316    }
1317
1318    // Keep track of the exits which were already attributed
1319    let mut all_exits = HashSet::new();
1320
1321    // Put all this together
1322    for bid in sorted_blocks {
1323        let bid = bid.id;
1324        // Check if loop or switch block
1325        if cfg_info.loop_entries.contains(&bid) {
1326            // This is a loop.
1327            //
1328            // For loops, we always register the exit (if there is one).
1329            // However, the exit may be owned by an outer switch (note
1330            // that we already took care of spreading the exits between
1331            // the inner/outer loops)
1332            let exit_id = loop_exits.get(&bid).unwrap();
1333            exit_info.loop_exits.insert(bid, *exit_id);
1334
1335            // Check if we "own" the exit
1336            match exit_id {
1337                None => {
1338                    // No exit
1339                    exit_info.owned_loop_exits.insert(bid, None);
1340                }
1341                Some(exit_id) => {
1342                    if all_exits.contains(exit_id) {
1343                        // We don't own it
1344                        exit_info.owned_loop_exits.insert(bid, None);
1345                    } else {
1346                        // We own it
1347                        exit_info.owned_loop_exits.insert(bid, Some(*exit_id));
1348                        all_exits.insert(*exit_id);
1349                    }
1350                }
1351            }
1352        } else {
1353            // For switches: check that the exit was not already given to a
1354            // loop
1355            let exit_id = switch_exits.get(&bid).unwrap();
1356
1357            match exit_id {
1358                None => {
1359                    // No exit
1360                    exit_info.owned_switch_exits.insert(bid, None);
1361                }
1362                Some(exit_id) => {
1363                    if all_exits.contains(exit_id) {
1364                        // We don't own it
1365                        exit_info.owned_switch_exits.insert(bid, None);
1366                    } else {
1367                        // We own it
1368                        exit_info.owned_switch_exits.insert(bid, Some(*exit_id));
1369                        all_exits.insert(*exit_id);
1370                    }
1371                }
1372            }
1373        }
1374    }
1375
1376    exit_info
1377}
1378
1379fn get_goto_kind(
1380    exits_info: &ExitInfo,
1381    parent_loops: &Vec<src::BlockId>,
1382    switch_exit_blocks: &HashSet<src::BlockId>,
1383    next_block_id: src::BlockId,
1384) -> GotoKind {
1385    // First explore the parent loops in revert order
1386    for (i, loop_id) in parent_loops.iter().rev().enumerate() {
1387        // If we goto a loop entry node: this is a 'continue'
1388        if next_block_id == *loop_id {
1389            return GotoKind::Continue(i);
1390        } else {
1391            // If we goto a loop exit node: this is a 'break'
1392            if let Some(exit_id) = exits_info.loop_exits.get(loop_id).unwrap() {
1393                if next_block_id == *exit_id {
1394                    return GotoKind::Break(i);
1395                }
1396            }
1397        }
1398    }
1399
1400    // Check if the goto exits the current block
1401    if switch_exit_blocks.contains(&next_block_id) {
1402        return GotoKind::ExitBlock;
1403    }
1404
1405    // Default
1406    GotoKind::Goto
1407}
1408
1409enum GotoKind {
1410    Break(usize),
1411    Continue(usize),
1412    ExitBlock,
1413    Goto,
1414}
1415
1416/// `parent_span`: we need some span data for the new statement.
1417/// We use the one for the parent terminator.
1418fn translate_child_block(
1419    info: &mut BlockInfo<'_>,
1420    parent_loops: &Vec<src::BlockId>,
1421    switch_exit_blocks: &HashSet<src::BlockId>,
1422    parent_span: Span,
1423    child_id: src::BlockId,
1424) -> Option<tgt::Block> {
1425    // Check if this is a backward call
1426    match get_goto_kind(info.exits_info, parent_loops, switch_exit_blocks, child_id) {
1427        GotoKind::Break(index) => {
1428            let st = tgt::RawStatement::Break(index);
1429            Some(tgt::Statement::new(parent_span, st).into_block())
1430        }
1431        GotoKind::Continue(index) => {
1432            let st = tgt::RawStatement::Continue(index);
1433            Some(tgt::Statement::new(parent_span, st).into_block())
1434        }
1435        // If we are going to an exit block we simply ignore the goto
1436        GotoKind::ExitBlock => None,
1437        GotoKind::Goto => {
1438            // "Standard" goto: just recursively translate
1439            ensure_sufficient_stack(|| {
1440                Some(translate_block(
1441                    info,
1442                    parent_loops,
1443                    switch_exit_blocks,
1444                    child_id,
1445                ))
1446            })
1447        }
1448    }
1449}
1450
1451fn opt_block_unwrap_or_nop(span: Span, opt_block: Option<tgt::Block>) -> tgt::Block {
1452    opt_block.unwrap_or_else(|| tgt::Statement::new(span, tgt::RawStatement::Nop).into_block())
1453}
1454
1455fn translate_statement(st: &src::Statement) -> Option<tgt::Statement> {
1456    let src_span = st.span;
1457    let st = match st.content.clone() {
1458        src::RawStatement::Assign(place, rvalue) => tgt::RawStatement::Assign(place, rvalue),
1459        src::RawStatement::SetDiscriminant(place, variant_id) => {
1460            tgt::RawStatement::SetDiscriminant(place, variant_id)
1461        }
1462        src::RawStatement::CopyNonOverlapping(copy) => tgt::RawStatement::CopyNonOverlapping(copy),
1463        src::RawStatement::StorageLive(var_id) => tgt::RawStatement::StorageLive(var_id),
1464        src::RawStatement::StorageDead(var_id) => tgt::RawStatement::StorageDead(var_id),
1465        src::RawStatement::Deinit(place) => tgt::RawStatement::Deinit(place),
1466        src::RawStatement::Drop(place, tref) => tgt::RawStatement::Drop(place, tref),
1467        src::RawStatement::Assert(assert) => tgt::RawStatement::Assert(assert),
1468        src::RawStatement::Nop => tgt::RawStatement::Nop,
1469        src::RawStatement::Error(s) => tgt::RawStatement::Error(s),
1470    };
1471    Some(tgt::Statement::new(src_span, st))
1472}
1473
1474fn translate_terminator(
1475    info: &mut BlockInfo<'_>,
1476    parent_loops: &Vec<src::BlockId>,
1477    switch_exit_blocks: &HashSet<src::BlockId>,
1478    terminator: &src::Terminator,
1479) -> tgt::Block {
1480    let src_span = terminator.span;
1481
1482    match &terminator.content {
1483        src::RawTerminator::Abort(kind) => {
1484            tgt::Statement::new(src_span, tgt::RawStatement::Abort(kind.clone())).into_block()
1485        }
1486        src::RawTerminator::Return => {
1487            tgt::Statement::new(src_span, tgt::RawStatement::Return).into_block()
1488        }
1489        src::RawTerminator::UnwindResume => {
1490            tgt::Statement::new(src_span, tgt::RawStatement::Abort(AbortKind::Panic(None)))
1491                .into_block()
1492        }
1493        src::RawTerminator::Call {
1494            call,
1495            target,
1496            on_unwind: _,
1497        } => {
1498            // TODO: Have unwinds in the LLBC
1499            let target_block = translate_child_block(
1500                info,
1501                parent_loops,
1502                switch_exit_blocks,
1503                terminator.span,
1504                *target,
1505            );
1506            let mut block = opt_block_unwrap_or_nop(terminator.span, target_block);
1507            let st = tgt::Statement::new(src_span, tgt::RawStatement::Call(call.clone()));
1508            block.statements.insert(0, st);
1509            block
1510        }
1511        src::RawTerminator::Goto { target } => {
1512            let block = translate_child_block(
1513                info,
1514                parent_loops,
1515                switch_exit_blocks,
1516                terminator.span,
1517                *target,
1518            );
1519            let block = opt_block_unwrap_or_nop(terminator.span, block);
1520            block
1521        }
1522        src::RawTerminator::Switch { discr, targets } => {
1523            // Translate the target expressions
1524            let switch = match &targets {
1525                src::SwitchTargets::If(then_tgt, else_tgt) => {
1526                    // Translate the children expressions
1527                    let then_block = translate_child_block(
1528                        info,
1529                        parent_loops,
1530                        switch_exit_blocks,
1531                        terminator.span,
1532                        *then_tgt,
1533                    );
1534                    // We use the terminator span information in case then
1535                    // then statement is `None`
1536                    let then_block = opt_block_unwrap_or_nop(terminator.span, then_block);
1537                    let else_block = translate_child_block(
1538                        info,
1539                        parent_loops,
1540                        switch_exit_blocks,
1541                        terminator.span,
1542                        *else_tgt,
1543                    );
1544                    let else_block = opt_block_unwrap_or_nop(terminator.span, else_block);
1545
1546                    // Translate
1547                    tgt::Switch::If(discr.clone(), then_block, else_block)
1548                }
1549                src::SwitchTargets::SwitchInt(int_ty, targets, otherwise) => {
1550                    // Note that some branches can be grouped together, like
1551                    // here:
1552                    // ```
1553                    // match e {
1554                    //   E::V1 | E::V2 => ..., // Grouped
1555                    //   E::V3 => ...
1556                    // }
1557                    // ```
1558                    // We detect this by checking if a block has already been
1559                    // translated as one of the branches of the switch.
1560                    //
1561                    // Rk.: note there may be intermediate gotos depending
1562                    // on the MIR we use. Typically, we manage to detect the
1563                    // grouped branches with Optimized MIR, but not with Promoted
1564                    // MIR. See the comment in "tests/src/matches.rs".
1565
1566                    // We link block ids to:
1567                    // - vector of matched integer values
1568                    // - translated blocks
1569                    let mut branches: IndexMap<src::BlockId, (Vec<ScalarValue>, tgt::Block)> =
1570                        IndexMap::new();
1571
1572                    // Translate the children expressions
1573                    for (v, bid) in targets.iter() {
1574                        // Check if the block has already been translated:
1575                        // if yes, it means we need to group branches
1576                        if branches.contains_key(bid) {
1577                            // Already translated: add the matched value to
1578                            // the list of values
1579                            let branch = branches.get_mut(bid).unwrap();
1580                            branch.0.push(*v);
1581                        } else {
1582                            // Not translated: translate it
1583                            let block = translate_child_block(
1584                                info,
1585                                parent_loops,
1586                                switch_exit_blocks,
1587                                terminator.span,
1588                                *bid,
1589                            );
1590                            // We use the terminator span information in case then
1591                            // then statement is `None`
1592                            let block = opt_block_unwrap_or_nop(terminator.span, block);
1593                            branches.insert(*bid, (vec![*v], block));
1594                        }
1595                    }
1596                    let targets_blocks: Vec<(Vec<ScalarValue>, tgt::Block)> =
1597                        branches.into_iter().map(|(_, x)| x).collect();
1598
1599                    let otherwise_block = translate_child_block(
1600                        info,
1601                        parent_loops,
1602                        switch_exit_blocks,
1603                        terminator.span,
1604                        *otherwise,
1605                    );
1606                    // We use the terminator span information in case then
1607                    // then statement is `None`
1608                    let otherwise_block = opt_block_unwrap_or_nop(terminator.span, otherwise_block);
1609
1610                    // Translate
1611                    tgt::Switch::SwitchInt(discr.clone(), *int_ty, targets_blocks, otherwise_block)
1612                }
1613            };
1614
1615            // Return
1616            let span = tgt::combine_switch_targets_span(&switch);
1617            let span = combine_span(&src_span, &span);
1618            let st = tgt::RawStatement::Switch(switch);
1619            tgt::Statement::new(span, st).into_block()
1620        }
1621    }
1622}
1623
1624/// Remark: some values are boxed (here, the returned statement) so that they
1625/// are allocated on the heap. This reduces stack usage (we had problems with
1626/// stack overflows in the past). A more efficient solution would be to use loops
1627/// to make this code constant space, but that would require a serious rewriting.
1628fn translate_block(
1629    info: &mut BlockInfo<'_>,
1630    parent_loops: &Vec<src::BlockId>,
1631    switch_exit_blocks: &HashSet<src::BlockId>,
1632    block_id: src::BlockId,
1633) -> tgt::Block {
1634    // If the user activated this check: check that we didn't already translate
1635    // this block, and insert the block id in the set of already translated blocks.
1636    trace!(
1637        "Parent loops: {:?}, Parent switch exits: {:?}, Block id: {}",
1638        parent_loops, switch_exit_blocks, block_id
1639    );
1640    info.explored.insert(block_id);
1641
1642    let block = info.body.body.get(block_id).unwrap();
1643
1644    // Check if we enter a loop: if so, update parent_loops and the current_exit_block
1645    let is_loop = info.cfg.loop_entries.contains(&block_id);
1646    let mut nparent_loops: Vec<src::BlockId>;
1647    let nparent_loops = if info.cfg.loop_entries.contains(&block_id) {
1648        nparent_loops = parent_loops.clone();
1649        nparent_loops.push(block_id);
1650        &nparent_loops
1651    } else {
1652        parent_loops
1653    };
1654
1655    // If we enter a switch or a loop, we need to check if we own the exit
1656    // block, in which case we need to append it to the loop/switch body
1657    // in a sequence
1658    let is_switch = block.terminator.content.is_switch();
1659    let next_block = if is_loop {
1660        *info.exits_info.owned_loop_exits.get(&block_id).unwrap()
1661    } else if is_switch {
1662        *info.exits_info.owned_switch_exits.get(&block_id).unwrap()
1663    } else {
1664        None
1665    };
1666
1667    // If we enter a switch, add the exit block to the set
1668    // of outer exit blocks
1669    let nswitch_exit_blocks = if is_switch {
1670        let mut nexit_blocks = switch_exit_blocks.clone();
1671        match next_block {
1672            None => nexit_blocks,
1673            Some(bid) => {
1674                nexit_blocks.insert(bid);
1675                nexit_blocks
1676            }
1677        }
1678    } else {
1679        switch_exit_blocks.clone()
1680    };
1681
1682    // Translate the terminator and the subsequent blocks.
1683    // Note that this terminator is an option: we might ignore it
1684    // (if it is an exit).
1685
1686    let terminator =
1687        translate_terminator(info, nparent_loops, &nswitch_exit_blocks, &block.terminator);
1688
1689    // Translate the statements inside the block
1690    let statements = block
1691        .statements
1692        .iter()
1693        .filter_map(|st| translate_statement(st))
1694        .collect_vec();
1695
1696    // Prepend the statements to the terminator.
1697    let mut block = if let Some(st) = tgt::Block::from_seq(statements) {
1698        st.merge(terminator)
1699    } else {
1700        terminator
1701    };
1702
1703    if is_loop {
1704        // Put the loop body inside a `Loop`.
1705        block = tgt::Statement::new(block.span, tgt::RawStatement::Loop(block)).into_block()
1706    } else if !is_switch {
1707        assert!(next_block.is_none());
1708    }
1709
1710    // Concatenate the exit expression, if needs be
1711    if let Some(exit_block_id) = next_block {
1712        let next_block = ensure_sufficient_stack(|| {
1713            translate_block(info, parent_loops, switch_exit_blocks, exit_block_id)
1714        });
1715        block = block.merge(next_block);
1716    }
1717
1718    block
1719}
1720
1721fn translate_body_aux(ctx: &mut TransformCtx, src_body: &src::ExprBody) -> tgt::ExprBody {
1722    // Explore the function body to create the control-flow graph without backward
1723    // edges, and identify the loop entries (which are destinations of backward edges).
1724    let cfg_info = build_cfg_info(src_body);
1725    trace!("cfg_info: {:?}", cfg_info);
1726
1727    // Find the exit block for all the loops and switches, if such an exit point
1728    // exists.
1729    let exits_info = compute_loop_switch_exits(ctx, src_body, &cfg_info);
1730
1731    // Debugging
1732    trace!("exits_info:\n{:?}", exits_info);
1733
1734    // Translate the body by reconstructing the loops and the
1735    // conditional branchings.
1736    // Note that we shouldn't get `None`.
1737    let mut explored = HashSet::new();
1738    let mut info = BlockInfo {
1739        cfg: &cfg_info,
1740        body: src_body,
1741        exits_info: &exits_info,
1742        explored: &mut explored,
1743    };
1744    let tgt_body = translate_block(&mut info, &Vec::new(), &HashSet::new(), src::BlockId::ZERO);
1745
1746    tgt::ExprBody {
1747        span: src_body.span,
1748        locals: src_body.locals.clone(),
1749        comments: src_body.comments.clone(),
1750        body: tgt_body,
1751    }
1752}
1753
1754fn translate_body(ctx: &mut TransformCtx, body: &mut gast::Body) {
1755    use gast::Body::{Structured, Unstructured};
1756    let Unstructured(src_body) = body else {
1757        panic!("Called `ullbc_to_llbc` on an already restructured body")
1758    };
1759    trace!("About to translate to ullbc: {:?}", src_body.span);
1760    let tgt_body = translate_body_aux(ctx, src_body);
1761    *body = Structured(tgt_body);
1762}
1763
1764pub struct Transform;
1765impl TransformPass for Transform {
1766    fn transform_ctx(&self, ctx: &mut TransformCtx) {
1767        // Translate the bodies one at a time.
1768        ctx.for_each_body(|ctx, body| {
1769            translate_body(ctx, body);
1770        });
1771
1772        // Print the functions
1773        let fmt_ctx = &ctx.into_fmt();
1774        for fun in &ctx.translated.fun_decls {
1775            trace!(
1776                "# Signature:\n{}\n\n# Function definition:\n{}\n",
1777                fun.signature.with_ctx(fmt_ctx),
1778                fun.with_ctx(fmt_ctx),
1779            );
1780        }
1781        // Print the global variables
1782        for global in &ctx.translated.global_decls {
1783            trace!(
1784                "# Type:\n{}\n\n# Global definition:\n{}\n",
1785                global.ty.with_ctx(fmt_ctx),
1786                global.with_ctx(fmt_ctx),
1787            );
1788        }
1789
1790        if ctx.options.print_built_llbc {
1791            info!("# LLBC resulting from control-flow reconstruction:\n\n{ctx}\n",);
1792        }
1793    }
1794}
charon_lib/transform/ullbc_to_llbc.rs

charon_lib/transform/
ullbc_to_llbc.rs