charon_lib/transform/control_flow/
ullbc_to_llbc.rs

1//! ULLBC to LLBC
2//!
3//! We reconstruct the control-flow in the Unstructured LLBC.
4//!
5//! The reconstruction algorithm is not written to be efficient (its complexity
6//! is probably very bad), but it was not written to be: this is still an early
7//! stage and we want the algorithm to generate the best reconstruction as
8//! possible. We still need to test the algorithm on more interesting examples,
9//! and will consider making it more efficient once it is a bit mature and well
10//! tested.
11//! Also note that we more importantly focus on making the algorithm sound: the
12//! reconstructed program must always be equivalent to the original MIR program,
13//! and the fact that the reconstruction preserves this property must be obvious.
14//!
15//! Finally, we conjecture the execution time shouldn't be too much a problem:
16//! we don't expect the translation mechanism to be applied on super huge functions
17//! (which will be difficult to formally analyze), and the MIR graphs are actually
18//! not that big because statements are grouped into code blocks (a block is made
19//! of a list of statements, followed by a terminator - branchings and jumps can
20//! only be performed by terminators -, meaning that MIR graphs don't have that
21//! many nodes and edges).
22
23use indexmap::IndexMap;
24use itertools::Itertools;
25use petgraph::algo::dijkstra;
26use petgraph::algo::dominators::{Dominators, simple_fast};
27use petgraph::graphmap::DiGraphMap;
28use petgraph::visit::{Dfs, DfsPostOrder, GraphBase, IntoNeighbors, Visitable, Walker};
29use std::collections::{BTreeSet, HashMap, HashSet};
30use std::mem;
31
32use crate::common::ensure_sufficient_stack;
33use crate::errors::sanity_check;
34use crate::formatter::IntoFormatter;
35use crate::llbc_ast as tgt;
36use crate::meta::{Span, combine_span};
37use crate::pretty::FmtWithCtx;
38use crate::transform::TransformCtx;
39use crate::transform::ctx::TransformPass;
40use crate::ullbc_ast::{self as src, BlockId};
41use crate::{ast::*, register_error};
42
43/// Arbitrary-precision numbers
44type BigUint = fraction::DynaInt<u64, fraction::BigUint>;
45type BigRational = fraction::Ratio<BigUint>;
46
47/// Control-Flow Graph
48type Cfg = DiGraphMap<src::BlockId, ()>;
49
50/// Information precomputed about a function's CFG.
51#[derive(Debug)]
52struct CfgInfo {
53    /// The CFG
54    pub cfg: Cfg,
55    /// The CFG where all the backward edges have been removed. Aka "forward CFG".
56    pub fwd_cfg: Cfg,
57    /// We consider the destination of the backward edges to be loop entries and
58    /// store them here.
59    pub loop_entries: HashSet<src::BlockId>,
60    /// The blocks whose terminators are a switch are stored here.
61    pub switch_blocks: HashSet<src::BlockId>,
62    /// Tree of which nodes dominates which other nodes.
63    #[expect(unused)]
64    pub dominator_tree: Dominators<BlockId>,
65    /// Computed data about each block.
66    pub block_data: Vector<BlockId, BlockData>,
67}
68
69#[derive(Debug)]
70struct BlockData {
71    pub id: BlockId,
72    /// Order in a reverse postorder numbering. `None` if the block is unreachable.
73    pub reverse_postorder: Option<u32>,
74    /// Node from where we can only reach error nodes (panic, etc.)
75    pub only_reach_error: bool,
76    /// List of reachable nodes, with the length of shortest path to them. Includes the current
77    /// node.
78    pub shortest_paths: hashbrown::HashMap<BlockId, usize>,
79    /// Let's say we put a quantity of water equal to 1 on the block, and the water flows downards.
80    /// Whenever there is a branching, the quantity of water gets equally divided between the
81    /// branches. When the control flows join, we put the water back together. The set below
82    /// computes the amount of water received by each descendant of the node.
83    ///
84    /// TODO: there must be a known algorithm which computes this, right?...
85    /// This is exactly this problems:
86    /// <https://stackoverflow.com/questions/78221666/algorithm-for-total-flow-through-weighted-directed-acyclic-graph>
87    /// TODO: the way I compute this is not efficient.
88    pub flow: Vector<src::BlockId, BigRational>,
89}
90
91#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
92struct BlockWithRank<T> {
93    /// A "rank" quantity that we use to order the nodes.
94    /// By placing the `rank` field before the `id`, we make sure that
95    /// we use the rank first in the lexicographic order.
96    rank: T,
97    id: src::BlockId,
98}
99type OrdBlockId = BlockWithRank<u32>;
100
101/// Error indicating that the control-flow graph is not reducible. The contained block id is a
102/// block involved in an irreducible subgraph.
103struct Irreducible(BlockId);
104
105impl CfgInfo {
106    /// Build the CFGs (the "regular" CFG and the CFG without backward edges) and
107    /// compute some information like the loop entries and the switch blocks.
108    fn build(body: &src::BodyContents) -> Result<Self, Irreducible> {
109        let start_block = BlockId::ZERO;
110
111        // Build the node graph (we ignore unwind paths for now).
112        let mut cfg = Cfg::new();
113        for (block_id, block) in body.iter_indexed() {
114            cfg.add_node(block_id);
115            for tgt in block.targets_ignoring_unwind() {
116                cfg.add_edge(block_id, tgt, ());
117            }
118        }
119
120        let empty_flow = body.map_ref(|_| BigRational::new(0u64.into(), 1u64.into()));
121        let mut block_data: Vector<BlockId, BlockData> = body.map_ref_indexed(|id, _| BlockData {
122            id,
123            // Default values will stay for unreachable nodes, which are irrelevant.
124            reverse_postorder: None,
125            only_reach_error: false,
126            shortest_paths: Default::default(),
127            flow: empty_flow.clone(),
128        });
129
130        // Compute the dominator tree.
131        let dominator_tree = simple_fast(&cfg, start_block);
132
133        // Compute reverse postorder numbering.
134        for (i, block_id) in DfsPostOrder::new(&cfg, start_block).iter(&cfg).enumerate() {
135            let rev_post_id = body.slot_count() - i;
136            assert!(rev_post_id <= u32::MAX as usize);
137            block_data[block_id].reverse_postorder = Some(rev_post_id as u32);
138        }
139
140        // Compute the forward graph (without backward edges).
141        let mut fwd_cfg = Cfg::new();
142        let mut loop_entries = HashSet::new();
143        let mut switch_blocks = HashSet::new();
144        for src in cfg.nodes() {
145            if block_data[src].reverse_postorder.is_none() {
146                // Unreachable block
147                continue;
148            }
149            fwd_cfg.add_node(src);
150            if body[src].terminator.kind.is_switch() {
151                switch_blocks.insert(src);
152            }
153            for tgt in cfg.neighbors(src) {
154                // Check if the edge is a backward edge.
155                if block_data[src].reverse_postorder >= block_data[tgt].reverse_postorder {
156                    // This is a backward edge
157                    loop_entries.insert(tgt);
158                    // A cfg is reducible iff the target of every back edge dominates the
159                    // edge's source.
160                    if !dominator_tree.dominators(src).unwrap().contains(&tgt) {
161                        return Err(Irreducible(src));
162                    }
163                } else {
164                    fwd_cfg.add_edge(src, tgt, ());
165                }
166            }
167        }
168
169        for block_id in DfsPostOrder::new(&fwd_cfg, start_block).iter(&fwd_cfg) {
170            let block = &body[block_id];
171            let targets = cfg.neighbors(block_id).collect_vec();
172            let fwd_targets = fwd_cfg.neighbors(block_id).collect_vec();
173
174            // Compute the nodes that can only reach error nodes.
175            // The node can only reach error nodes if:
176            // - it is an error node;
177            // - or it has neighbors and they all lead to errors.
178            // Note that if there is a backward edge, `only_reach_error` cannot contain this
179            // node yet. In other words, this does not consider infinite loops as reaching an
180            // error node.
181            if block.terminator.is_error()
182                || (!targets.is_empty()
183                    && targets.iter().all(|&tgt| block_data[tgt].only_reach_error))
184            {
185                block_data[block_id].only_reach_error = true;
186            }
187
188            // Compute the flows between each pair of nodes.
189            let mut flow: Vector<src::BlockId, BigRational> =
190                mem::take(&mut block_data[block_id].flow);
191            if !fwd_targets.is_empty() {
192                // We need to divide the initial flow equally between the children
193                let factor = BigRational::new(1u64.into(), fwd_targets.len().into());
194
195                // For each child, multiply the flows of its own children by the ratio,
196                // and add.
197                for &child_id in &fwd_targets {
198                    // First, add the child itself
199                    flow[child_id] += factor.clone();
200
201                    // Then add its successors
202                    let child = &block_data[child_id];
203                    for grandchild in child.reachable_excluding_self() {
204                        // Flow from `child` to `grandchild`
205                        let child_flow = child.flow[grandchild].clone();
206                        flow[grandchild] += factor.clone() * child_flow;
207                    }
208                }
209            }
210            block_data[block_id].flow = flow;
211
212            // Compute shortest paths to all reachable nodes in the forward graph.
213            block_data[block_id].shortest_paths = dijkstra(&fwd_cfg, block_id, None, |_| 1usize);
214        }
215
216        Ok(CfgInfo {
217            cfg,
218            fwd_cfg,
219            loop_entries,
220            switch_blocks,
221            dominator_tree,
222            block_data,
223        })
224    }
225
226    fn block_data(&self, block_id: BlockId) -> &BlockData {
227        &self.block_data[block_id]
228    }
229    // fn can_reach(&self, src: BlockId, tgt: BlockId) -> bool {
230    //     self.block_data[src].shortest_paths.contains_key(&tgt)
231    // }
232    fn topo_rank(&self, block_id: BlockId) -> u32 {
233        self.block_data[block_id].reverse_postorder.unwrap()
234    }
235    /// Create an [OrdBlockId] from a block id and a rank given by a map giving
236    /// a sort (topological in our use cases) over the graph.
237    fn make_ord_block_id(&self, block_id: BlockId) -> OrdBlockId {
238        OrdBlockId {
239            id: block_id,
240            rank: self.topo_rank(block_id),
241        }
242    }
243    fn is_backward_edge(&self, src: BlockId, tgt: BlockId) -> bool {
244        self.block_data[src].reverse_postorder >= self.block_data[tgt].reverse_postorder
245            && self.cfg.contains_edge(src, tgt)
246    }
247}
248
249impl BlockData {
250    fn shortest_paths_including_self(&self) -> impl Iterator<Item = (BlockId, usize)> {
251        self.shortest_paths.iter().map(|(bid, d)| (*bid, *d))
252    }
253    fn shortest_paths_excluding_self(&self) -> impl Iterator<Item = (BlockId, usize)> {
254        self.shortest_paths_including_self()
255            .filter(move |&(bid, _)| bid != self.id)
256    }
257    #[expect(unused)]
258    fn reachable_including_self(&self) -> impl Iterator<Item = BlockId> {
259        self.shortest_paths_including_self().map(|(bid, _)| bid)
260    }
261    fn reachable_excluding_self(&self) -> impl Iterator<Item = BlockId> {
262        self.shortest_paths_excluding_self().map(|(bid, _)| bid)
263    }
264}
265
266#[derive(Debug, Clone)]
267struct LoopExitCandidateInfo {
268    /// The occurrences going to this exit.
269    /// For every occurrence, we register the distance between the loop entry
270    /// and the node from which the edge going to the exit starts.
271    /// If later we have to choose between candidates with the same number
272    /// of occurrences, we choose the one with the smallest distances.
273    ///
274    /// Note that it *may* happen that we have several exit candidates referenced
275    /// more than once for one loop. This comes from the fact that whenever we
276    /// reach a node from which the loop entry is not reachable (without using a
277    /// backward edge leading to an outer loop entry), we register this node
278    /// as well as all its children as exit candidates.
279    /// Consider the following example:
280    /// ```text
281    /// while i < max {
282    ///     if cond {
283    ///         break;
284    ///     }
285    ///     s += i;
286    ///     i += 1
287    /// }
288    /// // All the below nodes are exit candidates (each of them is referenced twice)
289    /// s += 1;
290    /// return s;
291    /// ```
292    pub occurrences: Vec<usize>,
293}
294
295struct FilteredLoopParents {
296    remaining_parents: Vec<(src::BlockId, usize)>,
297    removed_parents: Vec<(src::BlockId, usize)>,
298}
299
300#[derive(Debug, Default, Clone)]
301struct ExitInfo {
302    is_loop_entry: bool,
303    is_switch_entry: bool,
304    /// The loop exit
305    loop_exit: Option<src::BlockId>,
306    /// Some loop exits actually belong to outer switches. We still need
307    /// to track them in the loop exits, in order to know when we should
308    /// insert a break. However, we need to make sure we don't add the
309    /// corresponding block in a sequence, after having translated the
310    /// loop, like so:
311    /// ```text
312    /// loop {
313    ///   loop_body
314    /// };
315    /// exit_blocks // OK if the exit "belongs" to the loop
316    /// ```
317    ///
318    /// In case the exit doesn't belong to the loop:
319    /// ```text
320    /// if b {
321    ///   loop {
322    ///     loop_body
323    ///   } // no exit blocks after the loop
324    /// }
325    /// else {
326    ///   ...
327    /// };
328    /// exit_blocks // the exit blocks are here
329    /// ```
330    owned_loop_exit: Option<src::BlockId>,
331    /// The switch exit.
332    /// Note that the switch exits are always owned.
333    owned_switch_exit: Option<src::BlockId>,
334}
335
336/// The exits of a graph
337#[derive(Debug, Clone)]
338struct ExitsInfo {
339    exits: Vector<BlockId, ExitInfo>,
340}
341
342impl ExitsInfo {
343    /// Check if a loop entry is reachable from a node, in a graph where we remove
344    /// the backward edges going directly to the loop entry.
345    ///
346    /// If the loop entry is not reachable, it means that:
347    /// - the loop entry is not reachable at all
348    /// - or it is only reachable through an outer loop
349    ///
350    /// The starting node should be a (transitive) child of the loop entry.
351    /// We use this to find candidates for loop exits.
352    fn loop_entry_is_reachable_from_inner(
353        cfg: &CfgInfo,
354        loop_entry: src::BlockId,
355        block_id: src::BlockId,
356    ) -> bool {
357        // It is reachable in the complete graph. Check if it is reachable by not
358        // going through backward edges which go to outer loops. In practice, we
359        // just need to forbid the use of any backward edges at the exception of
360        // those which go directly to the current loop's entry. This means that we
361        // ignore backward edges to outer loops of course, but also backward edges
362        // to inner loops because we shouldn't need to follow those (there should be
363        // more direct paths).
364        Dfs::new(&cfg.fwd_cfg, block_id)
365            .iter(&cfg.fwd_cfg)
366            .any(|bid| cfg.is_backward_edge(bid, loop_entry))
367    }
368
369    fn filter_loop_parents(
370        cfg: &CfgInfo,
371        parent_loops: &Vec<(src::BlockId, usize)>,
372        block_id: src::BlockId,
373    ) -> Option<FilteredLoopParents> {
374        let mut eliminate: usize = 0;
375        for (i, (loop_id, _)) in parent_loops.iter().enumerate().rev() {
376            if Self::loop_entry_is_reachable_from_inner(cfg, *loop_id, block_id) {
377                eliminate = i + 1;
378                break;
379            }
380        }
381
382        // Split the vector of parents
383        let (remaining_parents, removed_parents) = parent_loops.split_at(eliminate);
384        if !removed_parents.is_empty() {
385            let (mut remaining_parents, removed_parents) =
386                (remaining_parents.to_vec(), removed_parents.to_vec());
387
388            // Update the distance to the last loop - we just increment the distance
389            // by 1, because from the point of view of the parent loop, we just exited
390            // a block and go to the next sequence of instructions.
391            if !remaining_parents.is_empty() {
392                remaining_parents.last_mut().unwrap().1 += 1;
393            }
394
395            Some(FilteredLoopParents {
396                remaining_parents,
397                removed_parents,
398            })
399        } else {
400            None
401        }
402    }
403
404    /// Auxiliary helper
405    ///
406    /// Check if it is possible to reach the exit of an outer switch from `start_bid` without going
407    /// through the `exit_candidate`. We use the forward graph.
408    fn can_reach_outer_exit(
409        cfg: &CfgInfo,
410        outer_exits: &HashSet<src::BlockId>,
411        start_bid: src::BlockId,
412        exit_candidate: src::BlockId,
413    ) -> bool {
414        /// Graph that is identical to `Cfg` except that a chosen node is considered to have no neighbors.
415        struct GraphWithoutEdgesFrom<'a> {
416            graph: &'a Cfg,
417            special_node: BlockId,
418        }
419        impl GraphBase for GraphWithoutEdgesFrom<'_> {
420            type EdgeId = <Cfg as GraphBase>::EdgeId;
421            type NodeId = <Cfg as GraphBase>::NodeId;
422        }
423        impl IntoNeighbors for &GraphWithoutEdgesFrom<'_> {
424            type Neighbors = impl Iterator<Item = Self::NodeId>;
425            fn neighbors(self, a: Self::NodeId) -> Self::Neighbors {
426                if a == self.special_node {
427                    None
428                } else {
429                    Some(self.graph.neighbors(a))
430                }
431                .into_iter()
432                .flatten()
433            }
434        }
435        impl Visitable for GraphWithoutEdgesFrom<'_> {
436            type Map = <Cfg as Visitable>::Map;
437            fn visit_map(self: &Self) -> Self::Map {
438                self.graph.visit_map()
439            }
440            fn reset_map(self: &Self, map: &mut Self::Map) {
441                self.graph.reset_map(map);
442            }
443        }
444
445        // Do a DFS over the forward graph where we pretend that the exit candidate has no outgoing
446        // edges. If we reach an outer exit candidate in that graph then the exit candidate does not
447        // dominate the outer exit candidates in the forward graph starting from `start_bid`.
448        let graph = GraphWithoutEdgesFrom {
449            graph: &cfg.fwd_cfg,
450            special_node: exit_candidate,
451        };
452        Dfs::new(&graph, start_bid)
453            .iter(&graph)
454            .any(|bid| outer_exits.contains(&bid))
455    }
456    /// Register a node and its children as exit candidates for a list of
457    /// parent loops.
458    fn register_children_as_loop_exit_candidates(
459        cfg: &CfgInfo,
460        loop_exits: &mut HashMap<src::BlockId, IndexMap<src::BlockId, LoopExitCandidateInfo>>,
461        removed_parent_loops: &Vec<(src::BlockId, usize)>,
462        block_id: src::BlockId,
463    ) {
464        let mut base_dist = 0;
465        // For every parent loop, in reverse order (we go from last to first in
466        // order to correctly compute the distances)
467        for (loop_id, loop_dist) in removed_parent_loops.iter().rev() {
468            // Update the distance to the loop entry
469            base_dist += *loop_dist;
470
471            // Retrieve the candidates
472            let candidates = loop_exits.get_mut(loop_id).unwrap();
473
474            // Update them
475            for (bid, dist) in cfg.block_data(block_id).shortest_paths_including_self() {
476                let distance = base_dist + dist;
477                match candidates.get_mut(&bid) {
478                    None => {
479                        candidates.insert(
480                            bid,
481                            LoopExitCandidateInfo {
482                                occurrences: vec![distance],
483                            },
484                        );
485                    }
486                    Some(c) => {
487                        c.occurrences.push(distance);
488                    }
489                }
490            }
491        }
492    }
493
494    /// Compute the loop exit candidates.
495    ///
496    /// There may be several candidates with the same "optimality" (same number of
497    /// occurrences, etc.), in which case we choose the first one which was registered
498    /// (the order in which we explore the graph is deterministic): this is why we
499    /// store the candidates in a linked hash map.
500    fn compute_loop_exit_candidates(
501        cfg: &CfgInfo,
502        explored: &mut HashSet<src::BlockId>,
503        ordered_loops: &mut Vec<src::BlockId>,
504        loop_exits: &mut HashMap<src::BlockId, IndexMap<src::BlockId, LoopExitCandidateInfo>>,
505        // List of parent loops, with the distance to the entry of the loop (the distance
506        // is the distance between the current node and the loop entry for the last parent,
507        // and the distance between the parents for the others).
508        mut parent_loops: Vec<(src::BlockId, usize)>,
509        block_id: src::BlockId,
510    ) {
511        if explored.contains(&block_id) {
512            return;
513        }
514        explored.insert(block_id);
515
516        // Check if we enter a loop - add the corresponding node if necessary
517        if cfg.loop_entries.contains(&block_id) {
518            parent_loops.push((block_id, 1));
519            ordered_loops.push(block_id);
520        } else {
521            // Increase the distance with the parent loop
522            if !parent_loops.is_empty() {
523                parent_loops.last_mut().unwrap().1 += 1;
524            }
525        };
526
527        // Retrieve the children - note that we ignore the back edges
528        for child in cfg.fwd_cfg.neighbors(block_id) {
529            // If the parent loop entry is not reachable from the child without going
530            // through a backward edge which goes directly to the loop entry, consider
531            // this node a potential exit.
532            ensure_sufficient_stack(|| {
533                let new_parent_loops = match Self::filter_loop_parents(cfg, &parent_loops, child) {
534                    None => parent_loops.clone(),
535                    Some(fparent_loops) => {
536                        // We filtered some parent loops: it means this child and its
537                        // children are loop exit candidates for all those loops: we must
538                        // thus register them.
539                        // Note that we register the child *and* its children: the reason
540                        // is that we might do something *then* actually jump to the exit.
541                        // For instance, the following block of code:
542                        // ```
543                        // if cond { break; } else { ... }
544                        // ```
545                        //
546                        // Gets translated in MIR to something like this:
547                        // ```
548                        // bb1: {
549                        //   if cond -> bb2 else -> bb3; // bb2 is not the real exit
550                        // }
551                        //
552                        // bb2: {
553                        //   goto bb4; // bb4 is the real exit
554                        // }
555                        // ```
556                        Self::register_children_as_loop_exit_candidates(
557                            cfg,
558                            loop_exits,
559                            &fparent_loops.removed_parents,
560                            child,
561                        );
562                        fparent_loops.remaining_parents
563                    }
564                };
565                // Explore, with the filtered parents
566                Self::compute_loop_exit_candidates(
567                    cfg,
568                    explored,
569                    ordered_loops,
570                    loop_exits,
571                    new_parent_loops,
572                    child,
573                );
574            })
575        }
576    }
577
578    /// See [`compute_loop_switch_exits`] for
579    /// explanations about what "exits" are.
580    ///
581    /// The following function computes the loop exits. It acts as follows.
582    ///
583    /// We keep track of a stack of the loops in which we entered.
584    /// It is very easy to check when we enter a loop: loop entries are destinations
585    /// of backward edges, which can be spotted with a simple graph exploration (see
586    /// [`build_cfg_partial_info_edges`].
587    /// The criteria to consider whether we exit a loop is the following:
588    /// - we exit a loop if we go to a block from which we can't reach the loop
589    ///   entry at all
590    /// - or if we can reach the loop entry, but must use a backward edge which goes
591    ///   to an outer loop
592    ///
593    /// It is better explained on the following example:
594    /// ```text
595    /// 'outer while i < max {
596    ///     'inner while j < max {
597    ///        j += 1;
598    ///     }
599    ///     // (i)
600    ///     i += 1;
601    /// }
602    /// ```
603    /// If we enter the inner loop then go to (i) from the inner loop, we consider
604    /// that we exited the outer loop because:
605    /// - we can reach the entry of the inner loop from (i) (by finishing then
606    ///   starting again an iteration of the outer loop)
607    /// - but doing this requires taking a backward edge which goes to the outer loop
608    ///
609    /// Whenever we exit a loop, we save the block we went to as an exit candidate
610    /// for this loop. Note that there may by many exit candidates. For instance,
611    /// in the below example:
612    /// ```text
613    /// while ... {
614    ///    ...
615    ///    if ... {
616    ///        // We can't reach the loop entry from here: this is an exit
617    ///        // candidate
618    ///        return;
619    ///    }
620    /// }
621    /// // This is another exit candidate - and this is the one we want to use
622    /// // as the "real" exit...
623    /// ...
624    /// ```
625    ///
626    /// Also note that it may happen that we go several times to the same exit (if
627    /// we use breaks for instance): we record the number of times an exit candidate
628    /// is used.
629    ///
630    /// Once we listed all the exit candidates, we find the "best" one for every
631    /// loop, starting with the outer loops. We start with outer loops because
632    /// inner loops might use breaks to exit to the exit of outer loops: if we
633    /// start with the inner loops, the exit which is "natural" for the outer loop
634    /// might end up being used for one of the inner loops...
635    ///
636    /// The best exit is the following one:
637    /// - it is the one which is used the most times (note that there can be
638    ///   several candidates which are referenced strictly more than once: see the
639    ///   comment below)
640    /// - if several exits have the same number of occurrences, we choose the one
641    ///   for which we goto the "earliest" (earliest meaning that the goto is close to
642    ///   the loop entry node in the AST). The reason is that all the loops should
643    ///   have an outer if ... then ... else ... which executes the loop body or goes
644    ///   to the exit (note that this is not necessarily the first
645    ///   if ... then ... else ... we find: loop conditions can be arbitrary
646    ///   expressions, containing branchings).
647    ///
648    /// # Several candidates for a loop exit:
649    /// =====================================
650    /// There used to be a sanity check to ensure there are no two different
651    /// candidates with exactly the same number of occurrences and distance from
652    /// the entry of the loop, if the number of occurrences is > 1.
653    ///
654    /// We removed it because it does happen, for instance here (the match
655    /// introduces an `unreachable` node, and it has the same number of
656    /// occurrences and the same distance to the loop entry as the `panic`
657    /// node):
658    ///
659    /// ```text
660    /// pub fn list_nth_mut_loop_pair<'a, T>(
661    ///     mut ls: &'a mut List<T>,
662    ///     mut i: u32,
663    /// ) -> &'a mut T {
664    ///     loop {
665    ///         match ls {
666    ///             List::Nil => {
667    ///                 panic!() // <-- best candidate
668    ///             }
669    ///             List::Cons(x, tl) => {
670    ///                 if i == 0 {
671    ///                     return x;
672    ///                 } else {
673    ///                     ls = tl;
674    ///                     i -= 1;
675    ///                 }
676    ///             }
677    ///             _ => {
678    ///               // Note that Rustc always introduces an unreachable branch after
679    ///               // desugaring matches.
680    ///               unreachable!(), // <-- best candidate
681    ///             }
682    ///         }
683    ///     }
684    /// }
685    /// ```
686    ///
687    /// When this happens we choose an exit candidate whose edges don't necessarily
688    /// lead to an error (above there are none, so we don't choose any exits). Note
689    /// that this last condition is important to prevent loops from being unnecessarily
690    /// nested:
691    ///
692    /// ```text
693    /// pub fn nested_loops_enum(step_out: usize, step_in: usize) -> usize {
694    ///     let mut s = 0;
695    ///
696    ///     for _ in 0..128 { // We don't want this loop to be nested with the loops below
697    ///         s += 1;
698    ///     }
699    ///
700    ///     for _ in 0..(step_out) {
701    ///         for _ in 0..(step_in) {
702    ///             s += 1;
703    ///         }
704    ///     }
705    ///
706    ///     s
707    /// }
708    /// ```
709    fn compute_loop_exits(
710        ctx: &mut TransformCtx,
711        body: &src::ExprBody,
712        cfg: &CfgInfo,
713    ) -> HashMap<src::BlockId, Option<src::BlockId>> {
714        let mut explored = HashSet::new();
715        let mut ordered_loops = Vec::new();
716        let mut loop_exits = HashMap::new();
717
718        // Initialize the loop exits candidates
719        for loop_id in &cfg.loop_entries {
720            loop_exits.insert(*loop_id, IndexMap::new());
721        }
722
723        // Compute the candidates
724        Self::compute_loop_exit_candidates(
725            cfg,
726            &mut explored,
727            &mut ordered_loops,
728            &mut loop_exits,
729            Vec::new(),
730            src::BlockId::ZERO,
731        );
732
733        {
734            // Debugging
735            let candidates: Vec<String> = loop_exits
736                .iter()
737                .map(|(loop_id, candidates)| format!("{loop_id} -> {candidates:?}"))
738                .collect();
739            trace!("Loop exit candidates:\n{}", candidates.join("\n"));
740        }
741
742        // Choose one candidate among the potential candidates.
743        let mut exits: HashSet<src::BlockId> = HashSet::new();
744        let mut chosen_loop_exits: HashMap<src::BlockId, Option<src::BlockId>> = HashMap::new();
745        // For every loop
746        for loop_id in ordered_loops {
747            // Check the candidates.
748            // Ignore the candidates which have already been chosen as exits for other
749            // loops (which should be outer loops).
750            // We choose the exit with:
751            // - the most occurrences
752            // - the least total distance (if there are several possibilities)
753            // - doesn't necessarily lead to an error (panic, unreachable)
754
755            // First:
756            // - filter the candidates
757            // - compute the number of occurrences
758            // - compute the sum of distances
759            // TODO: we could simply order by using a lexicographic order
760            let loop_exits = loop_exits
761                .get(&loop_id)
762                .unwrap()
763                .iter()
764                // If candidate already selected for another loop: ignore
765                .filter(|(candidate_id, _)| !exits.contains(candidate_id))
766                .map(|(candidate_id, candidate_info)| {
767                    let num_occurrences = candidate_info.occurrences.len();
768                    let dist_sum = candidate_info.occurrences.iter().sum();
769                    (*candidate_id, num_occurrences, dist_sum)
770                })
771                .collect_vec();
772
773            trace!(
774                "Loop {}: possible exits:\n{}",
775                loop_id,
776                loop_exits
777                    .iter()
778                    .map(|(bid, occs, dsum)| format!(
779                        "{bid} -> {{ occurrences: {occs}, dist_sum: {dsum} }}",
780                    ))
781                    .collect::<Vec<String>>()
782                    .join("\n")
783            );
784
785            // Second: actually select the proper candidate.
786
787            // We find the one with the highest occurrence and the smallest distance
788            // from the entry of the loop (note that we take care of listing the exit
789            // candidates in a deterministic order).
790            let mut best_exit: Option<src::BlockId> = None;
791            let mut best_occurrences = 0;
792            let mut best_dist_sum = std::usize::MAX;
793            for (candidate_id, occurrences, dist_sum) in &loop_exits {
794                if (*occurrences > best_occurrences)
795                    || (*occurrences == best_occurrences && *dist_sum < best_dist_sum)
796                {
797                    best_exit = Some(*candidate_id);
798                    best_occurrences = *occurrences;
799                    best_dist_sum = *dist_sum;
800                }
801            }
802
803            let possible_candidates: Vec<_> = loop_exits
804                .iter()
805                .filter_map(|(bid, occs, dsum)| {
806                    if *occs == best_occurrences && *dsum == best_dist_sum {
807                        Some(*bid)
808                    } else {
809                        None
810                    }
811                })
812                .collect();
813            let num_possible_candidates = loop_exits.len();
814
815            // If there is exactly one one best candidate, it is easy.
816            // Otherwise we need to split further.
817            if num_possible_candidates > 1 {
818                trace!("Best candidates: {:?}", possible_candidates);
819                // TODO: if we use a lexicographic order we can merge this with the code
820                // above.
821                // Remove the candidates which only lead to errors (panic or unreachable).
822                let candidates: Vec<_> = possible_candidates
823                    .iter()
824                    .filter(|&&bid| !cfg.block_data[bid].only_reach_error)
825                    .collect();
826                // If there is exactly one candidate we select it
827                if candidates.len() == 1 {
828                    let exit_id = *candidates[0];
829                    exits.insert(exit_id);
830                    trace!("Loop {loop_id}: selected the best exit candidate {exit_id}");
831                    chosen_loop_exits.insert(loop_id, Some(exit_id));
832                } else {
833                    // Otherwise we do not select any exit.
834                    // We don't want to select any exit if we are in the below situation
835                    // (all paths lead to errors). We added a sanity check below to
836                    // catch the situations where there are several exits which don't
837                    // lead to errors.
838                    //
839                    // Example:
840                    // ========
841                    // ```
842                    // loop {
843                    //     match ls {
844                    //         List::Nil => {
845                    //             panic!() // <-- best candidate
846                    //         }
847                    //         List::Cons(x, tl) => {
848                    //             if i == 0 {
849                    //                 return x;
850                    //             } else {
851                    //                 ls = tl;
852                    //                 i -= 1;
853                    //             }
854                    //         }
855                    //         _ => {
856                    //           unreachable!(); // <-- best candidate (Rustc introduces an `unreachable` case)
857                    //         }
858                    //     }
859                    // }
860                    // ```
861                    //
862                    // Adding this sanity check so that we can see when there are
863                    // several candidates.
864                    let span = body.body[loop_id].terminator.span; // Taking *a* span from the block
865                    sanity_check!(ctx, span, candidates.is_empty());
866                    trace!(
867                        "Loop {loop_id}: did not select an exit candidate because they all lead to panics"
868                    );
869                    chosen_loop_exits.insert(loop_id, None);
870                }
871            } else {
872                // Register the exit, if there is one
873                if let Some(exit_id) = best_exit {
874                    exits.insert(exit_id);
875                }
876                chosen_loop_exits.insert(loop_id, best_exit);
877            }
878        }
879
880        // Return the chosen exits
881        trace!("Chosen loop exits: {:?}", chosen_loop_exits);
882        chosen_loop_exits
883    }
884
885    /// See [`compute_loop_switch_exits`] for
886    /// explanations about what "exits" are.
887    ///
888    /// In order to compute the switch exits, we simply recursively compute a
889    /// topologically ordered set of "filtered successors" as follows (note
890    /// that we work in the CFG *without* back edges):
891    /// - for a block which doesn't branch (only one successor), the filtered
892    ///   successors is the set of reachable nodes.
893    /// - for a block which branches, we compute the nodes reachable from all
894    ///   the children, and find the "best" intersection between those.
895    ///   Note that we find the "best" intersection (a pair of branches which
896    ///   maximize the intersection of filtered successors) because some branches
897    ///   might never join the control-flow of the other branches, if they contain
898    ///   a `break`, `return`, `panic`, etc., like here:
899    ///   ```text
900    ///   if b { x = 3; } { return; }
901    ///   y += x;
902    ///   ...
903    ///   ```
904    /// Note that with nested switches, the branches of the inner switches might
905    /// goto the exits of the outer switches: for this reason, we give precedence
906    /// to the outer switches.
907    fn compute_switch_exits(cfg: &CfgInfo) -> HashMap<src::BlockId, Option<src::BlockId>> {
908        // Compute the successors info map, starting at the root node
909        trace!(
910            "- cfg.cfg:\n{:?}\n- cfg.cfg_no_be:\n{:?}\n- cfg.switch_blocks:\n{:?}",
911            cfg.cfg, cfg.fwd_cfg, cfg.switch_blocks
912        );
913
914        // We need to give precedence to the outer switches: we thus iterate
915        // over the switch blocks in topological order.
916        let mut sorted_switch_blocks: BTreeSet<OrdBlockId> = BTreeSet::new();
917        for bid in cfg.switch_blocks.iter() {
918            sorted_switch_blocks.insert(cfg.make_ord_block_id(*bid));
919        }
920        trace!("sorted_switch_blocks: {:?}", sorted_switch_blocks);
921
922        // Debugging: print all the successors
923        {
924            trace!("Successors info:\n{}\n", {
925                let mut out = vec![];
926                for (bid, info) in cfg.block_data.iter_indexed() {
927                    out.push(format!("{} -> {{flow: {:?}}}", bid, &info.flow).to_string());
928                }
929                out.join("\n")
930            });
931        }
932
933        // For every node which is a switch, retrieve the exit.
934        // As the set of intersection of successors is topologically sorted, the
935        // exit should be the first node in the set (if the set is non empty).
936        // Also, we need to explore the nodes in topological order, to give
937        // precedence to the outer switches.
938        let mut exits_set = HashSet::new();
939        let mut exits = HashMap::new();
940        for bid in sorted_switch_blocks {
941            trace!("Finding exit candidate for: {bid:?}");
942            let bid = bid.id;
943            let block_data = &cfg.block_data[bid];
944            // Find the best successor: this is the node with the highest flow, and the
945            // highest reverse topological rank.
946            //
947            // Remark: in order to rank the nodes, we also use the negation of the
948            // rank given by the topological order. The last elements of the set
949            // have the highest flow, that is they are the nodes to which the maximum
950            // number of paths converge. If several nodes have the same flow, we want
951            // to take the highest one in the hierarchy: hence the use of the inverse
952            // of the topological rank.
953            //
954            // Ex.:
955            // ```text
956            // A  -- we start here
957            // |
958            // |---------------------------------------
959            // |            |            |            |
960            // B:(0.25,-1)  C:(0.25,-2)  D:(0.25,-3)  E:(0.25,-4)
961            // |            |            |
962            // |--------------------------
963            // |
964            // F:(0.75,-5)
965            // |
966            // |
967            // G:(0.75,-6)
968            // ```
969            // The "best" node (with the highest (flow, rank) in the graph above is F.
970            let switch_exit: Option<BlockWithRank<(BigRational, isize)>> = block_data
971                .reachable_excluding_self()
972                .map(|id| {
973                    let flow = block_data.flow[id].clone();
974                    let rank = -isize::try_from(cfg.topo_rank(id)).unwrap();
975                    BlockWithRank {
976                        rank: (flow, rank),
977                        id,
978                    }
979                })
980                .max();
981            let exit = if let Some(exit) = switch_exit {
982                // We have an exit candidate: check that it was not already
983                // taken by an external switch
984                trace!("{bid:?} has an exit candidate: {exit:?}");
985                if exits_set.contains(&exit.id) {
986                    trace!(
987                        "Ignoring the exit candidate because already taken by an external switch"
988                    );
989                    None
990                } else {
991                    // It was not taken by an external switch.
992                    //
993                    // We must check that we can't reach the exit of an external
994                    // switch from one of the branches, without going through the
995                    // exit candidate.
996                    // We do this by simply checking that we can't reach any exits
997                    // (and use the fact that we explore the switch by using a
998                    // topological order to not discard valid exit candidates).
999                    //
1000                    // The reason is that it can lead to code like the following:
1001                    // ```
1002                    // if ... { // if #1
1003                    //   if ... { // if #2
1004                    //     ...
1005                    //     // here, we have a `goto b1`, where b1 is the exit
1006                    //     // of if #2: we thus stop translating the blocks.
1007                    //   }
1008                    //   else {
1009                    //     ...
1010                    //     // here, we have a `goto b2`, where b2 is the exit
1011                    //     // of if #1: we thus stop translating the blocks.
1012                    //   }
1013                    //   // We insert code for the block b1 here (which is the exit of
1014                    //   // the exit of if #2). However, this block should only
1015                    //   // be executed in the branch "then" of the if #2, not in
1016                    //   // the branch "else".
1017                    //   ...
1018                    // }
1019                    // else {
1020                    //   ...
1021                    // }
1022                    // ```
1023
1024                    // First: we do a quick check (does the set of all successors
1025                    // intersect the set of exits for outer blocks?). If yes, we do
1026                    // a more precise analysis: we check if we can reach the exit
1027                    // *without going through* the exit candidate.
1028                    let can_reach_exit = block_data
1029                        .reachable_excluding_self()
1030                        .any(|bid| exits_set.contains(&bid));
1031                    if !can_reach_exit || !Self::can_reach_outer_exit(cfg, &exits_set, bid, exit.id)
1032                    {
1033                        trace!("Keeping the exit candidate");
1034                        // No intersection: ok
1035                        exits_set.insert(exit.id);
1036                        Some(exit.id)
1037                    } else {
1038                        trace!(
1039                            "Ignoring the exit candidate because of an intersection with external switches"
1040                        );
1041                        None
1042                    }
1043                }
1044            } else {
1045                trace!("{bid:?} has no successors");
1046                None
1047            };
1048
1049            exits.insert(bid, exit);
1050        }
1051
1052        exits
1053    }
1054
1055    /// Compute the exits for the loops and the switches (switch on integer and
1056    /// if ... then ... else ...). We need to do this because control-flow in MIR
1057    /// is destructured: we have gotos everywhere.
1058    ///
1059    /// Let's consider the following piece of code:
1060    /// ```text
1061    /// if cond1 { ... } else { ... };
1062    /// if cond2 { ... } else { ... };
1063    /// ```
1064    /// Once converted to MIR, the control-flow is destructured, which means we
1065    /// have gotos everywhere. When reconstructing the control-flow, we have
1066    /// to be careful about the point where we should join the two branches of
1067    /// the first if.
1068    /// For instance, if we don't notice they should be joined at some point (i.e,
1069    /// whatever the branch we take, there is a moment when we go to the exact
1070    /// same place, just before the second if), we might generate code like
1071    /// this, with some duplicata:
1072    /// ```text
1073    /// if cond1 { ...; if cond2 { ... } else { ...} }
1074    /// else { ...; if cond2 { ... } else { ...} }
1075    /// ```
1076    ///
1077    /// Such a reconstructed program is valid, but it is definitely non-optimal:
1078    /// it is very different from the original program (making it less clean and
1079    /// clear), more bloated, and might involve duplicating the proof effort.
1080    ///
1081    /// For this reason, we need to find the "exit" of the first loop, which is
1082    /// the point where the two branches join. Note that this can be a bit tricky,
1083    /// because there may be more than two branches (if we do `switch(x) { ... }`),
1084    /// and some of them might not join (if they contain a `break`, `panic`,
1085    /// `return`, etc.).
1086    ///
1087    /// Finally, some similar issues arise for loops. For instance, let's consider
1088    /// the following piece of code:
1089    /// ```text
1090    /// while cond1 {
1091    ///   e1;
1092    ///   if cond2 {
1093    ///     break;
1094    ///   }
1095    ///   e2;
1096    /// }
1097    /// e3;
1098    /// return;
1099    /// ```
1100    ///
1101    /// Note that in MIR, the loop gets desugared to an if ... then ... else ....
1102    /// From the MIR, We want to generate something like this:
1103    /// ```text
1104    /// loop {
1105    ///   if cond1 {
1106    ///     e1;
1107    ///     if cond2 {
1108    ///       break;
1109    ///     }
1110    ///     e2;
1111    ///     continue;
1112    ///   }
1113    ///   else {
1114    ///     break;
1115    ///   }
1116    /// };
1117    /// e3;
1118    /// return;
1119    /// ```
1120    ///
1121    /// But if we don't pay attention, we might end up with that, once again with
1122    /// duplications:
1123    /// ```text
1124    /// loop {
1125    ///   if cond1 {
1126    ///     e1;
1127    ///     if cond2 {
1128    ///       e3;
1129    ///       return;
1130    ///     }
1131    ///     e2;
1132    ///     continue;
1133    ///   }
1134    ///   else {
1135    ///     e3;
1136    ///     return;
1137    ///   }
1138    /// }
1139    /// ```
1140    /// We thus have to notice that if the loop condition is false, we goto the same
1141    /// block as when following the goto introduced by the break inside the loop, and
1142    /// this block is dubbed the "loop exit".
1143    ///
1144    /// The following function thus computes the "exits" for loops and switches, which
1145    /// are basically the points where control-flow joins.
1146    fn compute(ctx: &mut TransformCtx, body: &src::ExprBody, cfg_info: &CfgInfo) -> Self {
1147        // Compute the loop exits
1148        let loop_exits = Self::compute_loop_exits(ctx, body, cfg_info);
1149        trace!("loop_exits:\n{:?}", loop_exits);
1150
1151        // Compute the switch exits
1152        let switch_exits = Self::compute_switch_exits(cfg_info);
1153        trace!("switch_exits:\n{:?}", switch_exits);
1154
1155        // Compute the exit info
1156        let mut exits_info = ExitsInfo {
1157            exits: body.body.map_ref(|_| Default::default()),
1158        };
1159
1160        // We need to give precedence to the outer switches and loops: we thus iterate
1161        // over the blocks in topological order.
1162        let mut sorted_blocks: BTreeSet<OrdBlockId> = BTreeSet::new();
1163        for bid in cfg_info
1164            .loop_entries
1165            .iter()
1166            .chain(cfg_info.switch_blocks.iter())
1167        {
1168            sorted_blocks.insert(cfg_info.make_ord_block_id(*bid));
1169        }
1170
1171        // Keep track of the exits which were already attributed
1172        let mut all_exits = HashSet::new();
1173
1174        // Put all this together
1175        for bid in sorted_blocks {
1176            let bid = bid.id;
1177            // Check if loop or switch block
1178            let exit_info = &mut exits_info.exits[bid];
1179            if cfg_info.loop_entries.contains(&bid) {
1180                exit_info.is_loop_entry = true;
1181                // This is a loop.
1182                //
1183                // For loops, we always register the exit (if there is one).
1184                // However, the exit may be owned by an outer switch (note
1185                // that we already took care of spreading the exits between
1186                // the inner/outer loops)
1187                let exit_id = loop_exits.get(&bid).unwrap();
1188                exit_info.loop_exit = *exit_id;
1189                // Check if we "own" the exit
1190                let exit_id = exit_id.filter(|exit_id| !all_exits.contains(exit_id));
1191                if let Some(exit_id) = exit_id {
1192                    all_exits.insert(exit_id);
1193                }
1194                exit_info.owned_loop_exit = exit_id;
1195            } else {
1196                exit_info.is_switch_entry = true;
1197                // For switches: check that the exit was not already given to a
1198                // loop
1199                let exit_id = switch_exits.get(&bid).unwrap();
1200                // Check if we "own" the exit
1201                let exit_id = exit_id.filter(|exit_id| !all_exits.contains(exit_id));
1202                if let Some(exit_id) = exit_id {
1203                    all_exits.insert(exit_id);
1204                }
1205                exit_info.owned_switch_exit = exit_id;
1206            }
1207        }
1208
1209        exits_info
1210    }
1211}
1212
1213enum GotoKind {
1214    Break(usize),
1215    Continue(usize),
1216    ExitBlock,
1217    Goto,
1218}
1219
1220struct ReconstructCtx<'a> {
1221    body: &'a src::ExprBody,
1222    exits_info: ExitsInfo,
1223    explored: HashSet<src::BlockId>,
1224    parent_loops: Vec<src::BlockId>,
1225    switch_exit_blocks: HashSet<src::BlockId>,
1226}
1227
1228impl<'a> ReconstructCtx<'a> {
1229    fn build(ctx: &mut TransformCtx, src_body: &'a src::ExprBody) -> Result<Self, Irreducible> {
1230        // Explore the function body to create the control-flow graph without backward
1231        // edges, and identify the loop entries (which are destinations of backward edges).
1232        let cfg_info = CfgInfo::build(&src_body.body)?;
1233
1234        // Find the exit block for all the loops and switches, if such an exit point exists.
1235        let exits_info = ExitsInfo::compute(ctx, src_body, &cfg_info);
1236
1237        // Translate the body by reconstructing the loops and the
1238        // conditional branchings.
1239        // Note that we shouldn't get `None`.
1240        Ok(ReconstructCtx {
1241            body: src_body,
1242            exits_info: exits_info,
1243            explored: HashSet::new(),
1244            parent_loops: Vec::new(),
1245            switch_exit_blocks: HashSet::new(),
1246        })
1247    }
1248
1249    fn get_goto_kind(&self, next_block_id: src::BlockId) -> GotoKind {
1250        // First explore the parent loops in revert order
1251        for (i, &loop_id) in self.parent_loops.iter().rev().enumerate() {
1252            // If we goto a loop entry node: this is a 'continue'
1253            if next_block_id == loop_id {
1254                return GotoKind::Continue(i);
1255            } else {
1256                // If we goto a loop exit node: this is a 'break'
1257                if let Some(exit_id) = self.exits_info.exits[loop_id].loop_exit {
1258                    if next_block_id == exit_id {
1259                        return GotoKind::Break(i);
1260                    }
1261                }
1262            }
1263        }
1264
1265        // Check if the goto exits the current block
1266        if self.switch_exit_blocks.contains(&next_block_id) {
1267            return GotoKind::ExitBlock;
1268        }
1269
1270        // Default
1271        GotoKind::Goto
1272    }
1273
1274    /// `parent_span`: we need some span data for the new statement.
1275    /// We use the one for the parent terminator.
1276    fn translate_child_block(&mut self, parent_span: Span, child_id: src::BlockId) -> tgt::Block {
1277        // Check if this is a backward call
1278        match self.get_goto_kind(child_id) {
1279            GotoKind::Break(index) => {
1280                let st = tgt::StatementKind::Break(index);
1281                tgt::Statement::new(parent_span, st).into_block()
1282            }
1283            GotoKind::Continue(index) => {
1284                let st = tgt::StatementKind::Continue(index);
1285                tgt::Statement::new(parent_span, st).into_block()
1286            }
1287            // If we are going to an exit block we simply ignore the goto
1288            GotoKind::ExitBlock => {
1289                let st = tgt::StatementKind::Nop;
1290                tgt::Statement::new(parent_span, st).into_block()
1291            }
1292            GotoKind::Goto => {
1293                // "Standard" goto: just recursively translate
1294                ensure_sufficient_stack(|| self.translate_block(child_id))
1295            }
1296        }
1297    }
1298
1299    fn translate_statement(&self, st: &src::Statement) -> Option<tgt::Statement> {
1300        let src_span = st.span;
1301        let st = match st.kind.clone() {
1302            src::StatementKind::Assign(place, rvalue) => tgt::StatementKind::Assign(place, rvalue),
1303            src::StatementKind::SetDiscriminant(place, variant_id) => {
1304                tgt::StatementKind::SetDiscriminant(place, variant_id)
1305            }
1306            src::StatementKind::CopyNonOverlapping(copy) => {
1307                tgt::StatementKind::CopyNonOverlapping(copy)
1308            }
1309            src::StatementKind::StorageLive(var_id) => tgt::StatementKind::StorageLive(var_id),
1310            src::StatementKind::StorageDead(var_id) => tgt::StatementKind::StorageDead(var_id),
1311            src::StatementKind::Deinit(place) => tgt::StatementKind::Deinit(place),
1312            src::StatementKind::Assert(assert) => tgt::StatementKind::Assert(assert),
1313            src::StatementKind::Nop => tgt::StatementKind::Nop,
1314            src::StatementKind::Error(s) => tgt::StatementKind::Error(s),
1315        };
1316        Some(tgt::Statement::new(src_span, st))
1317    }
1318
1319    fn translate_terminator(&mut self, terminator: &src::Terminator) -> tgt::Block {
1320        let src_span = terminator.span;
1321
1322        match &terminator.kind {
1323            src::TerminatorKind::Abort(kind) => {
1324                tgt::Statement::new(src_span, tgt::StatementKind::Abort(kind.clone())).into_block()
1325            }
1326            src::TerminatorKind::Return => {
1327                tgt::Statement::new(src_span, tgt::StatementKind::Return).into_block()
1328            }
1329            src::TerminatorKind::UnwindResume => {
1330                tgt::Statement::new(src_span, tgt::StatementKind::Abort(AbortKind::Panic(None)))
1331                    .into_block()
1332            }
1333            src::TerminatorKind::Call {
1334                call,
1335                target,
1336                on_unwind: _,
1337            } => {
1338                // TODO: Have unwinds in the LLBC
1339                let st = tgt::Statement::new(src_span, tgt::StatementKind::Call(call.clone()));
1340                let mut block = self.translate_child_block(terminator.span, *target);
1341                block.statements.insert(0, st);
1342                block
1343            }
1344            src::TerminatorKind::Drop {
1345                kind,
1346                place,
1347                tref,
1348                target,
1349                on_unwind: _,
1350            } => {
1351                // TODO: Have unwinds in the LLBC
1352                let st = tgt::Statement::new(
1353                    src_span,
1354                    tgt::StatementKind::Drop(place.clone(), tref.clone(), kind.clone()),
1355                );
1356                let mut block = self.translate_child_block(terminator.span, *target);
1357                block.statements.insert(0, st);
1358                block
1359            }
1360            src::TerminatorKind::Goto { target } => {
1361                self.translate_child_block(terminator.span, *target)
1362            }
1363            src::TerminatorKind::Switch { discr, targets } => {
1364                // Translate the target expressions
1365                let switch = match &targets {
1366                    src::SwitchTargets::If(then_tgt, else_tgt) => {
1367                        let then_block = self.translate_child_block(terminator.span, *then_tgt);
1368                        let else_block = self.translate_child_block(terminator.span, *else_tgt);
1369                        tgt::Switch::If(discr.clone(), then_block, else_block)
1370                    }
1371                    src::SwitchTargets::SwitchInt(int_ty, targets, otherwise) => {
1372                        // Note that some branches can be grouped together, like
1373                        // here:
1374                        // ```
1375                        // match e {
1376                        //   E::V1 | E::V2 => ..., // Grouped
1377                        //   E::V3 => ...
1378                        // }
1379                        // ```
1380                        // We detect this by checking if a block has already been
1381                        // translated as one of the branches of the switch.
1382                        //
1383                        // Rk.: note there may be intermediate gotos depending
1384                        // on the MIR we use. Typically, we manage to detect the
1385                        // grouped branches with Optimized MIR, but not with Promoted
1386                        // MIR. See the comment in "tests/src/matches.rs".
1387
1388                        // We link block ids to:
1389                        // - vector of matched integer values
1390                        // - translated blocks
1391                        let mut branches: IndexMap<src::BlockId, (Vec<Literal>, tgt::Block)> =
1392                            IndexMap::new();
1393
1394                        // Translate the children expressions
1395                        for (v, bid) in targets.iter() {
1396                            // Check if the block has already been translated:
1397                            // if yes, it means we need to group branches
1398                            if branches.contains_key(bid) {
1399                                // Already translated: add the matched value to
1400                                // the list of values
1401                                let branch = branches.get_mut(bid).unwrap();
1402                                branch.0.push(v.clone());
1403                            } else {
1404                                // Not translated: translate it
1405                                let block = self.translate_child_block(terminator.span, *bid);
1406                                // We use the terminator span information in case then
1407                                // then statement is `None`
1408                                branches.insert(*bid, (vec![v.clone()], block));
1409                            }
1410                        }
1411                        let targets_blocks: Vec<(Vec<Literal>, tgt::Block)> =
1412                            branches.into_iter().map(|(_, x)| x).collect();
1413
1414                        let otherwise_block =
1415                            self.translate_child_block(terminator.span, *otherwise);
1416
1417                        // Translate
1418                        tgt::Switch::SwitchInt(
1419                            discr.clone(),
1420                            *int_ty,
1421                            targets_blocks,
1422                            otherwise_block,
1423                        )
1424                    }
1425                };
1426
1427                // Return
1428                let span = tgt::combine_switch_targets_span(&switch);
1429                let span = combine_span(&src_span, &span);
1430                let st = tgt::StatementKind::Switch(switch);
1431                tgt::Statement::new(span, st).into_block()
1432            }
1433        }
1434    }
1435
1436    /// Remark: some values are boxed (here, the returned statement) so that they
1437    /// are allocated on the heap. This reduces stack usage (we had problems with
1438    /// stack overflows in the past). A more efficient solution would be to use loops
1439    /// to make this code constant space, but that would require a serious rewriting.
1440    fn translate_block(&mut self, block_id: src::BlockId) -> tgt::Block {
1441        // If the user activated this check: check that we didn't already translate
1442        // this block, and insert the block id in the set of already translated blocks.
1443        self.explored.insert(block_id);
1444
1445        let block = &self.body.body[block_id];
1446
1447        let exit_info = &self.exits_info.exits[block_id];
1448        // Check if we enter a loop: if so, update parent_loops and the current_exit_block
1449        let is_loop = exit_info.is_loop_entry;
1450        // If we enter a switch or a loop, we need to check if we own the exit
1451        // block, in which case we need to append it to the loop/switch body
1452        // in a sequence
1453        let is_switch = exit_info.is_switch_entry;
1454        let next_block = if is_loop {
1455            exit_info.owned_loop_exit
1456        } else if is_switch {
1457            exit_info.owned_switch_exit
1458        } else {
1459            None
1460        };
1461
1462        let terminator = {
1463            if is_loop {
1464                self.parent_loops.push(block_id);
1465            }
1466            // If we enter a switch, add the exit block to the set of outer exit blocks.
1467            if is_switch && let Some(bid) = next_block {
1468                assert!(!self.switch_exit_blocks.contains(&bid));
1469                self.switch_exit_blocks.insert(bid);
1470            }
1471
1472            // Translate the terminator and the subsequent blocks.
1473            // Note that this terminator is an option: we might ignore it
1474            // (if it is an exit).
1475            let terminator = self.translate_terminator(&block.terminator);
1476
1477            // Reset the state to what it was previously.
1478            if is_loop {
1479                self.parent_loops.pop();
1480            }
1481            if is_switch && let Some(bid) = next_block {
1482                self.switch_exit_blocks.remove(&bid);
1483            }
1484            terminator
1485        };
1486
1487        // Translate the statements inside the block
1488        let statements = block
1489            .statements
1490            .iter()
1491            .filter_map(|st| self.translate_statement(st))
1492            .collect_vec();
1493
1494        // Prepend the statements to the terminator.
1495        let mut block = if let Some(st) = tgt::Block::from_seq(statements) {
1496            st.merge(terminator)
1497        } else {
1498            terminator
1499        };
1500
1501        if is_loop {
1502            // Put the loop body inside a `Loop`.
1503            block = tgt::Statement::new(block.span, tgt::StatementKind::Loop(block)).into_block()
1504        } else if !is_switch {
1505            assert!(next_block.is_none());
1506        }
1507
1508        // Concatenate the exit expression, if needs be
1509        if let Some(exit_block_id) = next_block {
1510            let next_block = ensure_sufficient_stack(|| self.translate_block(exit_block_id));
1511            block = block.merge(next_block);
1512        }
1513        block
1514    }
1515}
1516
1517fn translate_body(ctx: &mut TransformCtx, body: &mut gast::Body) {
1518    use gast::Body::{Structured, Unstructured};
1519    let Unstructured(src_body) = body else {
1520        panic!("Called `ullbc_to_llbc` on an already restructured body")
1521    };
1522    trace!("About to translate to ullbc: {:?}", src_body.span);
1523
1524    // Calculate info about the graph and heuristically determine loop and switch exit blocks.
1525    let mut ctx = match ReconstructCtx::build(ctx, src_body) {
1526        Ok(ctx) => ctx,
1527        Err(Irreducible(bid)) => {
1528            let span = src_body.body[bid].terminator.span;
1529            register_error!(
1530                ctx,
1531                span,
1532                "the control-flow graph of this function is not reducible"
1533            );
1534            panic!("can't reconstruct irreducible control-flow")
1535        }
1536    };
1537    // Translate the blocks using the computed data.
1538    let tgt_body = ctx.translate_block(src::BlockId::ZERO);
1539
1540    let tgt_body = tgt::ExprBody {
1541        span: src_body.span,
1542        locals: src_body.locals.clone(),
1543        comments: src_body.comments.clone(),
1544        body: tgt_body,
1545    };
1546    *body = Structured(tgt_body);
1547}
1548
1549pub struct Transform;
1550impl TransformPass for Transform {
1551    fn transform_ctx(&self, ctx: &mut TransformCtx) {
1552        // Translate the bodies one at a time.
1553        ctx.for_each_body(|ctx, body| {
1554            translate_body(ctx, body);
1555        });
1556
1557        // Print the functions
1558        let fmt_ctx = &ctx.into_fmt();
1559        for fun in &ctx.translated.fun_decls {
1560            trace!(
1561                "# Signature:\n{}\n\n# Function definition:\n{}\n",
1562                fun.signature.with_ctx(fmt_ctx),
1563                fun.with_ctx(fmt_ctx),
1564            );
1565        }
1566
1567        if ctx.options.print_built_llbc {
1568            eprintln!("# LLBC resulting from control-flow reconstruction:\n\n{ctx}\n",);
1569        }
1570    }
1571}