charon_lib/transform/control_flow/ullbc_to_llbc.rs
//! ULLBC to LLBC
//!
//! We reconstruct the control-flow in the Unstructured LLBC.
//!
//! The reconstruction algorithm is not efficient (its complexity is probably
//! very bad), but it was not written to be: this is still an early stage and
//! we want the algorithm to generate the best reconstruction possible. We
//! still need to test the algorithm on more interesting examples, and will
//! consider making it more efficient once it is a bit mature and well tested.
//! Also note that, more importantly, we focus on making the algorithm sound:
//! the reconstructed program must always be equivalent to the original MIR
//! program, and it must be obvious that the reconstruction preserves this
//! property.
use itertools::Itertools;
use petgraph::algo::dijkstra;
use petgraph::algo::dominators::{Dominators, simple_fast};
use petgraph::graphmap::DiGraphMap;
use petgraph::visit::{
    Dfs, DfsPostOrder, EdgeFiltered, EdgeRef, GraphRef, IntoNeighbors, VisitMap, Visitable, Walker,
};
use smallvec::SmallVec;
use std::cmp::Reverse;
use std::collections::HashSet;
use std::mem;

use crate::common::ensure_sufficient_stack;
use crate::errors::sanity_check;
use crate::ids::IndexVec;
use crate::llbc_ast as tgt;
use crate::meta::{Span, combine_span};
use crate::transform::TransformCtx;
use crate::transform::ctx::TransformPass;
use crate::ullbc_ast::{self as src, BlockId};
use crate::{ast::*, register_error};

pub enum StackAction<N> {
    PopPath,
    Explore(N),
}
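/// A dfs traversal that also keeps track of the path from the start node to the node currently
/// being visited.
///
/// A minimal usage sketch (this mirrors how `CfgInfo::build` uses it below to compute
/// `within_loops`):
/// ```ignore
/// let mut dfs = DfsWithPath::new(&graph, start);
/// while let Some(node) = dfs.next(&graph) {
///     // `dfs.path` is the dfs path from `start` to `node`, inclusive.
/// }
/// ```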
pub struct DfsWithPath<N, VM> {
    /// The stack of nodes to visit
    pub stack: Vec<StackAction<N>>,
    /// The map of discovered nodes
    pub discovered: VM,
    /// The path from start node to current node.
    pub path: Vec<N>,
}
impl<N, VM> DfsWithPath<N, VM>
where
    N: Copy + PartialEq,
    VM: VisitMap<N>,
{
    /// Create a new **DfsWithPath**, using the graph's visitor map, and put **start** in the stack
    /// of nodes to visit.
    pub fn new<G>(graph: G, start: N) -> Self
    where
        G: GraphRef + Visitable<NodeId = N, Map = VM>,
    {
        Self {
            stack: vec![StackAction::Explore(start)],
            discovered: graph.visit_map(),
            path: vec![],
        }
    }

    /// Return the next node in the dfs, or **None** if the traversal is done.
    pub fn next<G>(&mut self, graph: G) -> Option<N>
    where
        G: IntoNeighbors<NodeId = N>,
    {
        while let Some(action) = self.stack.pop() {
            match action {
                StackAction::Explore(node) => {
                    if self.discovered.visit(node) {
                        self.path.push(node);
                        self.stack.push(StackAction::PopPath);
                        for succ in graph.neighbors(node) {
                            if !self.discovered.is_visited(&succ) {
                                self.stack.push(StackAction::Explore(succ));
                            }
                        }
                        return Some(node);
                    }
                }
                StackAction::PopPath => {
                    self.path.pop();
                }
            }
        }
        None
    }
}

/// Arbitrary-precision numbers
type BigUint = fraction::DynaInt<u64, fraction::BigUint>;
type BigRational = fraction::Ratio<BigUint>;

/// Control-Flow Graph
type Cfg = DiGraphMap<src::BlockId, ()>;

/// Information precomputed about a function's CFG.
#[derive(Debug)]
struct CfgInfo<'a> {
    /// The CFG
    pub cfg: Cfg,
    /// The CFG where all the backward edges have been removed. Aka the "forward CFG".
    pub fwd_cfg: Cfg,
    /// We consider the destinations of the backward edges to be loop entries and
    /// store them here.
    pub loop_entries: HashSet<src::BlockId>,
    /// The blocks whose terminators are a switch are stored here.
    pub switch_blocks: HashSet<src::BlockId>,
    /// Tree recording which nodes dominate which other nodes.
    #[expect(unused)]
    pub dominator_tree: Dominators<BlockId>,
    /// Computed data about each block.
    pub block_data: IndexVec<BlockId, BlockData<'a>>,
}

#[derive(Debug)]
struct BlockData<'a> {
    pub id: BlockId,
    pub contents: &'a src::BlockData,
    /// Whether this block is the (unique) entry point of a loop. It is unique because we error
    /// out on irreducible CFGs.
    pub is_loop_header: bool,
    /// Whether this block's terminator is a switch.
    pub is_switch: bool,
    /// Whether this block has multiple incoming control-flow edges (i.e. is a merge target).
    pub is_merge_target: bool,
    /// Order in a reverse postorder numbering. `None` if the block is unreachable.
    pub reverse_postorder: Option<u32>,
    /// Nodes that this block immediately dominates. Sorted by `reverse_postorder`, with the
    /// largest id first.
    pub immediately_dominates: SmallVec<[BlockId; 2]>,
    /// List of the loops inside of which this node is (loops are identified by their header). A
    /// node is considered inside a loop if it is reachable from the loop header and if it can
    /// reach the loop header using only the backward edges into it (i.e. we don't count a path
    /// that enters the loop header through a forward edge).
    ///
    /// Note that we might have to take a backward edge to reach the loop header, e.g.:
    /// ```text
    /// 'a: loop {
    ///     // ...
    ///     'b: loop {
    ///         // ...
    ///         if true {
    ///             continue 'a;
    ///         } else {
    ///             if true {
    ///                 break 'a;
    ///             }
    ///             // This node has to take two backward edges in order to reach the start of
    ///             // `'a`.
    ///         }
    ///     }
    /// }
    /// ```
    ///
    /// The restriction to backward edges is for the following case:
    /// ```text
    /// loop {
    ///     loop {
    ///         ..
    ///     }
    ///     // Not in the inner loop
    /// }
    /// ```
    ///
    /// This is sorted by path order from the graph root.
    pub within_loops: SmallVec<[BlockId; 2]>,
    /// Node from which we can only reach error nodes (panic, etc.)
    pub only_reach_error: bool,
    /// List of the reachable nodes, with the length of the shortest path to them. Includes the
    /// current node.
    pub shortest_paths: hashbrown::HashMap<BlockId, usize>,
    /// Let's say we put a quantity of water equal to 1 on the block, and the water flows
    /// downwards. Whenever there is a branching, the quantity of water gets equally divided
    /// between the branches. When the control flows join, we put the water back together. The
    /// map below records the amount of water received by each descendant of the node.
    ///
    /// TODO: there must be a known algorithm which computes this, right?...
    /// This is exactly this problem:
    /// <https://stackoverflow.com/questions/78221666/algorithm-for-total-flow-through-weighted-directed-acyclic-graph>
    /// TODO: the way I compute this is not efficient.
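    ///
    /// For example (a worked instance of the rule above): in the diamond CFG `A -> {B, C}`,
    /// `B -> D`, `C -> D`, the flow computed for `A` is `B = 1/2`, `C = 1/2`, and
    /// `D = 1/2 + 1/2 = 1`.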
    pub flow: IndexVec<BlockId, BigRational>,
    /// Reconstructed information about loops and switches.
    pub exit_info: ExitInfo,
}

#[derive(Debug, Default, Clone)]
struct ExitInfo {
    /// The loop exit.
    loop_exit: Option<src::BlockId>,
    /// The switch exit.
    switch_exit: Option<src::BlockId>,
}

/// Error indicating that the control-flow graph is not reducible. The contained block id is a
/// block involved in an irreducible subgraph.
struct Irreducible(BlockId);

impl<'a> CfgInfo<'a> {
    /// Build the CFGs (the "regular" CFG and the CFG without backward edges) and precompute a
    /// bunch of graph information about the CFG.
    fn build(ctx: &TransformCtx, body: &'a src::BodyContents) -> Result<Self, Irreducible> {
        // The steps in this function follow a precise order, as each step typically requires the
        // previous one.
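        // Roughly: (1) initialize the per-block data, (2) build the CFG, (3) compute the
        // dominator tree, (4) compute the reverse postorder numbering and the merge targets,
        // (5) build the forward CFG and detect the loop headers, (6) compute error-reachability,
        // the flows, the shortest paths and the domination data, (7) compute `within_loops`, and
        // finally (8) pick the loop and switch exits.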
        let start_block = BlockId::ZERO;

        let empty_flow = body.map_ref(|_| BigRational::new(0u64.into(), 1u64.into()));
        let mut block_data: IndexVec<BlockId, BlockData> =
            body.map_ref_indexed(|id, contents| BlockData {
                id,
                contents,
                is_loop_header: false,
                is_switch: false,
                is_merge_target: false,
                reverse_postorder: None,
                immediately_dominates: Default::default(),
                within_loops: Default::default(),
                only_reach_error: false,
                shortest_paths: Default::default(),
                flow: empty_flow.clone(),
                exit_info: Default::default(),
            });

        // Build the node graph (we ignore unwind paths for now).
        let mut cfg = Cfg::new();
        for (block_id, block) in body.iter_indexed() {
            cfg.add_node(block_id);
            for tgt in block.targets_ignoring_unwind() {
                cfg.add_edge(block_id, tgt, ());
            }
        }

        // Compute the dominator tree.
        let dominator_tree = simple_fast(&cfg, start_block);

        // Compute the reverse postorder numbering.
        for (i, block_id) in DfsPostOrder::new(&cfg, start_block).iter(&cfg).enumerate() {
            let rev_post_id = body.len() - i;
            block_data[block_id].reverse_postorder = Some(rev_post_id.try_into().unwrap());

            // Store the dominator tree in `block_data`.
            if let Some(dominator) = dominator_tree.immediate_dominator(block_id) {
                block_data[dominator].immediately_dominates.push(block_id);
            }

            // Detect merge targets.
            if cfg
                .neighbors_directed(block_id, petgraph::Direction::Incoming)
                .count()
                >= 2
            {
                block_data[block_id].is_merge_target = true;
            }
        }

        // Compute the forward graph (without backward edges). We do a dfs while keeping track of
        // the path from the start node.
        let mut fwd_cfg = Cfg::new();
        let mut loop_entries = HashSet::new();
        let mut switch_blocks = HashSet::new();
        for block_id in Dfs::new(&cfg, start_block).iter(&cfg) {
            fwd_cfg.add_node(block_id);

            if body[block_id].terminator.kind.is_switch() {
                switch_blocks.insert(block_id);
                block_data[block_id].is_switch = true;
            }

            // Iterate over the edges into this node (so that we can determine whether this node
            // is a loop header).
            for from in cfg.neighbors_directed(block_id, petgraph::Direction::Incoming) {
                // Check if the edge is a backward edge.
                if block_data[from].reverse_postorder >= block_data[block_id].reverse_postorder {
                    // This is a backward edge.
                    block_data[block_id].is_loop_header = true;
                    loop_entries.insert(block_id);
                    // A cfg is reducible iff the target of every back edge dominates the
                    // edge's source.
                    if !dominator_tree.dominators(from).unwrap().contains(&block_id) {
                        return Err(Irreducible(from));
                    }
                } else {
                    fwd_cfg.add_edge(from, block_id, ());
                }
            }
        }

        // Finish filling in the information.
        for block_id in DfsPostOrder::new(&fwd_cfg, start_block).iter(&fwd_cfg) {
            let block = &body[block_id];
            let targets = cfg.neighbors(block_id).collect_vec();
            let fwd_targets = fwd_cfg.neighbors(block_id).collect_vec();

            // Compute the nodes that can only reach error nodes.
            // A node can only reach error nodes if:
            // - it is an error node;
            // - or it has neighbors and they all lead to errors.
            // Note that if there is a backward edge, `only_reach_error` cannot contain this
            // node yet. In other words, this does not consider infinite loops as reaching an
            // error node.
            if block.terminator.is_error()
                || (!targets.is_empty()
                    && targets.iter().all(|&tgt| block_data[tgt].only_reach_error))
            {
                block_data[block_id].only_reach_error = true;
            }

            // Compute the flows between each pair of nodes.
            let mut flow: IndexVec<src::BlockId, BigRational> =
                mem::take(&mut block_data[block_id].flow);
            if !fwd_targets.is_empty() {
                // We need to divide the initial flow equally between the children.
                let factor = BigRational::new(1u64.into(), fwd_targets.len().into());

                // For each child, multiply the flows of its own children by the ratio,
                // and add.
                for &child_id in &fwd_targets {
                    // First, add the child itself.
                    flow[child_id] += factor.clone();

                    // Then add its successors.
                    let child = &block_data[child_id];
                    for grandchild in child.reachable_excluding_self() {
                        // Flow from `child` to `grandchild`.
                        let child_flow = child.flow[grandchild].clone();
                        flow[grandchild] += factor.clone() * child_flow;
                    }
                }
            }
            block_data[block_id].flow = flow;

            // Compute the shortest paths to all the reachable nodes in the forward graph.
            block_data[block_id].shortest_paths = dijkstra(&fwd_cfg, block_id, None, |_| 1usize);

            // Fill in the rest of the domination data.
            let mut dominatees = mem::take(&mut block_data[block_id].immediately_dominates);
            dominatees.sort_by_key(|&child| block_data[child].reverse_postorder);
            dominatees.reverse();
            block_data[block_id].immediately_dominates = dominatees;
        }

        // Fill in the `within_loops` information. See the docs of `within_loops` to understand
        // what we're computing.
        let mut path_dfs = DfsWithPath::new(&cfg, start_block);
        while let Some(block_id) = path_dfs.next(&cfg) {
            // Store all the loops on the path to this node.
            let mut within_loops: SmallVec<_> = path_dfs
                .path
                .iter()
                .copied()
                .filter(|&loop_id| block_data[loop_id].is_loop_header)
                .collect();
            // The loops that we can reach by taking a single backward edge.
            let loops_directly_within = within_loops
                .iter()
                .copied()
                .filter(|&loop_header| {
                    cfg.neighbors_directed(loop_header, petgraph::Direction::Incoming)
                        .any(|bid| block_data[block_id].shortest_paths.contains_key(&bid))
                })
                .collect_vec();
            // The loops that we can reach by taking any number of backward edges.
            let loops_indirectly_within: HashSet<_> = loops_directly_within
                .iter()
                .copied()
                .flat_map(|loop_header| &block_data[loop_header].within_loops)
                .chain(&loops_directly_within)
                .copied()
                .collect();
            within_loops.retain(|id| loops_indirectly_within.contains(id));
            block_data[block_id].within_loops = within_loops;
        }

        let mut cfg = CfgInfo {
            cfg,
            fwd_cfg,
            loop_entries,
            switch_blocks,
            dominator_tree,
            block_data,
        };

        // Pick an exit block for each loop, if we find one.
        ExitInfo::compute_loop_exits(ctx, &mut cfg);

        // Pick an exit block for each switch, if we find one.
        ExitInfo::compute_switch_exits(&mut cfg);

        Ok(cfg)
    }

    fn block_data(&self, block_id: BlockId) -> &BlockData<'a> {
        &self.block_data[block_id]
    }
    // fn can_reach(&self, src: BlockId, tgt: BlockId) -> bool {
    //     self.block_data[src].shortest_paths.contains_key(&tgt)
    // }
    fn topo_rank(&self, block_id: BlockId) -> u32 {
        self.block_data[block_id].reverse_postorder.unwrap()
    }
    #[expect(unused)]
    fn is_backward_edge(&self, src: BlockId, tgt: BlockId) -> bool {
        self.block_data[src].reverse_postorder >= self.block_data[tgt].reverse_postorder
            && self.cfg.contains_edge(src, tgt)
    }

    /// Check if the node is within the given loop.
    fn is_within_loop(&self, loop_header: src::BlockId, block_id: src::BlockId) -> bool {
        self.block_data[block_id]
            .within_loops
            .contains(&loop_header)
    }

    /// Check if all paths from `src` to nodes in `target_set` go through `through_node`. If `src`
    /// is already in `target_set`, we ignore that empty path.
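    /// E.g. with edges `src -> through_node` and `through_node -> t` where `t` is in
    /// `target_set`, this returns `true`; adding a direct edge `src -> t` would make it `false`.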
    fn all_paths_go_through(
        &self,
        src: src::BlockId,
        through_node: src::BlockId,
        target_set: &HashSet<src::BlockId>,
    ) -> bool {
        let graph = EdgeFiltered::from_fn(&self.fwd_cfg, |edge| edge.source() != through_node);
        !Dfs::new(&graph, src)
            .iter(&graph)
            .skip(1) // skip `src`
            .any(|bid| target_set.contains(&bid))
    }
}

impl BlockData<'_> {
    fn shortest_paths_including_self(&self) -> impl Iterator<Item = (BlockId, usize)> {
        self.shortest_paths.iter().map(|(bid, d)| (*bid, *d))
    }
    fn shortest_paths_excluding_self(&self) -> impl Iterator<Item = (BlockId, usize)> {
        self.shortest_paths_including_self()
            .filter(move |&(bid, _)| bid != self.id)
    }
    fn reachable_including_self(&self) -> impl Iterator<Item = BlockId> {
        self.shortest_paths_including_self().map(|(bid, _)| bid)
    }
    fn reachable_excluding_self(&self) -> impl Iterator<Item = BlockId> {
        self.shortest_paths_excluding_self().map(|(bid, _)| bid)
    }
    #[expect(unused)]
    fn can_reach_excluding_self(&self, other: BlockId) -> bool {
        self.shortest_paths.contains_key(&other) && self.id != other
    }
}

/// See [`ExitInfo::compute_loop_exit_ranks`].
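///
/// Ranks compare lexicographically through the derived `Ord`: a higher `path_count` wins first,
/// and, thanks to the `Reverse` wrapper, a smaller distance from the header wins ties.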
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
struct LoopExitRank {
    /// Number of paths we found going to this exit.
    path_count: usize,
    /// Distance from the loop header.
    distance_from_header: Reverse<usize>,
}

impl ExitInfo {
    /// Compute the first node on each path that exits the loop.
    fn compute_loop_exit_starting_points(
        cfg: &CfgInfo,
        loop_header: src::BlockId,
    ) -> Vec<src::BlockId> {
        let mut loop_exits = Vec::new();
        // Do a dfs from the loop header; whenever we reach a node outside the loop, record it
        // and stop exploring that path.
        let mut dfs = Dfs::new(&cfg.fwd_cfg, loop_header);
        while let Some(block_id) = dfs.next(&cfg.fwd_cfg) {
            // If we've exited all the loops after and including the target one, this node is an
            // exit node for the target loop.
            if !cfg.is_within_loop(loop_header, block_id) {
                loop_exits.push(block_id);
                // Don't explore any more paths from this node.
                dfs.discovered.extend(cfg.fwd_cfg.neighbors(block_id));
            }
        }
        loop_exits
    }

    /// Compute the loop exit candidates along with a rank.
    ///
    /// In the simple case, there is one exit node through which all the exit paths go. We want
    /// to be sure to catch that case, and when that's not possible we still want to find a node
    /// through which many exit paths go.
    ///
    /// To do that, we first count, for each exit node, how many exit paths go through it, and
    /// pick the node with the most occurrences. If there are several such nodes, we pick the one
    /// with the shortest distance from the loop header. Finally, if there are still several such
    /// nodes, we keep the first node found (the order in which we explore the graph is
    /// deterministic, and we use an insertion-order hash map).
    ///
    /// Note that exit candidates will typically be referenced more than once for one loop. This
    /// comes from the fact that whenever we reach a node outside the current loop, we register
    /// this node as well as all its children as exit candidates.
    /// Consider the following example:
    /// ```text
    /// while i < max {
    ///     if cond {
    ///         break;
    ///     }
    ///     s += i;
    ///     i += 1
    /// }
    /// // All the nodes below are exit candidates (each of them is referenced twice)
    /// s += 1;
    /// return s;
    /// ```
    fn compute_loop_exit_ranks(
        cfg: &CfgInfo,
        loop_header: src::BlockId,
    ) -> SeqHashMap<src::BlockId, LoopExitRank> {
        let mut loop_exits: SeqHashMap<BlockId, LoopExitRank> = SeqHashMap::new();
        for block_id in Self::compute_loop_exit_starting_points(cfg, loop_header) {
            for bid in cfg.block_data(block_id).reachable_including_self() {
                loop_exits
                    .entry(bid)
                    .or_insert_with(|| LoopExitRank {
                        path_count: 0,
                        distance_from_header: Reverse(
                            cfg.block_data[loop_header].shortest_paths[&bid],
                        ),
                    })
                    .path_count += 1;
            }
        }
        loop_exits
    }

    /// A loop exit is any block reachable from the loop header that isn't inside the loop.
    /// This function chooses an exit for every loop. See `compute_loop_exit_ranks` for how we
    /// select them.
    ///
    /// For example:
    /// ```text
    /// while ... {
    ///     ...
    ///     if ... {
    ///         // We can't reach the loop entry from here: this is an exit
    ///         // candidate
    ///         return;
    ///     }
    /// }
    /// // This is another exit candidate - and this is the one we want to use
    /// // as the "real" exit...
    /// ...
    /// ```
    ///
    /// Once we have listed all the exit candidates, we find the "best" one for every loop. The
    /// best exit is the following one:
    /// - it is the one which is used the most times (note that there can be
    ///   several candidates which are referenced strictly more than once: see the
    ///   comment below)
    /// - if several exits have the same number of occurrences, we choose the one
    ///   for which we goto the "earliest" (earliest meaning that the goto is close to
    ///   the loop entry node in the AST). The reason is that all the loops should
    ///   have an outer if ... then ... else ... which executes the loop body or goes
    ///   to the exit (note that this is not necessarily the first
    ///   if ... then ... else ... we find: loop conditions can be arbitrary
    ///   expressions, containing branchings).
    ///
    /// # Several candidates for a loop exit
    ///
    /// There used to be a sanity check to ensure there are no two different
    /// candidates with exactly the same number of occurrences and distance from
    /// the entry of the loop, if the number of occurrences is > 1.
    ///
    /// We removed it because it does happen, for instance here (the match
    /// introduces an `unreachable` node, and it has the same number of
    /// occurrences and the same distance to the loop entry as the `panic`
    /// node):
    ///
    /// ```text
    /// pub fn list_nth_mut_loop_pair<'a, T>(
    ///     mut ls: &'a mut List<T>,
    ///     mut i: u32,
    /// ) -> &'a mut T {
    ///     loop {
    ///         match ls {
    ///             List::Nil => {
    ///                 panic!() // <-- best candidate
    ///             }
    ///             List::Cons(x, tl) => {
    ///                 if i == 0 {
    ///                     return x;
    ///                 } else {
    ///                     ls = tl;
    ///                     i -= 1;
    ///                 }
    ///             }
    ///             _ => {
    ///                 // Note that Rustc always introduces an unreachable branch after
    ///                 // desugaring matches.
    ///                 unreachable!() // <-- best candidate
    ///             }
    ///         }
    ///     }
    /// }
    /// ```
    ///
    /// When this happens we choose an exit candidate whose edges don't necessarily
    /// lead to an error (above there are none, so we don't choose any exit). Note
    /// that this last condition is important to prevent loops from being unnecessarily
    /// nested:
    ///
    /// ```text
    /// pub fn nested_loops_enum(step_out: usize, step_in: usize) -> usize {
    ///     let mut s = 0;
    ///
    ///     for _ in 0..128 { // We don't want this loop to be nested with the loops below
    ///         s += 1;
    ///     }
    ///
    ///     for _ in 0..(step_out) {
    ///         for _ in 0..(step_in) {
    ///             s += 1;
    ///         }
    ///     }
    ///
    ///     s
    /// }
    /// ```
    fn compute_loop_exits(ctx: &TransformCtx, cfg: &mut CfgInfo) {
        for &loop_id in &cfg.loop_entries {
            // Compute the candidates.
            let loop_exits: SeqHashMap<BlockId, LoopExitRank> =
                Self::compute_loop_exit_ranks(cfg, loop_id);
            // We choose the exit with:
            // - the most occurrences;
            // - the smallest distance from the loop header (if there are several possibilities);
            // - which doesn't necessarily lead to an error (panic, unreachable).
            let best_exits: Vec<(BlockId, LoopExitRank)> =
                loop_exits.into_iter().max_set_by_key(|&(_, rank)| rank);
            // If there is exactly one best candidate, use it. Otherwise we need to filter
            // further.
            let chosen_exit = match best_exits.into_iter().map(|(bid, _)| bid).exactly_one() {
                Ok(best_exit) => Some(best_exit),
                Err(best_exits) => {
                    // Remove the candidates which only lead to errors (panic or unreachable).
                    // If there is exactly one candidate left we select it, otherwise we do not
                    // select any exit.
                    // We don't want to select any exit if we are in the situation below
                    // (where all paths lead to errors). We added a sanity check below to
                    // catch the situations where there are several exits which don't
                    // lead to errors.
                    //
                    // Example:
                    // ========
                    // ```
                    // loop {
                    //     match ls {
                    //         List::Nil => {
                    //             panic!() // <-- best candidate
                    //         }
                    //         List::Cons(x, tl) => {
                    //             if i == 0 {
                    //                 return x;
                    //             } else {
                    //                 ls = tl;
                    //                 i -= 1;
                    //             }
                    //         }
                    //         _ => {
                    //             unreachable!() // <-- best candidate (Rustc introduces an `unreachable` case)
                    //         }
                    //     }
                    // }
                    // ```
                    best_exits
                        .filter(|&bid| !cfg.block_data[bid].only_reach_error)
                        .exactly_one()
                        .map_err(|mut candidates| {
                            // Adding this sanity check so that we can see when there are several
                            // candidates.
                            let span = cfg.block_data[loop_id].contents.terminator.span;
                            sanity_check!(ctx, span, candidates.next().is_none());
                        })
                        .ok()
                }
            };
            cfg.block_data[loop_id].exit_info.loop_exit = chosen_exit;
        }
    }

    /// Let's consider the following piece of code:
    /// ```text
    /// if cond1 { ... } else { ... };
    /// if cond2 { ... } else { ... };
    /// ```
    /// Once converted to MIR, the control-flow is destructured, which means we
    /// have gotos everywhere. When reconstructing the control-flow, we have
    /// to be careful about the point where we should join the two branches of
    /// the first if.
    /// For instance, if we don't notice that they should be joined at some point (i.e.,
    /// whatever branch we take, there is a moment when we go to the exact
    /// same place, just before the second if), we might generate code like
    /// this, with some duplication:
    /// ```text
    /// if cond1 { ...; if cond2 { ... } else { ... } }
    /// else { ...; if cond2 { ... } else { ... } }
    /// ```
    ///
    /// Such a reconstructed program is valid, but it is definitely non-optimal:
    /// it is very different from the original program (making it less clean and
    /// clear), more bloated, and might involve duplicating the proof effort.
    ///
    /// For this reason, we need to find the "exit" of the first switch, which is
    /// the point where the two branches join. Note that this can be a bit tricky,
    /// because there may be more than two branches (if we do `switch(x) { ... }`),
    /// and some of them might not join (if they contain a `break`, `panic`,
    /// `return`, etc.).
    ///
    /// In order to compute the switch exits, we simply recursively compute a
    /// topologically ordered set of "filtered successors" as follows (note
    /// that we work in the CFG *without* back edges):
    /// - for a block which doesn't branch (only one successor), the filtered
    ///   successors are the set of reachable nodes;
    /// - for a block which branches, we compute the nodes reachable from all
    ///   the children, and find the "best" intersection between those.
    ///   Note that we find the "best" intersection (a pair of branches which
    ///   maximize the intersection of filtered successors) because some branches
    ///   might never join the control-flow of the other branches, if they contain
    ///   a `break`, `return`, `panic`, etc., like here:
    /// ```text
    /// if b { x = 3; } else { return; }
    /// y += x;
    /// ...
    /// ```
    /// Note that with nested switches, the branches of the inner switches might
    /// goto the exits of the outer switches: for this reason, we give precedence
    /// to the outer switches.
    fn compute_switch_exits(cfg: &mut CfgInfo) {
        // We need to give precedence to the outer switches: we thus iterate
        // over the switch blocks in topological order.
        let mut exits_set = HashSet::new();
        for bid in cfg
            .switch_blocks
            .iter()
            .copied()
            .sorted_unstable_by_key(|&bid| (cfg.topo_rank(bid), bid))
        {
            let block_data = &cfg.block_data[bid];
            // Find the best successor: this is the node with the highest flow and the lowest
            // topological rank. If several nodes have the same flow, we want to take the highest
            // one in the hierarchy: hence the use of the topological rank.
            //
            // Ex.:
            // ```
            // A -- we start here
            // |
            // |---------------------------------------
            // |            |            |            |
            // B:(0.25,-1)  C:(0.25,-2)  D:(0.25,-3)  E:(0.25,-4)
            // |            |            |
            // |--------------------------
            // |
            // F:(0.75,-5)
            // |
            // |
            // G:(0.75,-6)
            // ```
            // The "best" node (the one with the highest (flow, rank)) in the graph above is F.
            // If the switch is inside a loop, we also only consider exits that are inside that
            // same loop. There must be one, otherwise the switch entry would not be inside the
            // loop.
            let current_loop = block_data.within_loops.last().copied();
            let best_exit: Option<BlockId> = block_data
                .reachable_excluding_self()
                .filter(|&b| {
                    current_loop.is_none_or(|current_loop| cfg.is_within_loop(current_loop, b))
                })
                .max_by_key(|&id| {
                    let flow = &block_data.flow[id];
                    let rank = Reverse(cfg.topo_rank(id));
                    ((flow, rank), id)
                });
            // We have an exit candidate: we first check that it was not already taken by an
            // outer switch.
            //
            // We then check that we can't reach the exit of an outer switch from one of the
            // branches without going through the exit candidate. We do this by simply checking
            // that we can't reach any of the exits of the outer switches.
            //
            // The reason is that it can otherwise lead to code like the following:
            // ```
            // if ... { // if #1
            //     if ... { // if #2
            //         ...
            //         // here, we have a `goto b1`, where b1 is the exit
            //         // of if #2: we thus stop translating the blocks.
            //     }
            //     else {
            //         ...
            //         // here, we have a `goto b2`, where b2 is the exit
            //         // of if #1: we thus stop translating the blocks.
            //     }
            //     // We insert the code of the block b1 here (b1 being the exit of
            //     // if #2). However, this block should only be executed in the
            //     // branch "then" of if #2, not in the branch "else".
            //     ...
            // }
            // else {
            //     ...
            // }
            // ```
            if let Some(exit_id) = best_exit
                && !exits_set.contains(&exit_id)
                && cfg.all_paths_go_through(bid, exit_id, &exits_set)
            {
                exits_set.insert(exit_id);
                cfg.block_data[bid].exit_info.switch_exit = Some(exit_id);
            }
        }
    }
}

enum GotoKind {
    Break(usize),
    Continue(usize),
    NextBlock,
    Goto,
}

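/// Value of `break_context_depth` at which a `loop` context was entered. In `get_goto_kind`
/// below, a jump to a context entered at depth `d` while the current depth is `n` becomes a
/// `break`/`continue` with index `n - d`, so index `0` targets the innermost enclosing loop.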
type Depth = usize;

#[derive(Debug, Clone, Copy)]
enum SpecialJump {
    /// When encountering this block, `continue` to the given depth.
    LoopContinue(Depth),
    /// When encountering this block, `break` to the given depth. This comes from a loop.
    LoopBreak(Depth),
    /// When encountering this block, `break` to the given depth. This is a `loop` context
    /// introduced only for forward jumps.
    ForwardBreak(Depth),
    /// When encountering this block, do nothing, as this is the next block that will be
    /// translated.
    NextBlock,
}
enum ReconstructMode {
    /// Reconstruct using flow heuristics.
    Flow,
    /// Reconstruct using the algorithm from "Beyond Relooper"
    /// (<https://dl.acm.org/doi/10.1145/3547621>).
    /// A useful invariant is that the block at the top of the jump stack is the block where
    /// control-flow will jump naturally at the end of the current block.
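    /// Forward jumps to merge nodes are encoded with `loop { ...; break }` blocks (see
    /// `translate_block_inner` below): a `break` exits such a `loop` and lands on the merge
    /// node, which is translated right after it.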
    Reloop,
}

struct ReconstructCtx<'a> {
    cfg: CfgInfo<'a>,
    /// The depth of `loop` contexts we may `break`/`continue` to.
    break_context_depth: Depth,
    /// Stack of block ids that should be translated to special jumps (`break`/`continue`/do
    /// nothing) in the current context.
    /// The block where control-flow continues naturally after the current block is kept at the
    /// top of the stack.
    special_jump_stack: Vec<(BlockId, SpecialJump)>,
    mode: ReconstructMode,
}

impl<'a> ReconstructCtx<'a> {
    fn build(ctx: &TransformCtx, src_body: &'a src::ExprBody) -> Result<Self, Irreducible> {
        // Compute all sorts of graph-related information about the control-flow graph, including
        // reachability, the dominator tree, loop entries, and loop/switch exits.
        let cfg = CfgInfo::build(ctx, &src_body.body)?;

        // Translate the body by reconstructing the loops and the
        // conditional branchings.
        let use_relooper = false;
        Ok(ReconstructCtx {
            cfg,
            break_context_depth: 0,
            special_jump_stack: Vec::new(),
            mode: if use_relooper {
                ReconstructMode::Reloop
            } else {
                ReconstructMode::Flow
            },
        })
    }

    fn translate_statement(&self, st: &src::Statement) -> tgt::Statement {
        let src_span = st.span;
        let st = match st.kind.clone() {
            src::StatementKind::Assign(place, rvalue) => tgt::StatementKind::Assign(place, rvalue),
            src::StatementKind::SetDiscriminant(place, variant_id) => {
                tgt::StatementKind::SetDiscriminant(place, variant_id)
            }
            src::StatementKind::CopyNonOverlapping(copy) => {
                tgt::StatementKind::CopyNonOverlapping(copy)
            }
            src::StatementKind::StorageLive(var_id) => tgt::StatementKind::StorageLive(var_id),
            src::StatementKind::StorageDead(var_id) => tgt::StatementKind::StorageDead(var_id),
            src::StatementKind::Deinit(place) => tgt::StatementKind::Deinit(place),
            src::StatementKind::Assert(assert) => tgt::StatementKind::Assert(assert),
            src::StatementKind::Nop => tgt::StatementKind::Nop,
        };
        tgt::Statement::new(src_span, st)
    }

    fn get_goto_kind(&self, target_block: src::BlockId) -> GotoKind {
        match self
            .special_jump_stack
            .iter()
            .rev()
            .enumerate()
            .find(|(_, (b, _))| *b == target_block)
        {
            Some((i, (_, jump_target))) => match jump_target {
                // The top of the stack is where control-flow goes naturally, no need to add a
                // `break`/`continue`.
                SpecialJump::LoopContinue(_) | SpecialJump::ForwardBreak(_)
                    if i == 0 && matches!(self.mode, ReconstructMode::Reloop) =>
                {
                    GotoKind::NextBlock
                }
                SpecialJump::LoopContinue(depth) => {
                    GotoKind::Continue(self.break_context_depth - depth)
                }
                SpecialJump::ForwardBreak(depth) | SpecialJump::LoopBreak(depth) => {
                    GotoKind::Break(self.break_context_depth - depth)
                }
                SpecialJump::NextBlock => GotoKind::NextBlock,
            },
            // Translate the block without a jump.
            None => GotoKind::Goto,
        }
    }

    /// Translate a jump to the given block. The span is used to create the jump statement, if
    /// any.
    fn translate_jump(&mut self, span: Span, target_block: src::BlockId) -> tgt::Block {
        let st = match self.get_goto_kind(target_block) {
            GotoKind::Break(index) => tgt::StatementKind::Break(index),
            GotoKind::Continue(index) => tgt::StatementKind::Continue(index),
            GotoKind::NextBlock => tgt::StatementKind::Nop,
            // "Standard" goto: we recursively translate the block.
            GotoKind::Goto => return self.translate_block(target_block),
        };
        tgt::Statement::new(span, st).into_block()
    }
941 fn translate_terminator(&mut self, terminator: &src::Terminator) -> tgt::Block {
942 let src_span = terminator.span;
943
944 match &terminator.kind {
945 src::TerminatorKind::Abort(kind) => {
946 tgt::Statement::new(src_span, tgt::StatementKind::Abort(kind.clone())).into_block()
947 }
948 src::TerminatorKind::Return => {
949 tgt::Statement::new(src_span, tgt::StatementKind::Return).into_block()
950 }
951 src::TerminatorKind::UnwindResume => {
952 tgt::Statement::new(src_span, tgt::StatementKind::Abort(AbortKind::Panic(None)))
953 .into_block()
954 }
955 src::TerminatorKind::Call {
956 call,
957 target,
958 on_unwind: _,
959 } => {
960 // TODO: Have unwinds in the LLBC
961 let st = tgt::Statement::new(src_span, tgt::StatementKind::Call(call.clone()));
962 let mut block = self.translate_jump(terminator.span, *target);
963 block.statements.insert(0, st);
964 block
965 }
966 src::TerminatorKind::Drop {
967 kind,
968 place,
969 tref,
970 target,
971 on_unwind: _,
972 } => {
973 // TODO: Have unwinds in the LLBC
974 let st = tgt::Statement::new(
975 src_span,
976 tgt::StatementKind::Drop(place.clone(), tref.clone(), kind.clone()),
977 );
978 let mut block = self.translate_jump(terminator.span, *target);
979 block.statements.insert(0, st);
980 block
981 }
982 src::TerminatorKind::Goto { target } => self.translate_jump(terminator.span, *target),
983 src::TerminatorKind::Switch { discr, targets } => {
984 // Translate the target expressions
985 let switch = match &targets {
986 src::SwitchTargets::If(then_tgt, else_tgt) => {
987 let then_block = self.translate_jump(terminator.span, *then_tgt);
988 let else_block = self.translate_jump(terminator.span, *else_tgt);
989 tgt::Switch::If(discr.clone(), then_block, else_block)
990 }
991 src::SwitchTargets::SwitchInt(int_ty, targets, otherwise) => {
992 // Note that some branches can be grouped together, like
993 // here:
994 // ```
995 // match e {
996 // E::V1 | E::V2 => ..., // Grouped
997 // E::V3 => ...
998 // }
999 // ```
1000 // We detect this by checking if a block has already been
1001 // translated as one of the branches of the switch.
1002 //
1003 // Rk.: note there may be intermediate gotos depending
1004 // on the MIR we use. Typically, we manage to detect the
1005 // grouped branches with Optimized MIR, but not with Promoted
1006 // MIR. See the comment in "tests/src/matches.rs".
1007
1008 // We link block ids to:
1009 // - vector of matched integer values
1010 // - translated blocks
1011 let mut branches: SeqHashMap<src::BlockId, (Vec<Literal>, tgt::Block)> =
1012 SeqHashMap::new();
1013
1014 // Translate the children expressions
1015 for (v, bid) in targets.iter() {
1016 // Check if the block has already been translated:
1017 // if yes, it means we need to group branches
1018 if branches.contains_key(bid) {
1019 // Already translated: add the matched value to
1020 // the list of values
1021 let branch = branches.get_mut(bid).unwrap();
1022 branch.0.push(v.clone());
1023 } else {
1024 // Not translated: translate it
1025 let block = self.translate_jump(terminator.span, *bid);
1026 // We use the terminator span information in case then
1027 // then statement is `None`
1028 branches.insert(*bid, (vec![v.clone()], block));
1029 }
1030 }
1031 let targets_blocks: Vec<(Vec<Literal>, tgt::Block)> =
1032 branches.into_iter().map(|(_, x)| x).collect();
1033
1034 let otherwise_block = self.translate_jump(terminator.span, *otherwise);
1035
1036 // Translate
1037 tgt::Switch::SwitchInt(
1038 discr.clone(),
1039 *int_ty,
1040 targets_blocks,
1041 otherwise_block,
1042 )
1043 }
1044 };
1045
1046 // Return
1047 let span = tgt::combine_switch_targets_span(&switch);
1048 let span = combine_span(&src_span, &span);
1049 let st = tgt::StatementKind::Switch(switch);
1050 tgt::Statement::new(span, st).into_block()
1051 }
1052 }
1053 }

    /// Translate just the block statements and terminator.
    fn translate_block_itself(&mut self, block_id: src::BlockId) -> tgt::Block {
        let block = self.cfg.block_data[block_id].contents;
        // Translate the statements inside the block.
        let statements = block
            .statements
            .iter()
            .map(|st| self.translate_statement(st))
            .collect_vec();
        // Translate the terminator.
        let terminator = self.translate_terminator(&block.terminator);
        // Prepend the statements to the terminator.
        if let Some(st) = tgt::Block::from_seq(statements) {
            st.merge(terminator)
        } else {
            terminator
        }
    }

    /// Translate a block, including the surrounding control-flow like looping.
    fn translate_block(&mut self, block_id: src::BlockId) -> tgt::Block {
        ensure_sufficient_stack(|| self.translate_block_inner(block_id))
    }
    fn translate_block_inner(&mut self, block_id: src::BlockId) -> tgt::Block {
        // Some of the blocks we might jump to inside this tree can't be translated as normal
        // blocks: the loop backward edges must become `continue`s, and the merge nodes may need
        // some care if we're jumping to them from distant locations.
        // For this purpose, we push to the `special_jump_stack` the block ids that must be
        // translated specially. In `translate_jump` we check the stack. At the end of this
        // function we restore the stack to its previous state.
        let old_context_depth = self.special_jump_stack.len();
        let block_data = &self.cfg.block_data[block_id];
        let span = block_data.contents.terminator.span;

        // Catch jumps to the loop header or the loop exit.
        if block_data.is_loop_header {
            self.break_context_depth += 1;
            if let ReconstructMode::Flow = self.mode
                && let Some(exit_id) = block_data.exit_info.loop_exit
            {
                self.special_jump_stack
                    .push((exit_id, SpecialJump::LoopBreak(self.break_context_depth)));
            }
            // Put the next block at the top of the stack.
            self.special_jump_stack.push((
                block_id,
                SpecialJump::LoopContinue(self.break_context_depth),
            ));
        }

        // Catch jumps to a merge node.
        match self.mode {
            ReconstructMode::Flow => {
                // We only support next-block jumps to merge nodes.
                if let Some(bid) = block_data.exit_info.switch_exit
                    && !block_data.is_loop_header
                {
                    self.special_jump_stack.push((bid, SpecialJump::NextBlock));
                }
            }
            ReconstructMode::Reloop => {
                // We support forward jumps using `break`.
                // The child with the highest postorder numbering is nested outermost in this
                // scheme.
                let merge_children = block_data
                    .immediately_dominates
                    .iter()
                    .copied()
                    .filter(|&child| self.cfg.block_data[child].is_merge_target);
                for child in merge_children {
                    self.break_context_depth += 1;
                    self.special_jump_stack
                        .push((child, SpecialJump::ForwardBreak(self.break_context_depth)));
                }
            }
        }

        // Translate this block. Any jumps to a loop header or a merge node will be replaced with
        // `continue`/`break`.
        let mut block = self.translate_block_itself(block_id);

        // Reset the state to what it was previously, and translate what remains.
        let new_block = move |kind| tgt::Statement::new(block.span, kind).into_block();
        while self.special_jump_stack.len() > old_context_depth {
            match self.special_jump_stack.pop().unwrap() {
                (_loop_header, SpecialJump::LoopContinue(_)) => {
                    self.break_context_depth -= 1;
                    block = new_block(tgt::StatementKind::Loop(block));
                }
                (next_block, SpecialJump::ForwardBreak(_)) => {
                    self.break_context_depth -= 1;
                    // We add a `loop { ...; break }` so that we can use `break` to jump forward.
                    block = block.merge(new_block(tgt::StatementKind::Break(0)));
                    block = new_block(tgt::StatementKind::Loop(block));
                    // We must translate the merge nodes after the block used for forward jumps
                    // to them.
                    let next_block = self.translate_jump(span, next_block);
                    block = block.merge(next_block);
                }
                (next_block, SpecialJump::NextBlock) | (next_block, SpecialJump::LoopBreak(..)) => {
                    let next_block = self.translate_jump(span, next_block);
                    block = block.merge(next_block);
                }
            }
        }
        block
    }
}

fn translate_body(ctx: &mut TransformCtx, body: &mut gast::Body) {
    use gast::Body::{Structured, Unstructured};
    let Unstructured(src_body) = body else {
        panic!("Called `ullbc_to_llbc` on an already restructured body")
    };
    trace!("About to translate to llbc: {:?}", src_body.span);

    // Calculate info about the graph and heuristically determine the loop and switch exit
    // blocks.
    let start_block = BlockId::ZERO;
    let mut ctx = match ReconstructCtx::build(ctx, src_body) {
        Ok(ctx) => ctx,
        Err(Irreducible(bid)) => {
            let span = src_body.body[bid].terminator.span;
            register_error!(
                ctx,
                span,
                "the control-flow graph of this function is not reducible"
            );
            panic!("can't reconstruct irreducible control-flow")
        }
    };
    // Translate the blocks using the computed data.
    let tgt_body = ctx.translate_block(start_block);

    let tgt_body = tgt::ExprBody {
        span: src_body.span,
        locals: src_body.locals.clone(),
        bound_body_regions: src_body.bound_body_regions,
        body: tgt_body,
        comments: src_body.comments.clone(),
    };
    *body = Structured(tgt_body);
}

pub struct Transform;
impl TransformPass for Transform {
    fn transform_ctx(&self, ctx: &mut TransformCtx) {
        // Translate the bodies one at a time.
        ctx.for_each_body(|ctx, body| {
            translate_body(ctx, body);
        });

        if ctx.options.print_built_llbc {
            eprintln!("# LLBC resulting from control-flow reconstruction:\n\n{ctx}\n",);
        } else {
            trace!("# LLBC resulting from control-flow reconstruction:\n\n{ctx}\n",);
        }
    }
}