Skip to main content

charon_lib/transform/control_flow/
ullbc_to_llbc.rs

1//! ULLBC to LLBC
2//!
3//! We reconstruct the control-flow in the Unstructured LLBC.
4//!
5//! The reconstruction algorithm is not written to be efficient (its complexity
6//! is probably very bad), but it was not written to be: this is still an early
7//! stage and we want the algorithm to generate the best reconstruction as
8//! possible. We still need to test the algorithm on more interesting examples,
9//! and will consider making it more efficient once it is a bit mature and well
10//! tested.
11//! Also note that we more importantly focus on making the algorithm sound: the
12//! reconstructed program must always be equivalent to the original MIR program,
13//! and the fact that the reconstruction preserves this property must be obvious.
14use itertools::Itertools;
15use petgraph::algo::dijkstra;
16use petgraph::algo::dominators::{Dominators, simple_fast};
17use petgraph::graphmap::DiGraphMap;
18use petgraph::visit::{
19    Dfs, DfsPostOrder, EdgeFiltered, EdgeRef, GraphRef, IntoNeighbors, VisitMap, Visitable, Walker,
20};
21use smallvec::SmallVec;
22use std::cmp::Reverse;
23use std::collections::{HashMap, HashSet};
24use std::mem;
25
26use crate::common::ensure_sufficient_stack;
27use crate::ids::IndexVec;
28use crate::llbc_ast::{self as tgt, StatementId};
29use crate::meta::{Span, combine_span};
30use crate::transform::TransformCtx;
31use crate::transform::ctx::TransformPass;
32use crate::ullbc_ast::{self as src, BlockId};
33use crate::{ast::*, register_error};
34
35pub enum StackAction<N> {
36    PopPath,
37    Explore(N),
38}
39pub struct DfsWithPath<N, VM> {
40    /// The stack of nodes to visit
41    pub stack: Vec<StackAction<N>>,
42    /// The map of discovered nodes
43    pub discovered: VM,
44    /// The path from start node to current node.
45    pub path: Vec<N>,
46}
47impl<N, VM> DfsWithPath<N, VM>
48where
49    N: Copy + PartialEq,
50    VM: VisitMap<N>,
51{
52    /// Create a new **DfsWithPath**, using the graph's visitor map, and put **start** in the stack
53    /// of nodes to visit.
54    pub fn new<G>(graph: G, start: N) -> Self
55    where
56        G: GraphRef + Visitable<NodeId = N, Map = VM>,
57    {
58        Self {
59            stack: vec![StackAction::Explore(start)],
60            discovered: graph.visit_map(),
61            path: vec![],
62        }
63    }
64
65    /// Return the next node in the dfs, or **None** if the traversal is done.
66    pub fn next<G>(&mut self, graph: G) -> Option<N>
67    where
68        G: IntoNeighbors<NodeId = N>,
69    {
70        while let Some(action) = self.stack.pop() {
71            match action {
72                StackAction::Explore(node) => {
73                    if self.discovered.visit(node) {
74                        self.path.push(node);
75                        self.stack.push(StackAction::PopPath);
76                        for succ in graph.neighbors(node) {
77                            if !self.discovered.is_visited(&succ) {
78                                self.stack.push(StackAction::Explore(succ));
79                            }
80                        }
81                        return Some(node);
82                    }
83                }
84                StackAction::PopPath => {
85                    self.path.pop();
86                }
87            }
88        }
89        None
90    }
91}
92
93/// Arbitrary-precision numbers
94type BigUint = fraction::DynaInt<u64, fraction::BigUint>;
95type BigRational = fraction::Ratio<BigUint>;
96
97/// Control-Flow Graph
98type Cfg = DiGraphMap<src::BlockId, ()>;
99
100/// Information precomputed about a function's CFG.
101#[derive(Debug)]
102struct CfgInfo {
103    /// The CFG
104    pub cfg: Cfg,
105    /// The CFG where all the backward edges have been removed. Aka "forward CFG".
106    pub fwd_cfg: Cfg,
107    /// We consider the destination of the backward edges to be loop entries and
108    /// store them here.
109    pub loop_entries: HashSet<src::BlockId>,
110    /// The blocks whose terminators are a switch are stored here.
111    pub switch_blocks: HashSet<src::BlockId>,
112    /// Tree of which nodes dominates which other nodes.
113    #[expect(unused)]
114    pub dominator_tree: Dominators<BlockId>,
115    /// Computed data about each block.
116    pub block_data: IndexVec<BlockId, Box<BlockData>>,
117}
118
119#[derive(Debug)]
120struct BlockData {
121    pub id: BlockId,
122    pub span: Span,
123    /// The (unique) entrypoints of each loop. Unique because we error on irreducible cfgs.
124    pub is_loop_header: bool,
125    /// Whether this block is a switch.
126    pub is_switch: bool,
127    /// Whether this block has multiple incoming control-flow edges in the forward graph.
128    pub is_merge_target: bool,
129    /// Order in a reverse postorder numbering. `None` if the block is unreachable.
130    pub reverse_postorder: Option<u32>,
131    /// Nodes that this block immediately dominates. Sorted by reverse_postorder_id, with largest
132    /// id first.
133    pub immediately_dominates: SmallVec<[BlockId; 2]>,
134    /// The nodes from `immediately_dominates` that are also merge targets. Sorted in the same
135    /// order.
136    pub immediately_dominated_merge_targets: SmallVec<[BlockId; 2]>,
137    /// List of loops inside of which this node is (loops are identified by their header). A node
138    /// is considered inside a loop if it is reachable from the loop header and if it can reach the
139    /// loop header using only the backwards edges into it (i.e. we don't count a path that enters
140    /// the loop header through a forward edge).
141    ///
142    /// Note that we might have to take a backward edge to reach the loop header, e.g.:
143    ///   'a: loop {
144    ///       // ...
145    ///       'b: loop {
146    ///           // ...
147    ///           if true {
148    ///               continue 'a;
149    ///           } else {
150    ///               if true {
151    ///                   break 'a;
152    ///               }
153    ///               // This node has to take two backward edges in order to reach the start of `'a`.
154    ///           }
155    ///       }
156    ///   }
157    ///
158    /// The restriction on backwards edges is for the following case:
159    ///   loop {
160    ///     loop {
161    ///       ..
162    ///     }
163    ///     // Not in inner loop
164    ///   }
165    ///
166    /// This is sorted by path order from the graph root.
167    pub within_loops: SmallVec<[BlockId; 2]>,
168    /// Node from where we can only reach error nodes (panic, etc.)
169    // TODO: track more nicely the set of targets reachable from a node: panic, return, exit loop,
170    // continue loop (this is a partial order).
171    pub only_reach_error: bool,
172    /// List of reachable nodes, with the length of shortest path to them. Includes the current
173    /// node.
174    pub shortest_paths: hashbrown::HashMap<BlockId, usize>,
175    /// Let's say we put a quantity of water equal to 1 on the block, and the water flows downards.
176    /// Whenever there is a branching, the quantity of water gets equally divided between the
177    /// branches. When the control flows join, we put the water back together. The set below
178    /// computes the amount of water received by each descendant of the node.
179    ///
180    /// TODO: there must be a known algorithm which computes this, right?...
181    /// This is exactly this problems:
182    /// <https://stackoverflow.com/questions/78221666/algorithm-for-total-flow-through-weighted-directed-acyclic-graph>
183    /// TODO: the way I compute this is not efficient.
184    pub flow: IndexVec<BlockId, BigRational>,
185    /// Reconstructed information about loops and switches.
186    pub exit_info: ExitInfo,
187}
188
189#[derive(Debug, Default, Clone)]
190struct ExitInfo {
191    /// The loop exit
192    loop_exit: Option<src::BlockId>,
193    /// The switch exit.
194    switch_exit: Option<src::BlockId>,
195}
196
197/// Error indicating that the control-flow graph is not reducible. The contained block id is a
198/// block involved in an irreducible subgraph.
199struct Irreducible(BlockId);
200
201impl CfgInfo {
202    /// Build the CFGs (the "regular" CFG and the CFG without backward edges) and precompute a
203    /// bunch of graph information about the CFG.
204    fn build(ctx: &TransformCtx, body: &src::BodyContents) -> Result<Self, Irreducible> {
205        // The steps in this function follow a precise order, as each step typically requires the
206        // previous one.
207        let start_block = BlockId::ZERO;
208
209        let empty_flow = body.map_ref(|_| BigRational::new(0u64.into(), 1u64.into()));
210        let mut block_data: IndexVec<BlockId, _> = body.map_ref_indexed(|id, contents| {
211            Box::new(BlockData {
212                id,
213                span: contents.terminator.span,
214                is_loop_header: false,
215                is_switch: false,
216                is_merge_target: false,
217                reverse_postorder: None,
218                immediately_dominates: Default::default(),
219                immediately_dominated_merge_targets: Default::default(),
220                within_loops: Default::default(),
221                only_reach_error: false,
222                shortest_paths: Default::default(),
223                flow: empty_flow.clone(),
224                exit_info: Default::default(),
225            })
226        });
227
228        // Build the node graph (we ignore unwind paths for now).
229        let mut cfg = Cfg::new();
230        for (block_id, block) in body.iter_enumerated() {
231            cfg.add_node(block_id);
232            for tgt in block.targets_ignoring_unwind() {
233                cfg.add_edge(block_id, tgt, ());
234            }
235        }
236
237        // Compute the dominator tree.
238        let dominator_tree = simple_fast(&cfg, start_block);
239
240        // Compute reverse postorder numbering.
241        for (i, block_id) in DfsPostOrder::new(&cfg, start_block).iter(&cfg).enumerate() {
242            let rev_post_id = body.len() - i;
243            block_data[block_id].reverse_postorder = Some(rev_post_id.try_into().unwrap());
244
245            // Store the dominator tree in `block_data`.
246            if let Some(dominator) = dominator_tree.immediate_dominator(block_id) {
247                block_data[dominator].immediately_dominates.push(block_id);
248            }
249        }
250
251        // Compute the forward graph (without backward edges). We do a dfs while keeping track of
252        // the path from the start node.
253        let mut fwd_cfg = Cfg::new();
254        let mut loop_entries = HashSet::new();
255        let mut switch_blocks = HashSet::new();
256        for block_id in Dfs::new(&cfg, start_block).iter(&cfg) {
257            fwd_cfg.add_node(block_id);
258
259            if body[block_id].terminator.kind.is_switch() {
260                switch_blocks.insert(block_id);
261                block_data[block_id].is_switch = true;
262            }
263
264            // Iterate over edges into this node (so that we can determine whether this node is a
265            // loop header).
266            let mut incoming_fwd_edges = 0;
267            for from in cfg.neighbors_directed(block_id, petgraph::Direction::Incoming) {
268                // Check if the edge is a backward edge.
269                if block_data[from].reverse_postorder >= block_data[block_id].reverse_postorder {
270                    // This is a backward edge
271                    block_data[block_id].is_loop_header = true;
272                    loop_entries.insert(block_id);
273                    // A cfg is reducible iff the target of every back edge dominates the
274                    // edge's source.
275                    if !dominator_tree.dominators(from).unwrap().contains(&block_id) {
276                        return Err(Irreducible(from));
277                    }
278                } else {
279                    incoming_fwd_edges += 1;
280                    fwd_cfg.add_edge(from, block_id, ());
281                }
282            }
283
284            // Detect merge targets.
285            if incoming_fwd_edges >= 2 {
286                block_data[block_id].is_merge_target = true;
287            }
288        }
289
290        // Finish filling in information.
291        for block_id in DfsPostOrder::new(&fwd_cfg, start_block).iter(&fwd_cfg) {
292            let block = &body[block_id];
293            let targets = cfg.neighbors(block_id).collect_vec();
294            let fwd_targets = fwd_cfg.neighbors(block_id).collect_vec();
295
296            // Compute the nodes that can only reach error nodes.
297            // The node can only reach error nodes if:
298            // - it is an error node;
299            // - or it has neighbors and they all lead to errors.
300            // Note that if there is a backward edge, `only_reach_error` cannot contain this
301            // node yet. In other words, this does not consider infinite loops as reaching an
302            // error node.
303            if block.terminator.is_error()
304                || (!targets.is_empty()
305                    && targets.iter().all(|&tgt| block_data[tgt].only_reach_error))
306            {
307                block_data[block_id].only_reach_error = true;
308            }
309
310            // Compute the flows between each pair of nodes.
311            let mut flow: IndexVec<src::BlockId, BigRational> =
312                mem::take(&mut block_data[block_id].flow);
313            // The flow to self is 1.
314            flow[block_id] = BigRational::new(1u64.into(), 1u64.into());
315            // Divide the flow from each child to a given target block by the number of children.
316            // This is a sparse matrix multiplication and could be implemented using a linalg
317            // library.
318            let num_children: BigUint = fwd_targets.len().into();
319            for &child in &fwd_targets {
320                for grandchild in block_data[child].reachable_including_self() {
321                    // Flow from `child` to `grandchild`
322                    flow[grandchild] += &block_data[child].flow[grandchild] / &num_children;
323                }
324            }
325            block_data[block_id].flow = flow;
326
327            // Compute shortest paths to all reachable nodes in the forward graph.
328            block_data[block_id].shortest_paths = dijkstra(&fwd_cfg, block_id, None, |_| 1usize);
329
330            // Fill in the rest of the domination data.
331            let mut dominatees = mem::take(&mut block_data[block_id].immediately_dominates);
332            dominatees.sort_by_key(|&child| block_data[child].reverse_postorder);
333            dominatees.reverse();
334            block_data[block_id].immediately_dominates = dominatees;
335            block_data[block_id].immediately_dominated_merge_targets = block_data[block_id]
336                .immediately_dominates
337                .iter()
338                .copied()
339                .filter(|&child| block_data[child].is_merge_target)
340                .collect();
341        }
342
343        // Fill in the within_loop information. See the docs of `within_loops` to understand what
344        // we're computing.
345        let mut path_dfs = DfsWithPath::new(&cfg, start_block);
346        while let Some(block_id) = path_dfs.next(&cfg) {
347            // Store all the loops on the path to this
348            // node.
349            let mut within_loops: SmallVec<_> = path_dfs
350                .path
351                .iter()
352                .copied()
353                .filter(|&loop_id| block_data[loop_id].is_loop_header)
354                .collect();
355            // The loops that we can reach by taking a single backward edge.
356            let loops_directly_within = within_loops
357                .iter()
358                .copied()
359                .filter(|&loop_header| {
360                    cfg.neighbors_directed(loop_header, petgraph::Direction::Incoming)
361                        .any(|bid| block_data[block_id].shortest_paths.contains_key(&bid))
362                })
363                .collect_vec();
364            // The loops that we can reach by taking any number of backward edges.
365            let loops_indirectly_within: HashSet<_> = loops_directly_within
366                .iter()
367                .copied()
368                .flat_map(|loop_header| &block_data[loop_header].within_loops)
369                .chain(&loops_directly_within)
370                .copied()
371                .collect();
372            within_loops.retain(|id| loops_indirectly_within.contains(id));
373            block_data[block_id].within_loops = within_loops;
374        }
375
376        let mut cfg = CfgInfo {
377            cfg,
378            fwd_cfg,
379            loop_entries,
380            switch_blocks,
381            dominator_tree,
382            block_data,
383        };
384
385        // Pick an exit block for each loop, if we find one.
386        ExitInfo::compute_loop_exits(ctx, &mut cfg);
387
388        // Pick an exit block for each switch, if we find one.
389        ExitInfo::compute_switch_exits(&mut cfg);
390
391        Ok(cfg)
392    }
393
394    fn block_data(&self, block_id: BlockId) -> &BlockData {
395        &self.block_data[block_id]
396    }
397    // fn can_reach(&self, src: BlockId, tgt: BlockId) -> bool {
398    //     self.block_data[src].shortest_paths.contains_key(&tgt)
399    // }
400    fn topo_rank(&self, block_id: BlockId) -> u32 {
401        self.block_data[block_id].reverse_postorder.unwrap()
402    }
403    #[expect(unused)]
404    fn is_backward_edge(&self, src: BlockId, tgt: BlockId) -> bool {
405        self.block_data[src].reverse_postorder >= self.block_data[tgt].reverse_postorder
406            && self.cfg.contains_edge(src, tgt)
407    }
408
409    /// Check if the node is within the given loop.
410    fn is_within_loop(&self, loop_header: src::BlockId, block_id: src::BlockId) -> bool {
411        self.block_data[block_id]
412            .within_loops
413            .contains(&loop_header)
414    }
415
416    /// Check if all paths from `src` to nodes in `target_set` go through `through_node`. If `src`
417    /// is already in `target_set`, we ignore that empty path.
418    fn all_paths_go_through(
419        &self,
420        src: src::BlockId,
421        through_node: src::BlockId,
422        target_set: &HashSet<src::BlockId>,
423    ) -> bool {
424        let graph = EdgeFiltered::from_fn(&self.fwd_cfg, |edge| edge.source() != through_node);
425        !Dfs::new(&graph, src)
426            .iter(&graph)
427            .skip(1) // skip src
428            .any(|bid| target_set.contains(&bid))
429    }
430}
431
432impl BlockData {
433    fn shortest_paths_including_self(&self) -> impl Iterator<Item = (BlockId, usize)> {
434        self.shortest_paths.iter().map(|(bid, d)| (*bid, *d))
435    }
436    fn shortest_paths_excluding_self(&self) -> impl Iterator<Item = (BlockId, usize)> {
437        self.shortest_paths_including_self()
438            .filter(move |&(bid, _)| bid != self.id)
439    }
440    fn reachable_including_self(&self) -> impl Iterator<Item = BlockId> {
441        self.shortest_paths_including_self().map(|(bid, _)| bid)
442    }
443    fn reachable_excluding_self(&self) -> impl Iterator<Item = BlockId> {
444        self.shortest_paths_excluding_self().map(|(bid, _)| bid)
445    }
446    #[expect(unused)]
447    fn can_reach_excluding_self(&self, other: BlockId) -> bool {
448        self.shortest_paths.contains_key(&other) && self.id != other
449    }
450}
451
452/// See [`ExitInfo::compute_loop_exit_ranks`].
453#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
454struct LoopExitRank {
455    /// Number of paths we found going to this exit.
456    path_count: usize,
457    /// Distance from the loop header.
458    distance_from_header: Reverse<usize>,
459}
460
461impl ExitInfo {
462    /// Compute the first node on each path that exits the loop.
463    fn compute_loop_exit_starting_points(
464        cfg: &CfgInfo,
465        loop_header: src::BlockId,
466    ) -> Vec<src::BlockId> {
467        let mut loop_exits = Vec::new();
468        // Do a dfs from the loop header while keeping track of the path from the loop header to
469        // the current node.
470        let mut dfs = Dfs::new(&cfg.fwd_cfg, loop_header);
471        while let Some(block_id) = dfs.next(&cfg.fwd_cfg) {
472            // If we've exited all the loops after and including the target one, this node is an
473            // exit node for the target loop.
474            if !cfg.is_within_loop(loop_header, block_id) {
475                loop_exits.push(block_id);
476                // Don't explore any more paths from this node.
477                dfs.discovered.extend(cfg.fwd_cfg.neighbors(block_id));
478            }
479        }
480        loop_exits
481    }
482
483    /// Compute the loop exit candidates along with a rank.
484    ///
485    /// In the simple case, there is one exit node through which all exit paths go. We want to be
486    /// sure to catch that case, and when that's not possible we want to still find a node through
487    /// which a lot of exit paths go.
488    ///
489    /// To do that, we first count for each exit node how many exit paths go through it, and pick
490    /// the node with most occurrences. If there are many such nodes, we pick the one with shortest
491    /// distance from the loop header. Finally if there are still many such nodes, we keep the
492    /// first node found (the order in which we explore the graph is deterministic, and we use an
493    /// insertion-order hash map).
494    ///
495    /// Note that exit candidates will typically be referenced more than once for one loop. This
496    /// comes from the fact that whenever we reach a node outside the current loop, we register
497    /// this node as well as all its children as exit candidates.
498    /// Consider the following example:
499    /// ```text
500    /// while i < max {
501    ///     if cond {
502    ///         break;
503    ///     }
504    ///     s += i;
505    ///     i += 1
506    /// }
507    /// // All the below nodes are exit candidates (each of them is referenced twice)
508    /// s += 1;
509    /// return s;
510    /// ```
511    fn compute_loop_exit_ranks(
512        cfg: &CfgInfo,
513        loop_header: src::BlockId,
514    ) -> SeqHashMap<src::BlockId, LoopExitRank> {
515        let mut loop_exits: SeqHashMap<BlockId, LoopExitRank> = SeqHashMap::new();
516        for block_id in Self::compute_loop_exit_starting_points(cfg, loop_header) {
517            for bid in cfg.block_data(block_id).reachable_including_self() {
518                loop_exits
519                    .entry(bid)
520                    .or_insert_with(|| LoopExitRank {
521                        path_count: 0,
522                        distance_from_header: Reverse(
523                            cfg.block_data[loop_header].shortest_paths[&bid],
524                        ),
525                    })
526                    .path_count += 1;
527            }
528        }
529        loop_exits
530    }
531
532    /// A loop exit is any block reachable from the loop header that isn't inside the loop.
533    /// This function choses an exit for every loop. See `compute_loop_exit_ranks` for how we
534    /// select them.
535    ///
536    /// For example:
537    /// ```text
538    /// while ... {
539    ///    ...
540    ///    if ... {
541    ///        // We can't reach the loop entry from here: this is an exit
542    ///        // candidate
543    ///        return;
544    ///    }
545    /// }
546    /// // This is another exit candidate - and this is the one we want to use
547    /// // as the "real" exit...
548    /// ...
549    /// ```
550    ///
551    /// Once we listed all the exit candidates, we find the "best" one for every loop. The best
552    /// exit is the following one:
553    /// - it is the one which is used the most times (note that there can be
554    ///   several candidates which are referenced strictly more than once: see the
555    ///   comment below)
556    /// - if several exits have the same number of occurrences, we choose the one
557    ///   for which we goto the "earliest" (earliest meaning that the goto is close to
558    ///   the loop entry node in the AST). The reason is that all the loops should
559    ///   have an outer if ... then ... else ... which executes the loop body or goes
560    ///   to the exit (note that this is not necessarily the first
561    ///   if ... then ... else ... we find: loop conditions can be arbitrary
562    ///   expressions, containing branchings).
563    ///
564    /// # Several candidates for a loop exit:
565    /// =====================================
566    /// There used to be a sanity check to ensure there are no two different
567    /// candidates with exactly the same number of occurrences and distance from
568    /// the entry of the loop, if the number of occurrences is > 1.
569    ///
570    /// We removed it because it does happen, for instance here (the match
571    /// introduces an `unreachable` node, and it has the same number of
572    /// occurrences and the same distance to the loop entry as the `panic`
573    /// node):
574    ///
575    /// ```text
576    /// pub fn list_nth_mut_loop_pair<'a, T>(
577    ///     mut ls: &'a mut List<T>,
578    ///     mut i: u32,
579    /// ) -> &'a mut T {
580    ///     loop {
581    ///         match ls {
582    ///             List::Nil => {
583    ///                 panic!() // <-- best candidate
584    ///             }
585    ///             List::Cons(x, tl) => {
586    ///                 if i == 0 {
587    ///                     return x;
588    ///                 } else {
589    ///                     ls = tl;
590    ///                     i -= 1;
591    ///                 }
592    ///             }
593    ///             _ => {
594    ///               // Note that Rustc always introduces an unreachable branch after
595    ///               // desugaring matches.
596    ///               unreachable!(), // <-- best candidate
597    ///             }
598    ///         }
599    ///     }
600    /// }
601    /// ```
602    ///
603    /// When this happens we choose an exit candidate whose edges don't necessarily
604    /// lead to an error (above there are none, so we don't choose any exits). Note
605    /// that this last condition is important to prevent loops from being unnecessarily
606    /// nested:
607    ///
608    /// ```text
609    /// pub fn nested_loops_enum(step_out: usize, step_in: usize) -> usize {
610    ///     let mut s = 0;
611    ///
612    ///     for _ in 0..128 { // We don't want this loop to be nested with the loops below
613    ///         s += 1;
614    ///     }
615    ///
616    ///     for _ in 0..(step_out) {
617    ///         for _ in 0..(step_in) {
618    ///             s += 1;
619    ///         }
620    ///     }
621    ///
622    ///     s
623    /// }
624    /// ```
625    fn compute_loop_exits(_ctx: &TransformCtx, cfg: &mut CfgInfo) {
626        for &loop_id in &cfg.loop_entries {
627            // Compute the candidates.
628            let loop_exits: SeqHashMap<BlockId, LoopExitRank> =
629                Self::compute_loop_exit_ranks(cfg, loop_id);
630            // We choose the exit with:
631            // - the most occurrences
632            // - the least total distance (if there are several possibilities)
633            // - doesn't necessarily lead to an error (panic, unreachable)
634            let best_exits: Vec<(BlockId, LoopExitRank)> =
635                loop_exits.into_iter().max_set_by_key(|&(_, rank)| rank);
636            // If there is exactly one best candidate, use it. Otherwise we need to split further.
637            let chosen_exit = match best_exits.into_iter().map(|(bid, _)| bid).exactly_one() {
638                Ok(best_exit) => Some(best_exit),
639                Err(best_exits) => {
640                    // Remove the candidates which only lead to errors (panic or unreachable).
641                    // If there is exactly one candidate we select it, otherwise we do not select any
642                    // exit.
643                    // We don't want to select any exit if we are in the below situation
644                    // (all paths lead to errors). We added a sanity check below to
645                    // catch the situations where there are several exits which don't
646                    // lead to errors.
647                    //
648                    // Example:
649                    // ========
650                    // ```
651                    // loop {
652                    //     match ls {
653                    //         List::Nil => {
654                    //             panic!() // <-- best candidate
655                    //         }
656                    //         List::Cons(x, tl) => {
657                    //             if i == 0 {
658                    //                 return x;
659                    //             } else {
660                    //                 ls = tl;
661                    //                 i -= 1;
662                    //             }
663                    //         }
664                    //         _ => {
665                    //           unreachable!(); // <-- best candidate (Rustc introduces an `unreachable` case)
666                    //         }
667                    //     }
668                    // }
669                    // ```
670                    // The `exactly_one` can fail, see `tests/ui/control-flow/ambiguous-loop-exit.rs`
671                    best_exits
672                        .filter(|&bid| !cfg.block_data[bid].only_reach_error)
673                        .exactly_one()
674                        .ok()
675                }
676            };
677            cfg.block_data[loop_id].exit_info.loop_exit = chosen_exit;
678        }
679    }
680
681    /// Let's consider the following piece of code:
682    /// ```text
683    /// if cond1 { ... } else { ... };
684    /// if cond2 { ... } else { ... };
685    /// ```
686    /// Once converted to MIR, the control-flow is destructured, which means we
687    /// have gotos everywhere. When reconstructing the control-flow, we have
688    /// to be careful about the point where we should join the two branches of
689    /// the first if.
690    /// For instance, if we don't notice they should be joined at some point (i.e,
691    /// whatever the branch we take, there is a moment when we go to the exact
692    /// same place, just before the second if), we might generate code like
693    /// this, with some duplicata:
694    /// ```text
695    /// if cond1 { ...; if cond2 { ... } else { ...} }
696    /// else { ...; if cond2 { ... } else { ...} }
697    /// ```
698    ///
699    /// Such a reconstructed program is valid, but it is definitely non-optimal:
700    /// it is very different from the original program (making it less clean and
701    /// clear), more bloated, and might involve duplicating the proof effort.
702    ///
703    /// For this reason, we need to find the "exit" of the first switch, which is
704    /// the point where the two branches join. Note that this can be a bit tricky,
705    /// because there may be more than two branches (if we do `switch(x) { ... }`),
706    /// and some of them might not join (if they contain a `break`, `panic`,
707    /// `return`, etc.).
708    ///
709    /// In order to compute the switch exits, we simply recursively compute a
710    /// topologically ordered set of "filtered successors" as follows (note
711    /// that we work in the CFG *without* back edges):
712    /// - for a block which doesn't branch (only one successor), the filtered
713    ///   successors is the set of reachable nodes.
714    /// - for a block which branches, we compute the nodes reachable from all
715    ///   the children, and find the "best" intersection between those.
716    ///   Note that we find the "best" intersection (a pair of branches which
717    ///   maximize the intersection of filtered successors) because some branches
718    ///   might never join the control-flow of the other branches, if they contain
719    ///   a `break`, `return`, `panic`, etc., like here:
720    ///   ```text
721    ///   if b { x = 3; } { return; }
722    ///   y += x;
723    ///   ...
724    ///   ```
725    /// Note that with nested switches, the branches of the inner switches might
726    /// goto the exits of the outer switches: for this reason, we give precedence
727    /// to the outer switches.
728    fn compute_switch_exits(cfg: &mut CfgInfo) {
729        // We need to give precedence to the outer switches: we thus iterate
730        // over the switch blocks in topological order.
731        let mut exits_set = HashSet::new();
732        for bid in cfg
733            .switch_blocks
734            .iter()
735            .copied()
736            .sorted_unstable_by_key(|&bid| (cfg.topo_rank(bid), bid))
737        {
738            let block_data = &cfg.block_data[bid];
739            // Find the best successor: this is the node with the highest flow, and the lowest
740            // topological rank. If several nodes have the same flow, we want to take the highest
741            // one in the hierarchy: hence the use of the topological rank.
742            //
743            // Ex.:
744            // ```text
745            // A  -- we start here
746            // |
747            // |---------------------------------------
748            // |            |            |            |
749            // B:(0.25,-1)  C:(0.25,-2)  D:(0.25,-3)  E:(0.25,-4)
750            // |            |            |
751            // |--------------------------
752            // |
753            // F:(0.75,-5)
754            // |
755            // |
756            // G:(0.75,-6)
757            // ```
758            // The "best" node (with the highest (flow, rank) in the graph above is F.
759            // If the switch is inside a loop, we also only consider exists that are inside that
760            // same loop. There must be one, otherwise the switch entry would not be inside the
761            // loop.
762            let current_loop = block_data.within_loops.last().copied();
763            let best_exit: Option<BlockId> = block_data
764                .reachable_excluding_self()
765                .filter(|&b| {
766                    current_loop.is_none_or(|current_loop| cfg.is_within_loop(current_loop, b))
767                })
768                .max_by_key(|&id| {
769                    let flow = &block_data.flow[id];
770                    let rank = Reverse(cfg.topo_rank(id));
771                    ((flow, rank), id)
772                });
773            // We have an exit candidate: we first check that it was not already taken by an
774            // external switch.
775            //
776            // We then check that we can't reach the exit of an external switch from one of the
777            // branches, without going through the exit candidate. We do this by simply checking
778            // that we can't reach any of the exits of outer switches.
779            //
780            // The reason is that it can lead to code like the following:
781            // ```
782            // if ... { // if #1
783            //   if ... { // if #2
784            //     ...
785            //     // here, we have a `goto b1`, where b1 is the exit
786            //     // of if #2: we thus stop translating the blocks.
787            //   }
788            //   else {
789            //     ...
790            //     // here, we have a `goto b2`, where b2 is the exit
791            //     // of if #1: we thus stop translating the blocks.
792            //   }
793            //   // We insert code for the block b1 here (which is the exit of
794            //   // the exit of if #2). However, this block should only
795            //   // be executed in the branch "then" of the if #2, not in
796            //   // the branch "else".
797            //   ...
798            // }
799            // else {
800            //   ...
801            // }
802            // ```
803            if let Some(exit_id) = best_exit
804                && !exits_set.contains(&exit_id)
805                && cfg.all_paths_go_through(bid, exit_id, &exits_set)
806            {
807                exits_set.insert(exit_id);
808                cfg.block_data[bid].exit_info.switch_exit = Some(exit_id);
809            }
810        }
811    }
812}
813
814/// Iter over the last non-switch statements that may be executed on any branch of this block.
815/// Skips over `Nop`s.
816fn iter_tail_statements(block: &mut tgt::Block, f: &mut impl FnMut(&mut tgt::Statement)) {
817    let Some(st) = block
818        .statements
819        .iter_mut()
820        .rev()
821        .find(|st| !st.kind.is_nop())
822    else {
823        return;
824    };
825    if let tgt::StatementKind::Switch(switch) = &mut st.kind {
826        for block in switch.iter_targets_mut() {
827            iter_tail_statements(block, f);
828        }
829    } else {
830        f(st)
831    };
832}
833
834type Depth = usize;
835
836#[derive(Debug, Clone, Copy)]
837enum SpecialJumpKind {
838    /// When encountering this block, `continue` to the given depth.
839    LoopContinue(Depth),
840    /// When encountering this block, `break` to the given depth. This comes from a loop.
841    LoopBreak(Depth),
842    /// When encountering this block, `break` to the given depth. This is a `loop` context
843    /// introduced only for forward jumps.
844    ForwardBreak(Depth),
845    /// When encountering this block, do nothing, as this is the next block that will be
846    /// translated.
847    NextBlock,
848}
849
850#[derive(Clone, Copy)]
851struct SpecialJump {
852    /// The relevant block.
853    target_block: BlockId,
854    /// How to translate a jump to the target block.
855    kind: SpecialJumpKind,
856}
857
858impl SpecialJump {
859    fn new(target_block: BlockId, kind: SpecialJumpKind) -> Self {
860        Self { target_block, kind }
861    }
862}
863
864impl std::fmt::Debug for SpecialJump {
865    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
866        write!(f, "SpecialJump({}, {:?})", self.target_block, self.kind)
867    }
868}
869
870/// How to handle blocks reachable from multiple branches.
871enum ReconstructMode {
872    /// Duplicate blocks reachable from multiple branches.
873    Duplicate,
874    /// Insert loops for the purpose of breaking forward to them to implement DAG-like control-flow
875    /// without duplicating blocks.
876    /// Based on the algorithm from "Beyond Relooper" (https://dl.acm.org/doi/10.1145/3547621).
877    ForwardBreak,
878}
879
880struct ReconstructCtx<'a> {
881    cfg: CfgInfo,
882    body: &'a src::ExprBody,
883    /// The depth of `loop` contexts we may `break`/`continue` to.
884    break_context_depth: Depth,
885    /// Stack of block ids that should be translated to special jumps (`break`/`continue`/do
886    /// nothing) in the current context.
887    /// The block where control-flow continues naturally after this block is kept at the top of the
888    /// stack.
889    special_jump_stack: Vec<SpecialJump>,
890    mode: ReconstructMode,
891}
892
893impl<'a> ReconstructCtx<'a> {
894    fn build(ctx: &TransformCtx, src_body: &'a src::ExprBody) -> Result<Self, Irreducible> {
895        // Compute all sorts of graph-related information about the control-flow graph, including
896        // reachability, the dominator tree, loop entries, and loop/switch exits.
897        let cfg = CfgInfo::build(ctx, &src_body.body)?;
898
899        // Translate the body by reconstructing the loops and the
900        // conditional branchings.
901        let allow_duplication = true;
902        Ok(ReconstructCtx {
903            cfg,
904            body: src_body,
905            break_context_depth: 0,
906            special_jump_stack: Vec::new(),
907            mode: if allow_duplication {
908                ReconstructMode::Duplicate
909            } else {
910                ReconstructMode::ForwardBreak
911            },
912        })
913    }
914
915    fn translate_statement(&self, st: &src::Statement) -> tgt::Statement {
916        let src_span = st.span;
917        let st = match st.kind.clone() {
918            src::StatementKind::Assign(place, rvalue) => tgt::StatementKind::Assign(place, rvalue),
919            src::StatementKind::SetDiscriminant(place, variant_id) => {
920                tgt::StatementKind::SetDiscriminant(place, variant_id)
921            }
922            src::StatementKind::CopyNonOverlapping(copy) => {
923                tgt::StatementKind::CopyNonOverlapping(copy)
924            }
925            src::StatementKind::StorageLive(var_id) => tgt::StatementKind::StorageLive(var_id),
926            src::StatementKind::StorageDead(var_id) => tgt::StatementKind::StorageDead(var_id),
927            src::StatementKind::Assert { assert, on_failure } => {
928                tgt::StatementKind::Assert { assert, on_failure }
929            }
930            src::StatementKind::PlaceMention(place) => tgt::StatementKind::PlaceMention(place),
931            src::StatementKind::Nop => tgt::StatementKind::Nop,
932        };
933        tgt::Statement::new(src_span, st)
934    }
935
936    /// Translate a jump to the given block. The span is used to create the jump statement, if any.
937    #[tracing::instrument(skip(self), ret, fields(stack = ?self.special_jump_stack))]
938    fn translate_jump(&mut self, span: Span, target_block: src::BlockId) -> tgt::Block {
939        match self
940            .special_jump_stack
941            .iter_mut()
942            .rev()
943            .enumerate()
944            .find(|(_, j)| j.target_block == target_block)
945        {
946            Some((i, jump_target)) => {
947                let mk_block = |kind| tgt::Statement::new(span, kind).into_block();
948                match jump_target.kind {
949                    // The top of the stack is where control-flow goes naturally, no need to add a
950                    // `break`/`continue`.
951                    SpecialJumpKind::LoopContinue(_) | SpecialJumpKind::ForwardBreak(_)
952                        if i == 0 && matches!(self.mode, ReconstructMode::ForwardBreak) =>
953                    {
954                        mk_block(tgt::StatementKind::Nop)
955                    }
956                    SpecialJumpKind::LoopContinue(depth) => mk_block(tgt::StatementKind::Continue(
957                        self.break_context_depth - depth,
958                    )),
959                    SpecialJumpKind::ForwardBreak(depth) | SpecialJumpKind::LoopBreak(depth) => {
960                        mk_block(tgt::StatementKind::Break(self.break_context_depth - depth))
961                    }
962                    SpecialJumpKind::NextBlock => mk_block(tgt::StatementKind::Nop),
963                }
964            }
965            // Translate the block without a jump.
966            None => return self.translate_block(target_block),
967        }
968    }
969
970    fn translate_terminator(&mut self, terminator: &src::Terminator) -> tgt::Block {
971        let src_span = terminator.span;
972
973        match &terminator.kind {
974            src::TerminatorKind::Abort(kind) => {
975                tgt::Statement::new(src_span, tgt::StatementKind::Abort(kind.clone())).into_block()
976            }
977            src::TerminatorKind::Return => {
978                tgt::Statement::new(src_span, tgt::StatementKind::Return).into_block()
979            }
980            src::TerminatorKind::UnwindResume => {
981                tgt::Statement::new(src_span, tgt::StatementKind::Abort(AbortKind::Panic(None)))
982                    .into_block()
983            }
984            src::TerminatorKind::Call {
985                call,
986                target,
987                on_unwind: _,
988            } => {
989                // TODO: Have unwinds in the LLBC
990                let st = tgt::Statement::new(src_span, tgt::StatementKind::Call(call.clone()));
991                let mut block = self.translate_jump(terminator.span, *target);
992                block.statements.insert(0, st);
993                block
994            }
995            src::TerminatorKind::Drop {
996                kind,
997                place,
998                fn_ptr,
999                target,
1000                on_unwind: _,
1001            } => {
1002                // TODO: Have unwinds in the LLBC
1003                let st = tgt::Statement::new(
1004                    src_span,
1005                    tgt::StatementKind::Drop(place.clone(), fn_ptr.clone(), *kind),
1006                );
1007                let mut block = self.translate_jump(terminator.span, *target);
1008                block.statements.insert(0, st);
1009                block
1010            }
1011            src::TerminatorKind::Assert {
1012                assert,
1013                target,
1014                on_unwind,
1015            } => {
1016                let unwind_block = &self.body.body[*on_unwind];
1017                let on_failure = unwind_block.as_abort().unwrap_or(AbortKind::Panic(None));
1018                let st = tgt::StatementKind::Assert {
1019                    assert: assert.clone(),
1020                    on_failure,
1021                };
1022                let target = self.translate_jump(terminator.span, *target);
1023                tgt::Statement::new(src_span, st).into_block().merge(target)
1024            }
1025            src::TerminatorKind::InlineAsm {
1026                asm,
1027                targets,
1028                on_unwind: _,
1029            } => {
1030                // TODO: Have unwinds in the LLBC.
1031                let targets = targets
1032                    .iter()
1033                    .map(|target| self.translate_jump(terminator.span, *target))
1034                    .collect();
1035                let st = tgt::StatementKind::InlineAsm {
1036                    asm: asm.clone(),
1037                    targets,
1038                };
1039                tgt::Statement::new(src_span, st).into_block()
1040            }
1041            src::TerminatorKind::Goto { target } => self.translate_jump(terminator.span, *target),
1042            src::TerminatorKind::Switch { discr, targets } => {
1043                // Translate the target expressions
1044                let switch = match &targets {
1045                    src::SwitchTargets::If(then_tgt, else_tgt) => {
1046                        let then_block = self.translate_jump(terminator.span, *then_tgt);
1047                        let else_block = self.translate_jump(terminator.span, *else_tgt);
1048                        tgt::Switch::If(discr.clone(), then_block, else_block)
1049                    }
1050                    src::SwitchTargets::SwitchInt(int_ty, targets, otherwise) => {
1051                        // Note that some branches can be grouped together, like
1052                        // here:
1053                        // ```
1054                        // match e {
1055                        //   E::V1 | E::V2 => ..., // Grouped
1056                        //   E::V3 => ...
1057                        // }
1058                        // ```
1059                        // We detect this by checking if a block has already been
1060                        // translated as one of the branches of the switch.
1061                        //
1062                        // Rk.: note there may be intermediate gotos depending
1063                        // on the MIR we use. Typically, we manage to detect the
1064                        // grouped branches with Optimized MIR, but not with Promoted
1065                        // MIR. See the comment in "tests/src/matches.rs".
1066
1067                        // We link block ids to:
1068                        // - vector of matched integer values
1069                        // - translated blocks
1070                        let mut branches: SeqHashMap<src::BlockId, (Vec<Literal>, tgt::Block)> =
1071                            SeqHashMap::new();
1072
1073                        // Translate the children expressions
1074                        for (v, bid) in targets.iter() {
1075                            // Check if the block has already been translated:
1076                            // if yes, it means we need to group branches
1077                            if branches.contains_key(bid) {
1078                                // Already translated: add the matched value to
1079                                // the list of values
1080                                let branch = branches.get_mut(bid).unwrap();
1081                                branch.0.push(v.clone());
1082                            } else {
1083                                // Not translated: translate it
1084                                let block = self.translate_jump(terminator.span, *bid);
1085                                // We use the terminator span information in case then
1086                                // then statement is `None`
1087                                branches.insert(*bid, (vec![v.clone()], block));
1088                            }
1089                        }
1090                        let targets_blocks: Vec<(Vec<Literal>, tgt::Block)> =
1091                            branches.into_iter().map(|(_, x)| x).collect();
1092
1093                        let otherwise_block = self.translate_jump(terminator.span, *otherwise);
1094
1095                        // Translate
1096                        tgt::Switch::SwitchInt(
1097                            discr.clone(),
1098                            *int_ty,
1099                            targets_blocks,
1100                            otherwise_block,
1101                        )
1102                    }
1103                };
1104
1105                // Return
1106                let span = tgt::combine_switch_targets_span(&switch);
1107                let span = combine_span(&src_span, &span);
1108                let st = tgt::StatementKind::Switch(switch);
1109                tgt::Statement::new(span, st).into_block()
1110            }
1111        }
1112    }
1113
1114    /// Translate just the block statements and terminator.
1115    fn translate_block_itself(&mut self, block_id: BlockId) -> tgt::Block {
1116        let block = &self.body.body[block_id];
1117        // Translate the statements inside the block
1118        let statements = block
1119            .statements
1120            .iter()
1121            .map(|st| self.translate_statement(st))
1122            .collect_vec();
1123        // Translate the terminator.
1124        let terminator = self.translate_terminator(&block.terminator);
1125        // Prepend the statements to the terminator.
1126        if let Some(st) = tgt::Block::from_seq(statements) {
1127            st.merge(terminator)
1128        } else {
1129            terminator
1130        }
1131    }
1132
1133    /// Translate a block including surrounding control-flow like looping.
1134    #[tracing::instrument(skip(self), fields(stack = ?self.special_jump_stack))]
1135    fn translate_block(&mut self, block_id: src::BlockId) -> tgt::Block {
1136        ensure_sufficient_stack(|| self.translate_block_inner(block_id))
1137    }
1138    fn translate_block_inner(&mut self, block_id: src::BlockId) -> tgt::Block {
1139        // Some of the blocks we might jump to inside this tree can't be translated as normal
1140        // blocks: the loop backward edges must become `continue`s and the merge nodes may need
1141        // some care if we're jumping to them from distant locations.
1142        // For this purpose, we push to the `special_jump_stack` the block ids that must be
1143        // translated specially. In `translate_jump` we check the stack. At the end of this
1144        // function we restore the stack to its previous state.
1145        let old_context_depth = self.special_jump_stack.len();
1146        let block_data = &self.cfg.block_data[block_id];
1147        let span = block_data.span;
1148
1149        // Catch jumps to the loop header or loop exit.
1150        if block_data.is_loop_header {
1151            self.break_context_depth += 1;
1152            if let Some(exit_id) = block_data.exit_info.loop_exit {
1153                self.special_jump_stack.push(SpecialJump::new(
1154                    exit_id,
1155                    SpecialJumpKind::LoopBreak(self.break_context_depth),
1156                ));
1157            }
1158            // Put the next block at the top of the stack.
1159            self.special_jump_stack.push(SpecialJump::new(
1160                block_id,
1161                SpecialJumpKind::LoopContinue(self.break_context_depth),
1162            ));
1163        }
1164
1165        // Catch jumps to a merge node.
1166        let merge_children = &block_data.immediately_dominated_merge_targets;
1167        if let ReconstructMode::ForwardBreak = self.mode {
1168            // We support forward-jumps using `break`
1169            // The child with highest postorder numbering is nested outermost in this scheme.
1170            for &child in merge_children {
1171                self.break_context_depth += 1;
1172                self.special_jump_stack.push(SpecialJump::new(
1173                    child,
1174                    SpecialJumpKind::ForwardBreak(self.break_context_depth),
1175                ));
1176            }
1177        }
1178
1179        if let Some(bid) = block_data.exit_info.switch_exit
1180            && !block_data.is_loop_header
1181            && !(matches!(self.mode, ReconstructMode::ForwardBreak)
1182                && merge_children.contains(&bid))
1183        {
1184            // Move some code that would be inside one or several switch branches to be after the
1185            // switch intead.
1186            self.special_jump_stack
1187                .push(SpecialJump::new(bid, SpecialJumpKind::NextBlock));
1188        }
1189
1190        // Translate this block. Any jumps to a loop header or a merge node will be replaced with
1191        // `continue`/`break`.
1192        let mut block = self.translate_block_itself(block_id);
1193
1194        // Reset the state to what it was previously, and translate what remains.
1195        let new_statement = move |kind| tgt::Statement::new(block.span, kind);
1196        while self.special_jump_stack.len() > old_context_depth {
1197            let special_jump = self.special_jump_stack.pop().unwrap();
1198            match &special_jump.kind {
1199                SpecialJumpKind::LoopContinue(_) => {
1200                    self.break_context_depth -= 1;
1201                    if let ReconstructMode::ForwardBreak = self.mode {
1202                        // We add `continue` at the end for users that don't know that the default
1203                        // behavior at the end of a loop block is `continue`. Not needed for
1204                        // `Duplicate` mode because we use explicit `continue`s there. TODO: clean
1205                        // that up.
1206                        block
1207                            .statements
1208                            .push(new_statement(tgt::StatementKind::Continue(0)));
1209                    }
1210                    block = new_statement(tgt::StatementKind::Loop(block)).into_block();
1211                }
1212                SpecialJumpKind::ForwardBreak(_) => {
1213                    self.break_context_depth -= 1;
1214                    // Remove unneeded `break`s in branches leading up to that final one.
1215                    iter_tail_statements(&mut block, &mut |st| {
1216                        if matches!(st.kind, tgt::StatementKind::Break(0)) {
1217                            st.kind = tgt::StatementKind::Nop;
1218                        }
1219                    });
1220                    // We add a `loop { ...; break }` so that we can use `break` to jump forward.
1221                    block
1222                        .statements
1223                        .push(new_statement(tgt::StatementKind::Break(0)));
1224                    block = new_statement(tgt::StatementKind::Loop(block)).into_block();
1225                    // We must translate the merge nodes after the block used for forward jumps to
1226                    // them.
1227                    let next_block = self.translate_jump(span, special_jump.target_block);
1228                    block = block.merge(next_block);
1229                }
1230                SpecialJumpKind::NextBlock | SpecialJumpKind::LoopBreak(..) => {
1231                    let next_block = self.translate_jump(span, special_jump.target_block);
1232                    block = block.merge(next_block);
1233                }
1234            }
1235        }
1236        block
1237    }
1238}
1239
1240fn remove_useless_jump_blocks(body: &mut tgt::ExprBody) {
1241    use tgt::StatementKind;
1242    #[derive(Default)]
1243    struct Count {
1244        continue_count: u32,
1245        break_count: u32,
1246    }
1247    #[derive(Default, Visitor)]
1248    struct CountJumpsVisitor {
1249        counts: HashMap<StatementId, Count>,
1250        loop_stack: Vec<StatementId>,
1251    }
1252    #[derive(Visitor)]
1253    struct RemoveUselessJumpsVisitor {
1254        counts: HashMap<StatementId, Count>,
1255        /// For every loop we encounter, whether we're keeping it or removing it.
1256        loop_stack: Vec<bool>,
1257    }
1258
1259    impl VisitBodyMut for CountJumpsVisitor {
1260        fn visit_llbc_statement(&mut self, st: &mut tgt::Statement) -> ControlFlow<Self::Break> {
1261            if let StatementKind::Loop(_) = &st.kind {
1262                self.loop_stack.push(st.id);
1263            }
1264            match &st.kind {
1265                StatementKind::Break(depth) => {
1266                    let loop_id = self.loop_stack[self.loop_stack.len() - 1 - depth];
1267                    self.counts.entry(loop_id).or_default().break_count += 1;
1268                }
1269                StatementKind::Continue(depth) => {
1270                    let loop_id = self.loop_stack[self.loop_stack.len() - 1 - depth];
1271                    self.counts.entry(loop_id).or_default().continue_count += 1;
1272                }
1273                _ => {}
1274            }
1275            self.visit_inner(st)?;
1276            if let StatementKind::Loop(_) = &st.kind {
1277                self.loop_stack.pop();
1278            }
1279            ControlFlow::Continue(())
1280        }
1281    }
1282
1283    impl VisitBodyMut for RemoveUselessJumpsVisitor {
1284        fn visit_llbc_block(&mut self, block: &mut tgt::Block) -> ControlFlow<Self::Break> {
1285            for mut st in mem::take(&mut block.statements) {
1286                if let tgt::StatementKind::Loop(block) = &mut st.kind {
1287                    let counts = &self.counts[&st.id];
1288                    let remove = counts.continue_count == 0
1289                        && counts.break_count == 1
1290                        && matches!(
1291                            block.statements.last().unwrap().kind,
1292                            StatementKind::Break(0)
1293                        );
1294                    self.loop_stack.push(!remove);
1295                }
1296                self.visit(&mut st)?;
1297                if st.kind.is_loop() && !self.loop_stack.pop().unwrap() {
1298                    // Remove the loop.
1299                    let StatementKind::Loop(mut inner_block) = st.kind else {
1300                        unreachable!()
1301                    };
1302                    inner_block.statements.last_mut().unwrap().kind = StatementKind::Nop;
1303                    block.statements.extend(inner_block.statements);
1304                } else {
1305                    block.statements.push(st);
1306                }
1307            }
1308            ControlFlow::Continue(())
1309        }
1310        fn enter_llbc_statement(&mut self, st: &mut tgt::Statement) {
1311            match &st.kind {
1312                StatementKind::Break(depth) => {
1313                    let new_depth = self.loop_stack[self.loop_stack.len() - depth..]
1314                        .iter()
1315                        .filter(|&&keep| keep)
1316                        .count();
1317                    st.kind = StatementKind::Break(new_depth);
1318                }
1319                StatementKind::Continue(depth) => {
1320                    let new_depth = self.loop_stack[self.loop_stack.len() - depth..]
1321                        .iter()
1322                        .filter(|&&keep| keep)
1323                        .count();
1324                    st.kind = StatementKind::Continue(new_depth);
1325                }
1326                _ => {}
1327            }
1328        }
1329    }
1330
1331    let mut v = CountJumpsVisitor::default();
1332    body.body.drive_body_mut(&mut v);
1333    let mut v = RemoveUselessJumpsVisitor {
1334        counts: v.counts,
1335        loop_stack: Default::default(),
1336    };
1337    body.body.drive_body_mut(&mut v);
1338}
1339
1340fn translate_body(ctx: &mut TransformCtx, body: &mut gast::Body) {
1341    use gast::Body::{Structured, Unstructured};
1342    let Unstructured(src_body) = body else {
1343        panic!("Called `ullbc_to_llbc` on an already restructured body")
1344    };
1345    trace!("About to translate to ullbc: {:?}", src_body.span);
1346
1347    // Calculate info about the graph and heuristically determine loop and switch exit blocks.
1348    let start_block = BlockId::ZERO;
1349    let mut ctx = match ReconstructCtx::build(ctx, src_body) {
1350        Ok(ctx) => ctx,
1351        Err(Irreducible(bid)) => {
1352            let span = src_body.body[bid].terminator.span;
1353            register_error!(
1354                ctx,
1355                span,
1356                "the control-flow graph of this function is not reducible"
1357            );
1358            panic!("can't reconstruct irreducible control-flow")
1359        }
1360    };
1361    // Translate the blocks using the computed data.
1362    let tgt_body = ctx.translate_block(start_block);
1363
1364    let mut tgt_body = tgt::ExprBody {
1365        span: src_body.span,
1366        locals: src_body.locals.clone(),
1367        bound_body_regions: src_body.bound_body_regions,
1368        body: tgt_body,
1369        comments: src_body.comments.clone(),
1370    };
1371    remove_useless_jump_blocks(&mut tgt_body);
1372
1373    *body = Structured(tgt_body);
1374}
1375
1376pub struct Transform;
1377impl TransformPass for Transform {
1378    fn transform_ctx(&self, ctx: &mut TransformCtx) {
1379        // Translate the bodies one at a time.
1380        ctx.for_each_body(|ctx, body| {
1381            translate_body(ctx, body);
1382        });
1383
1384        if ctx.options.print_built_llbc {
1385            eprintln!("# LLBC resulting from control-flow reconstruction:\n\n{ctx}\n",);
1386        } else {
1387            trace!("# LLBC resulting from control-flow reconstruction:\n\n{ctx}\n",);
1388        }
1389    }
1390}