charon_lib/transform/control_flow/ullbc_to_llbc.rs
1//! ULLBC to LLBC
2//!
3//! We reconstruct the control-flow in the Unstructured LLBC.
4//!
5//! The reconstruction algorithm is not written to be efficient (its complexity
6//! is probably very bad), but it was not written to be: this is still an early
7//! stage and we want the algorithm to generate the best reconstruction as
8//! possible. We still need to test the algorithm on more interesting examples,
9//! and will consider making it more efficient once it is a bit mature and well
10//! tested.
11//! Also note that we more importantly focus on making the algorithm sound: the
12//! reconstructed program must always be equivalent to the original MIR program,
13//! and the fact that the reconstruction preserves this property must be obvious.
14use itertools::Itertools;
15use petgraph::algo::dijkstra;
16use petgraph::algo::dominators::{Dominators, simple_fast};
17use petgraph::graphmap::DiGraphMap;
18use petgraph::visit::{
19 Dfs, DfsPostOrder, EdgeFiltered, EdgeRef, GraphRef, IntoNeighbors, VisitMap, Visitable, Walker,
20};
21use smallvec::SmallVec;
22use std::cmp::Reverse;
23use std::collections::{HashMap, HashSet};
24use std::mem;
25
26use crate::common::ensure_sufficient_stack;
27use crate::errors::sanity_check;
28use crate::ids::IndexVec;
29use crate::llbc_ast::{self as tgt, StatementId};
30use crate::meta::{Span, combine_span};
31use crate::transform::TransformCtx;
32use crate::transform::ctx::TransformPass;
33use crate::ullbc_ast::{self as src, BlockId};
34use crate::{ast::*, register_error};
35
/// An action on the explicit DFS stack of [`DfsWithPath`].
pub enum StackAction<N> {
    /// Pop the last node off the current path (we are done exploring its subtree).
    PopPath,
    /// Explore this node if it hasn't been discovered yet.
    Explore(N),
}
/// A depth-first search that additionally keeps track of the path from the start node to the
/// node currently being visited.
pub struct DfsWithPath<N, VM> {
    /// The stack of nodes to visit
    pub stack: Vec<StackAction<N>>,
    /// The map of discovered nodes
    pub discovered: VM,
    /// The path from start node to current node.
    pub path: Vec<N>,
}
48impl<N, VM> DfsWithPath<N, VM>
49where
50 N: Copy + PartialEq,
51 VM: VisitMap<N>,
52{
53 /// Create a new **DfsWithPath**, using the graph's visitor map, and put **start** in the stack
54 /// of nodes to visit.
55 pub fn new<G>(graph: G, start: N) -> Self
56 where
57 G: GraphRef + Visitable<NodeId = N, Map = VM>,
58 {
59 Self {
60 stack: vec![StackAction::Explore(start)],
61 discovered: graph.visit_map(),
62 path: vec![],
63 }
64 }
65
66 /// Return the next node in the dfs, or **None** if the traversal is done.
67 pub fn next<G>(&mut self, graph: G) -> Option<N>
68 where
69 G: IntoNeighbors<NodeId = N>,
70 {
71 while let Some(action) = self.stack.pop() {
72 match action {
73 StackAction::Explore(node) => {
74 if self.discovered.visit(node) {
75 self.path.push(node);
76 self.stack.push(StackAction::PopPath);
77 for succ in graph.neighbors(node) {
78 if !self.discovered.is_visited(&succ) {
79 self.stack.push(StackAction::Explore(succ));
80 }
81 }
82 return Some(node);
83 }
84 }
85 StackAction::PopPath => {
86 self.path.pop();
87 }
88 }
89 }
90 None
91 }
92}
93
/// Arbitrary-precision numbers
// NOTE(review): `DynaInt<u64, BigUint>` presumably stays in the `u64` representation while the
// value fits and falls back to a heap big-int otherwise — confirm against the `fraction` crate
// docs.
type BigUint = fraction::DynaInt<u64, fraction::BigUint>;
/// Exact rational numbers, used for the `flow` computation so that repeated halving never loses
/// precision.
type BigRational = fraction::Ratio<BigUint>;

/// Control-Flow Graph
type Cfg = DiGraphMap<src::BlockId, ()>;
100
/// Information precomputed about a function's CFG.
#[derive(Debug)]
struct CfgInfo {
    /// The CFG
    pub cfg: Cfg,
    /// The CFG where all the backward edges have been removed. Aka "forward CFG".
    pub fwd_cfg: Cfg,
    /// We consider the destination of the backward edges to be loop entries and
    /// store them here.
    pub loop_entries: HashSet<src::BlockId>,
    /// The blocks whose terminators are a switch are stored here.
    pub switch_blocks: HashSet<src::BlockId>,
    /// Tree of which nodes dominate which other nodes.
    // Currently unused directly: the per-block `immediately_dominates` lists carry the same
    // information in a more convenient form.
    #[expect(unused)]
    pub dominator_tree: Dominators<BlockId>,
    /// Computed data about each block.
    pub block_data: IndexVec<BlockId, Box<BlockData>>,
}
119
/// Graph data computed about a single block of the CFG. Filled in step by step by
/// `CfgInfo::build`.
#[derive(Debug)]
struct BlockData {
    /// The id of this block.
    pub id: BlockId,
    /// Span of the block's terminator, used for error reporting.
    pub span: Span,
    /// The (unique) entrypoints of each loop. Unique because we error on irreducible cfgs.
    pub is_loop_header: bool,
    /// Whether this block is a switch.
    pub is_switch: bool,
    /// Whether this block has multiple incoming control-flow edges in the forward graph.
    pub is_merge_target: bool,
    /// Order in a reverse postorder numbering. `None` if the block is unreachable.
    pub reverse_postorder: Option<u32>,
    /// Nodes that this block immediately dominates. Sorted by reverse_postorder_id, with largest
    /// id first.
    pub immediately_dominates: SmallVec<[BlockId; 2]>,
    /// The nodes from `immediately_dominates` that are also merge targets. Sorted in the same
    /// order.
    pub immediately_dominated_merge_targets: SmallVec<[BlockId; 2]>,
    /// List of loops inside of which this node is (loops are identified by their header). A node
    /// is considered inside a loop if it is reachable from the loop header and if it can reach the
    /// loop header using only the backwards edges into it (i.e. we don't count a path that enters
    /// the loop header through a forward edge).
    ///
    /// Note that we might have to take a backward edge to reach the loop header, e.g.:
    /// 'a: loop {
    ///     // ...
    ///     'b: loop {
    ///         // ...
    ///         if true {
    ///             continue 'a;
    ///         } else {
    ///             if true {
    ///                 break 'a;
    ///             }
    ///             // This node has to take two backward edges in order to reach the start of `'a`.
    ///         }
    ///     }
    /// }
    ///
    /// The restriction on backwards edges is for the following case:
    /// loop {
    ///     loop {
    ///         ..
    ///     }
    ///     // Not in inner loop
    /// }
    ///
    /// This is sorted by path order from the graph root.
    pub within_loops: SmallVec<[BlockId; 2]>,
    /// Node from where we can only reach error nodes (panic, etc.)
    // TODO: track more nicely the set of targets reachable from a node: panic, return, exit loop,
    // continue loop (this is a partial order).
    pub only_reach_error: bool,
    /// List of reachable nodes, with the length of shortest path to them. Includes the current
    /// node.
    pub shortest_paths: hashbrown::HashMap<BlockId, usize>,
    /// Let's say we put a quantity of water equal to 1 on the block, and the water flows downards.
    /// Whenever there is a branching, the quantity of water gets equally divided between the
    /// branches. When the control flows join, we put the water back together. The set below
    /// computes the amount of water received by each descendant of the node.
    ///
    /// TODO: there must be a known algorithm which computes this, right?...
    /// This is exactly this problems:
    /// <https://stackoverflow.com/questions/78221666/algorithm-for-total-flow-through-weighted-directed-acyclic-graph>
    /// TODO: the way I compute this is not efficient.
    pub flow: IndexVec<BlockId, BigRational>,
    /// Reconstructed information about loops and switches.
    pub exit_info: ExitInfo,
}
189
/// Exits chosen for a block. Stored per-block in `BlockData`; both fields stay `None` for blocks
/// that are neither a loop header nor a switch (and for those where no suitable exit was found).
#[derive(Debug, Default, Clone)]
struct ExitInfo {
    /// The loop exit
    loop_exit: Option<src::BlockId>,
    /// The switch exit.
    switch_exit: Option<src::BlockId>,
}
197
/// Error indicating that the control-flow graph is not reducible. The contained block id is a
/// block involved in an irreducible subgraph.
struct Irreducible(BlockId);
201
impl CfgInfo {
    /// Build the CFGs (the "regular" CFG and the CFG without backward edges) and precompute a
    /// bunch of graph information about the CFG.
    fn build(ctx: &TransformCtx, body: &src::BodyContents) -> Result<Self, Irreducible> {
        // The steps in this function follow a precise order, as each step typically requires the
        // previous one.
        let start_block = BlockId::ZERO;

        // Initialize per-block data with neutral values; the fields are filled in gradually
        // below.
        let empty_flow = body.map_ref(|_| BigRational::new(0u64.into(), 1u64.into()));
        let mut block_data: IndexVec<BlockId, _> = body.map_ref_indexed(|id, contents| {
            Box::new(BlockData {
                id,
                span: contents.terminator.span,
                is_loop_header: false,
                is_switch: false,
                is_merge_target: false,
                reverse_postorder: None,
                immediately_dominates: Default::default(),
                immediately_dominated_merge_targets: Default::default(),
                within_loops: Default::default(),
                only_reach_error: false,
                shortest_paths: Default::default(),
                flow: empty_flow.clone(),
                exit_info: Default::default(),
            })
        });

        // Build the node graph (we ignore unwind paths for now).
        let mut cfg = Cfg::new();
        for (block_id, block) in body.iter_indexed() {
            cfg.add_node(block_id);
            for tgt in block.targets_ignoring_unwind() {
                cfg.add_edge(block_id, tgt, ());
            }
        }

        // Compute the dominator tree.
        let dominator_tree = simple_fast(&cfg, start_block);

        // Compute reverse postorder numbering.
        // Postorder visits successors before their predecessors, so `len - i` gives smaller
        // numbers to earlier blocks. Unreachable blocks keep `reverse_postorder = None`.
        for (i, block_id) in DfsPostOrder::new(&cfg, start_block).iter(&cfg).enumerate() {
            let rev_post_id = body.len() - i;
            block_data[block_id].reverse_postorder = Some(rev_post_id.try_into().unwrap());

            // Store the dominator tree in `block_data`.
            if let Some(dominator) = dominator_tree.immediate_dominator(block_id) {
                block_data[dominator].immediately_dominates.push(block_id);
            }
        }

        // Compute the forward graph (without backward edges). We do a dfs while keeping track of
        // the path from the start node.
        let mut fwd_cfg = Cfg::new();
        let mut loop_entries = HashSet::new();
        let mut switch_blocks = HashSet::new();
        for block_id in Dfs::new(&cfg, start_block).iter(&cfg) {
            fwd_cfg.add_node(block_id);

            if body[block_id].terminator.kind.is_switch() {
                switch_blocks.insert(block_id);
                block_data[block_id].is_switch = true;
            }

            // Iterate over edges into this node (so that we can determine whether this node is a
            // loop header).
            let mut incoming_fwd_edges = 0;
            for from in cfg.neighbors_directed(block_id, petgraph::Direction::Incoming) {
                // Check if the edge is a backward edge.
                // (An unreachable source has `reverse_postorder == None`, which compares smaller
                // than any `Some`, so such edges count as forward.)
                if block_data[from].reverse_postorder >= block_data[block_id].reverse_postorder {
                    // This is a backward edge
                    block_data[block_id].is_loop_header = true;
                    loop_entries.insert(block_id);
                    // A cfg is reducible iff the target of every back edge dominates the
                    // edge's source.
                    if !dominator_tree.dominators(from).unwrap().contains(&block_id) {
                        return Err(Irreducible(from));
                    }
                } else {
                    incoming_fwd_edges += 1;
                    fwd_cfg.add_edge(from, block_id, ());
                }
            }

            // Detect merge targets.
            if incoming_fwd_edges >= 2 {
                block_data[block_id].is_merge_target = true;
            }
        }

        // Finish filling in information.
        // Postorder on the forward graph guarantees every forward successor is processed before
        // the block itself, which the `only_reach_error` and `flow` propagations below rely on.
        for block_id in DfsPostOrder::new(&fwd_cfg, start_block).iter(&fwd_cfg) {
            let block = &body[block_id];
            let targets = cfg.neighbors(block_id).collect_vec();
            let fwd_targets = fwd_cfg.neighbors(block_id).collect_vec();

            // Compute the nodes that can only reach error nodes.
            // The node can only reach error nodes if:
            // - it is an error node;
            // - or it has neighbors and they all lead to errors.
            // Note that if there is a backward edge, `only_reach_error` cannot contain this
            // node yet. In other words, this does not consider infinite loops as reaching an
            // error node.
            if block.terminator.is_error()
                || (!targets.is_empty()
                    && targets.iter().all(|&tgt| block_data[tgt].only_reach_error))
            {
                block_data[block_id].only_reach_error = true;
            }

            // Compute the flows between each pair of nodes.
            let mut flow: IndexVec<src::BlockId, BigRational> =
                mem::take(&mut block_data[block_id].flow);
            // The flow to self is 1.
            flow[block_id] = BigRational::new(1u64.into(), 1u64.into());
            // Divide the flow from each child to a given target block by the number of children.
            // This is a sparse matrix multiplication and could be implemented using a linalg
            // library.
            let num_children: BigUint = fwd_targets.len().into();
            for &child in &fwd_targets {
                for grandchild in block_data[child].reachable_including_self() {
                    // Flow from `child` to `grandchild`
                    flow[grandchild] += &block_data[child].flow[grandchild] / &num_children;
                }
            }
            block_data[block_id].flow = flow;

            // Compute shortest paths to all reachable nodes in the forward graph.
            block_data[block_id].shortest_paths = dijkstra(&fwd_cfg, block_id, None, |_| 1usize);

            // Fill in the rest of the domination data.
            // Sort dominatees by reverse-postorder id, largest first (as documented on
            // `BlockData::immediately_dominates`).
            let mut dominatees = mem::take(&mut block_data[block_id].immediately_dominates);
            dominatees.sort_by_key(|&child| block_data[child].reverse_postorder);
            dominatees.reverse();
            block_data[block_id].immediately_dominates = dominatees;
            block_data[block_id].immediately_dominated_merge_targets = block_data[block_id]
                .immediately_dominates
                .iter()
                .copied()
                .filter(|&child| block_data[child].is_merge_target)
                .collect();
        }

        // Fill in the within_loop information. See the docs of `within_loops` to understand what
        // we're computing.
        let mut path_dfs = DfsWithPath::new(&cfg, start_block);
        while let Some(block_id) = path_dfs.next(&cfg) {
            // Store all the loops on the path to this
            // node.
            let mut within_loops: SmallVec<_> = path_dfs
                .path
                .iter()
                .copied()
                .filter(|&loop_id| block_data[loop_id].is_loop_header)
                .collect();
            // The loops that we can reach by taking a single backward edge.
            let loops_directly_within = within_loops
                .iter()
                .copied()
                .filter(|&loop_header| {
                    cfg.neighbors_directed(loop_header, petgraph::Direction::Incoming)
                        .any(|bid| block_data[block_id].shortest_paths.contains_key(&bid))
                })
                .collect_vec();
            // The loops that we can reach by taking any number of backward edges.
            let loops_indirectly_within: HashSet<_> = loops_directly_within
                .iter()
                .copied()
                .flat_map(|loop_header| &block_data[loop_header].within_loops)
                .chain(&loops_directly_within)
                .copied()
                .collect();
            within_loops.retain(|id| loops_indirectly_within.contains(id));
            block_data[block_id].within_loops = within_loops;
        }

        let mut cfg = CfgInfo {
            cfg,
            fwd_cfg,
            loop_entries,
            switch_blocks,
            dominator_tree,
            block_data,
        };

        // Pick an exit block for each loop, if we find one.
        ExitInfo::compute_loop_exits(ctx, &mut cfg);

        // Pick an exit block for each switch, if we find one.
        ExitInfo::compute_switch_exits(&mut cfg);

        Ok(cfg)
    }

    /// Shorthand accessor for the precomputed data of `block_id`.
    fn block_data(&self, block_id: BlockId) -> &BlockData {
        &self.block_data[block_id]
    }
    // fn can_reach(&self, src: BlockId, tgt: BlockId) -> bool {
    //     self.block_data[src].shortest_paths.contains_key(&tgt)
    // }
    /// Reverse-postorder id of the block. Panics if the block is unreachable.
    fn topo_rank(&self, block_id: BlockId) -> u32 {
        self.block_data[block_id].reverse_postorder.unwrap()
    }
    /// Whether `src -> tgt` is a CFG edge whose target's reverse-postorder id is no greater than
    /// its source's (i.e. a backward edge).
    #[expect(unused)]
    fn is_backward_edge(&self, src: BlockId, tgt: BlockId) -> bool {
        self.block_data[src].reverse_postorder >= self.block_data[tgt].reverse_postorder
            && self.cfg.contains_edge(src, tgt)
    }

    /// Check if the node is within the given loop.
    fn is_within_loop(&self, loop_header: src::BlockId, block_id: src::BlockId) -> bool {
        self.block_data[block_id]
            .within_loops
            .contains(&loop_header)
    }

    /// Check if all paths from `src` to nodes in `target_set` go through `through_node`. If `src`
    /// is already in `target_set`, we ignore that empty path.
    fn all_paths_go_through(
        &self,
        src: src::BlockId,
        through_node: src::BlockId,
        target_set: &HashSet<src::BlockId>,
    ) -> bool {
        // Cut all outgoing edges of `through_node`: any target still reachable from `src` is
        // then reachable via a path that avoids `through_node`.
        let graph = EdgeFiltered::from_fn(&self.fwd_cfg, |edge| edge.source() != through_node);
        !Dfs::new(&graph, src)
            .iter(&graph)
            .skip(1) // skip src
            .any(|bid| target_set.contains(&bid))
    }
}
432
433impl BlockData {
434 fn shortest_paths_including_self(&self) -> impl Iterator<Item = (BlockId, usize)> {
435 self.shortest_paths.iter().map(|(bid, d)| (*bid, *d))
436 }
437 fn shortest_paths_excluding_self(&self) -> impl Iterator<Item = (BlockId, usize)> {
438 self.shortest_paths_including_self()
439 .filter(move |&(bid, _)| bid != self.id)
440 }
441 fn reachable_including_self(&self) -> impl Iterator<Item = BlockId> {
442 self.shortest_paths_including_self().map(|(bid, _)| bid)
443 }
444 fn reachable_excluding_self(&self) -> impl Iterator<Item = BlockId> {
445 self.shortest_paths_excluding_self().map(|(bid, _)| bid)
446 }
447 #[expect(unused)]
448 fn can_reach_excluding_self(&self, other: BlockId) -> bool {
449 self.shortest_paths.contains_key(&other) && self.id != other
450 }
451}
452
/// See [`ExitInfo::compute_loop_exit_ranks`].
// The derived `Ord` compares `path_count` first, then `distance_from_header`; the `Reverse`
// wrapper makes a *smaller* distance rank higher.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
struct LoopExitRank {
    /// Number of paths we found going to this exit.
    path_count: usize,
    /// Distance from the loop header.
    distance_from_header: Reverse<usize>,
}
461
impl ExitInfo {
    /// Compute the first node on each path that exits the loop.
    fn compute_loop_exit_starting_points(
        cfg: &CfgInfo,
        loop_header: src::BlockId,
    ) -> Vec<src::BlockId> {
        let mut loop_exits = Vec::new();
        // Do a dfs from the loop header while keeping track of the path from the loop header to
        // the current node.
        let mut dfs = Dfs::new(&cfg.fwd_cfg, loop_header);
        while let Some(block_id) = dfs.next(&cfg.fwd_cfg) {
            // If we've exited all the loops after and including the target one, this node is an
            // exit node for the target loop.
            if !cfg.is_within_loop(loop_header, block_id) {
                loop_exits.push(block_id);
                // Don't explore any more paths from this node.
                // (Marking the successors as already discovered prunes them from the dfs.)
                dfs.discovered.extend(cfg.fwd_cfg.neighbors(block_id));
            }
        }
        loop_exits
    }

    /// Compute the loop exit candidates along with a rank.
    ///
    /// In the simple case, there is one exit node through which all exit paths go. We want to be
    /// sure to catch that case, and when that's not possible we want to still find a node through
    /// which a lot of exit paths go.
    ///
    /// To do that, we first count for each exit node how many exit paths go through it, and pick
    /// the node with most occurrences. If there are many such nodes, we pick the one with shortest
    /// distance from the loop header. Finally if there are still many such nodes, we keep the
    /// first node found (the order in which we explore the graph is deterministic, and we use an
    /// insertion-order hash map).
    ///
    /// Note that exit candidates will typically be referenced more than once for one loop. This
    /// comes from the fact that whenever we reach a node outside the current loop, we register
    /// this node as well as all its children as exit candidates.
    /// Consider the following example:
    /// ```text
    /// while i < max {
    ///     if cond {
    ///         break;
    ///     }
    ///     s += i;
    ///     i += 1
    /// }
    /// // All the below nodes are exit candidates (each of them is referenced twice)
    /// s += 1;
    /// return s;
    /// ```
    fn compute_loop_exit_ranks(
        cfg: &CfgInfo,
        loop_header: src::BlockId,
    ) -> SeqHashMap<src::BlockId, LoopExitRank> {
        let mut loop_exits: SeqHashMap<BlockId, LoopExitRank> = SeqHashMap::new();
        for block_id in Self::compute_loop_exit_starting_points(cfg, loop_header) {
            // Every node reachable from an exit starting point is a candidate; each starting
            // point that reaches it counts as one more "path" through it.
            for bid in cfg.block_data(block_id).reachable_including_self() {
                loop_exits
                    .entry(bid)
                    .or_insert_with(|| LoopExitRank {
                        path_count: 0,
                        distance_from_header: Reverse(
                            cfg.block_data[loop_header].shortest_paths[&bid],
                        ),
                    })
                    .path_count += 1;
            }
        }
        loop_exits
    }

    /// A loop exit is any block reachable from the loop header that isn't inside the loop.
    /// This function chooses an exit for every loop. See `compute_loop_exit_ranks` for how we
    /// select them.
    ///
    /// For example:
    /// ```text
    /// while ... {
    ///     ...
    ///     if ... {
    ///         // We can't reach the loop entry from here: this is an exit
    ///         // candidate
    ///         return;
    ///     }
    /// }
    /// // This is another exit candidate - and this is the one we want to use
    /// // as the "real" exit...
    /// ...
    /// ```
    ///
    /// Once we listed all the exit candidates, we find the "best" one for every loop. The best
    /// exit is the following one:
    /// - it is the one which is used the most times (note that there can be
    ///   several candidates which are referenced strictly more than once: see the
    ///   comment below)
    /// - if several exits have the same number of occurrences, we choose the one
    ///   for which we goto the "earliest" (earliest meaning that the goto is close to
    ///   the loop entry node in the AST). The reason is that all the loops should
    ///   have an outer if ... then ... else ... which executes the loop body or goes
    ///   to the exit (note that this is not necessarily the first
    ///   if ... then ... else ... we find: loop conditions can be arbitrary
    ///   expressions, containing branchings).
    ///
    /// # Several candidates for a loop exit:
    /// =====================================
    /// There used to be a sanity check to ensure there are no two different
    /// candidates with exactly the same number of occurrences and distance from
    /// the entry of the loop, if the number of occurrences is > 1.
    ///
    /// We removed it because it does happen, for instance here (the match
    /// introduces an `unreachable` node, and it has the same number of
    /// occurrences and the same distance to the loop entry as the `panic`
    /// node):
    ///
    /// ```text
    /// pub fn list_nth_mut_loop_pair<'a, T>(
    ///     mut ls: &'a mut List<T>,
    ///     mut i: u32,
    /// ) -> &'a mut T {
    ///     loop {
    ///         match ls {
    ///             List::Nil => {
    ///                 panic!() // <-- best candidate
    ///             }
    ///             List::Cons(x, tl) => {
    ///                 if i == 0 {
    ///                     return x;
    ///                 } else {
    ///                     ls = tl;
    ///                     i -= 1;
    ///                 }
    ///             }
    ///             _ => {
    ///                 // Note that Rustc always introduces an unreachable branch after
    ///                 // desugaring matches.
    ///                 unreachable!(), // <-- best candidate
    ///             }
    ///         }
    ///     }
    /// }
    /// ```
    ///
    /// When this happens we choose an exit candidate whose edges don't necessarily
    /// lead to an error (above there are none, so we don't choose any exits). Note
    /// that this last condition is important to prevent loops from being unnecessarily
    /// nested:
    ///
    /// ```text
    /// pub fn nested_loops_enum(step_out: usize, step_in: usize) -> usize {
    ///     let mut s = 0;
    ///
    ///     for _ in 0..128 { // We don't want this loop to be nested with the loops below
    ///         s += 1;
    ///     }
    ///
    ///     for _ in 0..(step_out) {
    ///         for _ in 0..(step_in) {
    ///             s += 1;
    ///         }
    ///     }
    ///
    ///     s
    /// }
    /// ```
    fn compute_loop_exits(ctx: &TransformCtx, cfg: &mut CfgInfo) {
        for &loop_id in &cfg.loop_entries {
            // Compute the candidates.
            let loop_exits: SeqHashMap<BlockId, LoopExitRank> =
                Self::compute_loop_exit_ranks(cfg, loop_id);
            // We choose the exit with:
            // - the most occurrences
            // - the least total distance (if there are several possibilities)
            // - doesn't necessarily lead to an error (panic, unreachable)
            let best_exits: Vec<(BlockId, LoopExitRank)> =
                loop_exits.into_iter().max_set_by_key(|&(_, rank)| rank);
            // If there is exactly one best candidate, use it. Otherwise we need to split further.
            let chosen_exit = match best_exits.into_iter().map(|(bid, _)| bid).exactly_one() {
                Ok(best_exit) => Some(best_exit),
                Err(best_exits) => {
                    // Remove the candidates which only lead to errors (panic or unreachable).
                    // If there is exactly one candidate we select it, otherwise we do not select any
                    // exit.
                    // We don't want to select any exit if we are in the below situation
                    // (all paths lead to errors). We added a sanity check below to
                    // catch the situations where there are several exits which don't
                    // lead to errors.
                    //
                    // Example:
                    // ========
                    // ```
                    // loop {
                    //     match ls {
                    //         List::Nil => {
                    //             panic!() // <-- best candidate
                    //         }
                    //         List::Cons(x, tl) => {
                    //             if i == 0 {
                    //                 return x;
                    //             } else {
                    //                 ls = tl;
                    //                 i -= 1;
                    //             }
                    //         }
                    //         _ => {
                    //             unreachable!(); // <-- best candidate (Rustc introduces an `unreachable` case)
                    //         }
                    //     }
                    // }
                    // ```
                    best_exits
                        .filter(|&bid| !cfg.block_data[bid].only_reach_error)
                        .exactly_one()
                        .map_err(|mut candidates| {
                            // Adding this sanity check so that we can see when there are several
                            // candidates.
                            let span = cfg.block_data[loop_id].span;
                            sanity_check!(ctx, span, candidates.next().is_none());
                        })
                        .ok()
                }
            };
            cfg.block_data[loop_id].exit_info.loop_exit = chosen_exit;
        }
    }

    /// Let's consider the following piece of code:
    /// ```text
    /// if cond1 { ... } else { ... };
    /// if cond2 { ... } else { ... };
    /// ```
    /// Once converted to MIR, the control-flow is destructured, which means we
    /// have gotos everywhere. When reconstructing the control-flow, we have
    /// to be careful about the point where we should join the two branches of
    /// the first if.
    /// For instance, if we don't notice they should be joined at some point (i.e,
    /// whatever the branch we take, there is a moment when we go to the exact
    /// same place, just before the second if), we might generate code like
    /// this, with some duplicata:
    /// ```text
    /// if cond1 { ...; if cond2 { ... } else { ...} }
    /// else { ...; if cond2 { ... } else { ...} }
    /// ```
    ///
    /// Such a reconstructed program is valid, but it is definitely non-optimal:
    /// it is very different from the original program (making it less clean and
    /// clear), more bloated, and might involve duplicating the proof effort.
    ///
    /// For this reason, we need to find the "exit" of the first switch, which is
    /// the point where the two branches join. Note that this can be a bit tricky,
    /// because there may be more than two branches (if we do `switch(x) { ... }`),
    /// and some of them might not join (if they contain a `break`, `panic`,
    /// `return`, etc.).
    ///
    /// In order to compute the switch exits, we simply recursively compute a
    /// topologically ordered set of "filtered successors" as follows (note
    /// that we work in the CFG *without* back edges):
    /// - for a block which doesn't branch (only one successor), the filtered
    ///   successors is the set of reachable nodes.
    /// - for a block which branches, we compute the nodes reachable from all
    ///   the children, and find the "best" intersection between those.
    ///   Note that we find the "best" intersection (a pair of branches which
    ///   maximize the intersection of filtered successors) because some branches
    ///   might never join the control-flow of the other branches, if they contain
    ///   a `break`, `return`, `panic`, etc., like here:
    /// ```text
    /// if b { x = 3; } { return; }
    /// y += x;
    /// ...
    /// ```
    /// Note that with nested switches, the branches of the inner switches might
    /// goto the exits of the outer switches: for this reason, we give precedence
    /// to the outer switches.
    fn compute_switch_exits(cfg: &mut CfgInfo) {
        // We need to give precedence to the outer switches: we thus iterate
        // over the switch blocks in topological order.
        let mut exits_set = HashSet::new();
        for bid in cfg
            .switch_blocks
            .iter()
            .copied()
            .sorted_unstable_by_key(|&bid| (cfg.topo_rank(bid), bid))
        {
            let block_data = &cfg.block_data[bid];
            // Find the best successor: this is the node with the highest flow, and the lowest
            // topological rank. If several nodes have the same flow, we want to take the highest
            // one in the hierarchy: hence the use of the topological rank.
            //
            // Ex.:
            // ```text
            // A -- we start here
            // |
            // |---------------------------------------
            // |            |            |            |
            // B:(0.25,-1)  C:(0.25,-2)  D:(0.25,-3)  E:(0.25,-4)
            // |            |            |
            // |--------------------------
            // |
            // F:(0.75,-5)
            // |
            // |
            // G:(0.75,-6)
            // ```
            // The "best" node (with the highest `(flow, rank)`) in the graph above is F.
            // If the switch is inside a loop, we also only consider exits that are inside that
            // same loop. There must be one, otherwise the switch entry would not be inside the
            // loop.
            let current_loop = block_data.within_loops.last().copied();
            let best_exit: Option<BlockId> = block_data
                .reachable_excluding_self()
                .filter(|&b| {
                    current_loop.is_none_or(|current_loop| cfg.is_within_loop(current_loop, b))
                })
                .max_by_key(|&id| {
                    let flow = &block_data.flow[id];
                    let rank = Reverse(cfg.topo_rank(id));
                    // The trailing `id` makes the choice deterministic among exact ties.
                    ((flow, rank), id)
                });
            // We have an exit candidate: we first check that it was not already taken by an
            // external switch.
            //
            // We then check that we can't reach the exit of an external switch from one of the
            // branches, without going through the exit candidate. We do this by simply checking
            // that we can't reach any of the exits of outer switches.
            //
            // The reason is that it can lead to code like the following:
            // ```
            // if ... { // if #1
            //   if ... { // if #2
            //     ...
            //     // here, we have a `goto b1`, where b1 is the exit
            //     // of if #2: we thus stop translating the blocks.
            //   }
            //   else {
            //     ...
            //     // here, we have a `goto b2`, where b2 is the exit
            //     // of if #1: we thus stop translating the blocks.
            //   }
            //   // We insert code for the block b1 here (which is the exit of
            //   // the exit of if #2). However, this block should only
            //   // be executed in the branch "then" of the if #2, not in
            //   // the branch "else".
            //   ...
            // }
            // else {
            //   ...
            // }
            // ```
            if let Some(exit_id) = best_exit
                && !exits_set.contains(&exit_id)
                && cfg.all_paths_go_through(bid, exit_id, &exits_set)
            {
                exits_set.insert(exit_id);
                cfg.block_data[bid].exit_info.switch_exit = Some(exit_id);
            }
        }
    }
}
819
820/// Iter over the last non-switch statements that may be executed on any branch of this block.
821/// Skips over `Nop`s.
822fn iter_tail_statements(block: &mut tgt::Block, f: &mut impl FnMut(&mut tgt::Statement)) {
823 let Some(st) = block
824 .statements
825 .iter_mut()
826 .rev()
827 .skip_while(|st| st.kind.is_nop())
828 .next()
829 else {
830 return;
831 };
832 if let tgt::StatementKind::Switch(switch) = &mut st.kind {
833 for block in switch.iter_targets_mut() {
834 iter_tail_statements(block, f);
835 }
836 } else {
837 f(st)
838 };
839}
840
841type Depth = usize;
842
/// How to translate a jump to a given block (see [`SpecialJump`]).
#[derive(Debug, Clone, Copy)]
enum SpecialJumpKind {
    /// When encountering this block, `continue` to the given depth.
    LoopContinue(Depth),
    /// When encountering this block, `break` to the given depth. This comes from a loop.
    LoopBreak(Depth),
    /// When encountering this block, `break` to the given depth. This is a `loop` context
    /// introduced only for forward jumps.
    ForwardBreak(Depth),
    /// When encountering this block, do nothing, as this is the next block that will be
    /// translated.
    NextBlock,
}
856
/// A block that must be translated to a special jump rather than translated inline.
#[derive(Clone, Copy)]
struct SpecialJump {
    /// The relevant block.
    target_block: BlockId,
    /// How to translate a jump to the target block.
    kind: SpecialJumpKind,
}
864
865impl SpecialJump {
866 fn new(target_block: BlockId, kind: SpecialJumpKind) -> Self {
867 Self { target_block, kind }
868 }
869}
870
871impl std::fmt::Debug for SpecialJump {
872 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
873 write!(f, "SpecialJump({}, {:?})", self.target_block, self.kind)
874 }
875}
876
/// How to handle blocks reachable from multiple branches.
enum ReconstructMode {
    /// Duplicate blocks reachable from multiple branches (the same source block may then be
    /// emitted several times).
    Duplicate,
    /// Insert loops for the purpose of breaking forward to them to implement DAG-like control-flow
    /// without duplicating blocks.
    /// Based on the algorithm from "Beyond Relooper" (https://dl.acm.org/doi/10.1145/3547621).
    ForwardBreak,
}
886
/// State for reconstructing structured control-flow from an unstructured body.
struct ReconstructCtx<'a> {
    /// Graph information about the CFG (reachability, dominator tree, loop entries,
    /// loop/switch exits) computed up-front by `CfgInfo::build`.
    cfg: CfgInfo,
    /// The unstructured source body being translated.
    body: &'a src::ExprBody,
    /// The depth of `loop` contexts we may `break`/`continue` to.
    break_context_depth: Depth,
    /// Stack of block ids that should be translated to special jumps (`break`/`continue`/do
    /// nothing) in the current context.
    /// The block where control-flow continues naturally after this block is kept at the top of the
    /// stack.
    special_jump_stack: Vec<SpecialJump>,
    /// How to handle blocks reachable from multiple branches.
    mode: ReconstructMode,
}
899
impl<'a> ReconstructCtx<'a> {
    /// Prepare the reconstruction: compute the graph information for `src_body` and pick the
    /// reconstruction mode. Returns `Err` if the control-flow graph is not reducible.
    fn build(ctx: &TransformCtx, src_body: &'a src::ExprBody) -> Result<Self, Irreducible> {
        // Compute all sorts of graph-related information about the control-flow graph, including
        // reachability, the dominator tree, loop entries, and loop/switch exits.
        let cfg = CfgInfo::build(ctx, &src_body.body)?;

        // Translate the body by reconstructing the loops and the
        // conditional branchings.
        let allow_duplication = true;
        Ok(ReconstructCtx {
            cfg,
            body: src_body,
            break_context_depth: 0,
            special_jump_stack: Vec::new(),
            mode: if allow_duplication {
                ReconstructMode::Duplicate
            } else {
                ReconstructMode::ForwardBreak
            },
        })
    }

    /// Translate a single non-terminator statement. This is a direct 1:1 mapping between the
    /// ULLBC and LLBC statement kinds, preserving the source span.
    fn translate_statement(&self, st: &src::Statement) -> tgt::Statement {
        let src_span = st.span;
        let st = match st.kind.clone() {
            src::StatementKind::Assign(place, rvalue) => tgt::StatementKind::Assign(place, rvalue),
            src::StatementKind::SetDiscriminant(place, variant_id) => {
                tgt::StatementKind::SetDiscriminant(place, variant_id)
            }
            src::StatementKind::CopyNonOverlapping(copy) => {
                tgt::StatementKind::CopyNonOverlapping(copy)
            }
            src::StatementKind::StorageLive(var_id) => tgt::StatementKind::StorageLive(var_id),
            src::StatementKind::StorageDead(var_id) => tgt::StatementKind::StorageDead(var_id),
            src::StatementKind::Deinit(place) => tgt::StatementKind::Deinit(place),
            src::StatementKind::Assert(assert) => tgt::StatementKind::Assert(assert),
            src::StatementKind::Nop => tgt::StatementKind::Nop,
        };
        tgt::Statement::new(src_span, st)
    }

    /// Translate a jump to the given block. The span is used to create the jump statement, if any.
    #[tracing::instrument(skip(self), ret, fields(stack = ?self.special_jump_stack))]
    fn translate_jump(&mut self, span: Span, target_block: src::BlockId) -> tgt::Block {
        match self
            .special_jump_stack
            .iter_mut()
            .rev()
            .enumerate()
            .find(|(_, j)| j.target_block == target_block)
        {
            // The target is on the stack: emit the corresponding special jump instead of
            // translating the block here. `i` is the distance from the top of the stack.
            Some((i, jump_target)) => {
                let mk_block = |kind| tgt::Statement::new(span, kind).into_block();
                match jump_target.kind {
                    // The top of the stack is where control-flow goes naturally, no need to add a
                    // `break`/`continue`.
                    SpecialJumpKind::LoopContinue(_) | SpecialJumpKind::ForwardBreak(_)
                        if i == 0 && matches!(self.mode, ReconstructMode::ForwardBreak) =>
                    {
                        mk_block(tgt::StatementKind::Nop)
                    }
                    // Convert the absolute context depth into a relative `continue`/`break` index.
                    SpecialJumpKind::LoopContinue(depth) => mk_block(tgt::StatementKind::Continue(
                        self.break_context_depth - depth,
                    )),
                    SpecialJumpKind::ForwardBreak(depth) | SpecialJumpKind::LoopBreak(depth) => {
                        mk_block(tgt::StatementKind::Break(self.break_context_depth - depth))
                    }
                    SpecialJumpKind::NextBlock => mk_block(tgt::StatementKind::Nop),
                }
            }
            // Translate the block without a jump.
            None => return self.translate_block(target_block),
        }
    }

    /// Translate a terminator, recursively translating its successor blocks via
    /// [`Self::translate_jump`]. Calls and drops become statements prepended to their
    /// continuation block.
    fn translate_terminator(&mut self, terminator: &src::Terminator) -> tgt::Block {
        let src_span = terminator.span;

        match &terminator.kind {
            src::TerminatorKind::Abort(kind) => {
                tgt::Statement::new(src_span, tgt::StatementKind::Abort(kind.clone())).into_block()
            }
            src::TerminatorKind::Return => {
                tgt::Statement::new(src_span, tgt::StatementKind::Return).into_block()
            }
            // Unwinds are not represented in the LLBC (see the TODOs below), so a resume is
            // rendered as a panic abort.
            src::TerminatorKind::UnwindResume => {
                tgt::Statement::new(src_span, tgt::StatementKind::Abort(AbortKind::Panic(None)))
                    .into_block()
            }
            src::TerminatorKind::Call {
                call,
                target,
                on_unwind: _,
            } => {
                // TODO: Have unwinds in the LLBC
                let st = tgt::Statement::new(src_span, tgt::StatementKind::Call(call.clone()));
                let mut block = self.translate_jump(terminator.span, *target);
                block.statements.insert(0, st);
                block
            }
            src::TerminatorKind::Drop {
                kind,
                place,
                tref,
                target,
                on_unwind: _,
            } => {
                // TODO: Have unwinds in the LLBC
                let st = tgt::Statement::new(
                    src_span,
                    tgt::StatementKind::Drop(place.clone(), tref.clone(), kind.clone()),
                );
                let mut block = self.translate_jump(terminator.span, *target);
                block.statements.insert(0, st);
                block
            }
            src::TerminatorKind::Goto { target } => self.translate_jump(terminator.span, *target),
            src::TerminatorKind::Switch { discr, targets } => {
                // Translate the target expressions
                let switch = match &targets {
                    src::SwitchTargets::If(then_tgt, else_tgt) => {
                        let then_block = self.translate_jump(terminator.span, *then_tgt);
                        let else_block = self.translate_jump(terminator.span, *else_tgt);
                        tgt::Switch::If(discr.clone(), then_block, else_block)
                    }
                    src::SwitchTargets::SwitchInt(int_ty, targets, otherwise) => {
                        // Note that some branches can be grouped together, like
                        // here:
                        // ```
                        // match e {
                        //     E::V1 | E::V2 => ..., // Grouped
                        //     E::V3 => ...
                        // }
                        // ```
                        // We detect this by checking if a block has already been
                        // translated as one of the branches of the switch.
                        //
                        // Rk.: note there may be intermediate gotos depending
                        // on the MIR we use. Typically, we manage to detect the
                        // grouped branches with Optimized MIR, but not with Promoted
                        // MIR. See the comment in "tests/src/matches.rs".

                        // We link block ids to:
                        // - vector of matched integer values
                        // - translated blocks
                        let mut branches: SeqHashMap<src::BlockId, (Vec<Literal>, tgt::Block)> =
                            SeqHashMap::new();

                        // Translate the children expressions
                        for (v, bid) in targets.iter() {
                            // Check if the block has already been translated:
                            // if yes, it means we need to group branches
                            if branches.contains_key(bid) {
                                // Already translated: add the matched value to
                                // the list of values
                                let branch = branches.get_mut(bid).unwrap();
                                branch.0.push(v.clone());
                            } else {
                                // Not translated: translate it
                                let block = self.translate_jump(terminator.span, *bid);
                                // We use the terminator span information in case the
                                // then statement is `None`
                                branches.insert(*bid, (vec![v.clone()], block));
                            }
                        }
                        let targets_blocks: Vec<(Vec<Literal>, tgt::Block)> =
                            branches.into_iter().map(|(_, x)| x).collect();

                        let otherwise_block = self.translate_jump(terminator.span, *otherwise);

                        // Translate
                        tgt::Switch::SwitchInt(
                            discr.clone(),
                            *int_ty,
                            targets_blocks,
                            otherwise_block,
                        )
                    }
                };

                // Return
                let span = tgt::combine_switch_targets_span(&switch);
                let span = combine_span(&src_span, &span);
                let st = tgt::StatementKind::Switch(switch);
                tgt::Statement::new(span, st).into_block()
            }
        }
    }

    /// Translate just the block statements and terminator.
    fn translate_block_itself(&mut self, block_id: BlockId) -> tgt::Block {
        let block = &self.body.body[block_id];
        // Translate the statements inside the block
        let statements = block
            .statements
            .iter()
            .map(|st| self.translate_statement(st))
            .collect_vec();
        // Translate the terminator.
        let terminator = self.translate_terminator(&block.terminator);
        // Prepend the statements to the terminator.
        if let Some(st) = tgt::Block::from_seq(statements) {
            st.merge(terminator)
        } else {
            terminator
        }
    }

    /// Translate a block including surrounding control-flow like looping.
    #[tracing::instrument(skip(self), fields(stack = ?self.special_jump_stack))]
    fn translate_block(&mut self, block_id: src::BlockId) -> tgt::Block {
        ensure_sufficient_stack(|| self.translate_block_inner(block_id))
    }
    /// Worker for [`Self::translate_block`] (split out so the recursion can grow the stack).
    fn translate_block_inner(&mut self, block_id: src::BlockId) -> tgt::Block {
        // Some of the blocks we might jump to inside this tree can't be translated as normal
        // blocks: the loop backward edges must become `continue`s and the merge nodes may need
        // some care if we're jumping to them from distant locations.
        // For this purpose, we push to the `special_jump_stack` the block ids that must be
        // translated specially. In `translate_jump` we check the stack. At the end of this
        // function we restore the stack to its previous state.
        let old_context_depth = self.special_jump_stack.len();
        let block_data = &self.cfg.block_data[block_id];
        let span = block_data.span;

        // Catch jumps to the loop header or loop exit.
        if block_data.is_loop_header {
            self.break_context_depth += 1;
            if let Some(exit_id) = block_data.exit_info.loop_exit {
                self.special_jump_stack.push(SpecialJump::new(
                    exit_id,
                    SpecialJumpKind::LoopBreak(self.break_context_depth),
                ));
            }
            // Put the next block at the top of the stack.
            self.special_jump_stack.push(SpecialJump::new(
                block_id,
                SpecialJumpKind::LoopContinue(self.break_context_depth),
            ));
        }

        // Catch jumps to a merge node.
        let merge_children = &block_data.immediately_dominated_merge_targets;
        if let ReconstructMode::ForwardBreak = self.mode {
            // We support forward-jumps using `break`
            // The child with highest postorder numbering is nested outermost in this scheme.
            for &child in merge_children {
                self.break_context_depth += 1;
                self.special_jump_stack.push(SpecialJump::new(
                    child,
                    SpecialJumpKind::ForwardBreak(self.break_context_depth),
                ));
            }
        }

        if let Some(bid) = block_data.exit_info.switch_exit
            && !block_data.is_loop_header
            && !(matches!(self.mode, ReconstructMode::ForwardBreak)
                && merge_children.contains(&bid))
        {
            // Move some code that would be inside one or several switch branches to be after the
            // switch instead.
            self.special_jump_stack
                .push(SpecialJump::new(bid, SpecialJumpKind::NextBlock));
        }

        // Translate this block. Any jumps to a loop header or a merge node will be replaced with
        // `continue`/`break`.
        let mut block = self.translate_block_itself(block_id);

        // Reset the state to what it was previously, and translate what remains.
        // NB: the `move` closure only captures a copy of `block.span` (disjoint closure
        // captures), so `block` itself stays usable and mutable below.
        let new_statement = move |kind| tgt::Statement::new(block.span, kind);
        while self.special_jump_stack.len() > old_context_depth {
            // Pop in reverse order of pushing, wrapping `block` with the matching construct.
            let special_jump = self.special_jump_stack.pop().unwrap();
            match &special_jump.kind {
                SpecialJumpKind::LoopContinue(_) => {
                    self.break_context_depth -= 1;
                    if let ReconstructMode::ForwardBreak = self.mode {
                        // We add `continue` at the end for users that don't know that the default
                        // behavior at the end of a loop block is `continue`. Not needed for
                        // `Duplicate` mode because we use explicit `continue`s there. TODO: clean
                        // that up.
                        block
                            .statements
                            .push(new_statement(tgt::StatementKind::Continue(0)));
                    }
                    block = new_statement(tgt::StatementKind::Loop(block)).into_block();
                }
                SpecialJumpKind::ForwardBreak(_) => {
                    self.break_context_depth -= 1;
                    // Remove unneeded `break`s in branches leading up to that final one.
                    iter_tail_statements(&mut block, &mut |st| {
                        if matches!(st.kind, tgt::StatementKind::Break(0)) {
                            st.kind = tgt::StatementKind::Nop;
                        }
                    });
                    // We add a `loop { ...; break }` so that we can use `break` to jump forward.
                    block
                        .statements
                        .push(new_statement(tgt::StatementKind::Break(0)));
                    block = new_statement(tgt::StatementKind::Loop(block)).into_block();
                    // We must translate the merge nodes after the block used for forward jumps to
                    // them.
                    let next_block = self.translate_jump(span, special_jump.target_block);
                    block = block.merge(next_block);
                }
                SpecialJumpKind::NextBlock | SpecialJumpKind::LoopBreak(..) => {
                    let next_block = self.translate_jump(span, special_jump.target_block);
                    block = block.merge(next_block);
                }
            }
        }
        block
    }
}
1214
/// Remove the `loop { ...; break }` wrappers that serve no purpose: loops with no `continue`
/// targeting them and whose single `break` is the trailing `break 0`. Works in two passes:
/// first count, for each loop, how many `break`s/`continue`s target it; then flatten the
/// removable loops into their parent block, renumbering the remaining `break`/`continue`
/// depths to account for the removed nesting levels.
fn remove_useless_jump_blocks(body: &mut tgt::ExprBody) {
    use tgt::StatementKind;
    // Per-loop tally of the jumps that target it.
    #[derive(Default)]
    struct Count {
        continue_count: u32,
        break_count: u32,
    }
    // Pass 1: count jumps per loop, tracking the stack of enclosing loops by statement id.
    #[derive(Default, Visitor)]
    struct CountJumpsVisitor {
        counts: HashMap<StatementId, Count>,
        loop_stack: Vec<StatementId>,
    }
    // Pass 2: remove the useless loops and renumber jump depths.
    #[derive(Visitor)]
    struct RemoveUselessJumpsVisitor {
        counts: HashMap<StatementId, Count>,
        /// For every loop we encounter, whether we're keeping it or removing it.
        loop_stack: Vec<bool>,
    }

    impl VisitBodyMut for CountJumpsVisitor {
        fn visit_llbc_statement(&mut self, st: &mut tgt::Statement) -> ControlFlow<Self::Break> {
            // Keep the loop stack in sync while recursing: push before visiting the loop body,
            // pop afterwards.
            if let StatementKind::Loop(_) = &st.kind {
                self.loop_stack.push(st.id);
            }
            match &st.kind {
                StatementKind::Break(depth) => {
                    // `depth` is relative: 0 targets the innermost loop (top of the stack).
                    let loop_id = self.loop_stack[self.loop_stack.len() - 1 - depth];
                    self.counts.entry(loop_id).or_default().break_count += 1;
                }
                StatementKind::Continue(depth) => {
                    let loop_id = self.loop_stack[self.loop_stack.len() - 1 - depth];
                    self.counts.entry(loop_id).or_default().continue_count += 1;
                }
                _ => {}
            }
            self.visit_inner(st)?;
            if let StatementKind::Loop(_) = &st.kind {
                self.loop_stack.pop();
            }
            ControlFlow::Continue(())
        }
    }

    impl VisitBodyMut for RemoveUselessJumpsVisitor {
        fn visit_llbc_block(&mut self, block: &mut tgt::Block) -> ControlFlow<Self::Break> {
            // Rebuild the statement list, flattening removed loops into it.
            for mut st in mem::take(&mut block.statements) {
                if let tgt::StatementKind::Loop(block) = &mut st.kind {
                    let counts = &self.counts[&st.id];
                    // A loop is useless if nothing `continue`s to it and its only `break` is
                    // the trailing `break 0`.
                    let remove = counts.continue_count == 0
                        && counts.break_count == 1
                        && matches!(
                            block.statements.last().unwrap().kind,
                            StatementKind::Break(0)
                        );
                    // Record the decision so nested jumps can be renumbered while visiting.
                    self.loop_stack.push(!remove);
                }
                self.visit(&mut st)?;
                if st.kind.is_loop() && !self.loop_stack.pop().unwrap() {
                    // Remove the loop.
                    let StatementKind::Loop(mut inner_block) = st.kind else {
                        unreachable!()
                    };
                    // Neutralize the trailing `break 0` and splice the body in place.
                    inner_block.statements.last_mut().unwrap().kind = StatementKind::Nop;
                    block.statements.extend(inner_block.statements);
                } else {
                    block.statements.push(st);
                }
            }
            ControlFlow::Continue(())
        }
        fn enter_llbc_statement(&mut self, st: &mut tgt::Statement) {
            // Renumber jump depths: the new depth is the number of *kept* loops strictly
            // between this statement and the loop it targets.
            match &st.kind {
                StatementKind::Break(depth) => {
                    let new_depth = self.loop_stack[self.loop_stack.len() - depth..]
                        .iter()
                        .filter(|&&keep| keep)
                        .count();
                    st.kind = StatementKind::Break(new_depth);
                }
                StatementKind::Continue(depth) => {
                    let new_depth = self.loop_stack[self.loop_stack.len() - depth..]
                        .iter()
                        .filter(|&&keep| keep)
                        .count();
                    st.kind = StatementKind::Continue(new_depth);
                }
                _ => {}
            }
        }
    }

    // First pass: count the jumps targeting each loop.
    let mut v = CountJumpsVisitor::default();
    body.body.drive_body_mut(&mut v);
    // Second pass: remove the useless loops, reusing the counts.
    let mut v = RemoveUselessJumpsVisitor {
        counts: v.counts,
        loop_stack: Default::default(),
    };
    body.body.drive_body_mut(&mut v);
}
1314
1315fn translate_body(ctx: &mut TransformCtx, body: &mut gast::Body) {
1316 use gast::Body::{Structured, Unstructured};
1317 let Unstructured(src_body) = body else {
1318 panic!("Called `ullbc_to_llbc` on an already restructured body")
1319 };
1320 trace!("About to translate to ullbc: {:?}", src_body.span);
1321
1322 // Calculate info about the graph and heuristically determine loop and switch exit blocks.
1323 let start_block = BlockId::ZERO;
1324 let mut ctx = match ReconstructCtx::build(ctx, src_body) {
1325 Ok(ctx) => ctx,
1326 Err(Irreducible(bid)) => {
1327 let span = src_body.body[bid].terminator.span;
1328 register_error!(
1329 ctx,
1330 span,
1331 "the control-flow graph of this function is not reducible"
1332 );
1333 panic!("can't reconstruct irreducible control-flow")
1334 }
1335 };
1336 // Translate the blocks using the computed data.
1337 let tgt_body = ctx.translate_block(start_block);
1338
1339 let mut tgt_body = tgt::ExprBody {
1340 span: src_body.span,
1341 locals: src_body.locals.clone(),
1342 bound_body_regions: src_body.bound_body_regions,
1343 body: tgt_body,
1344 comments: src_body.comments.clone(),
1345 };
1346 remove_useless_jump_blocks(&mut tgt_body);
1347
1348 *body = Structured(tgt_body);
1349}
1350
1351pub struct Transform;
1352impl TransformPass for Transform {
1353 fn transform_ctx(&self, ctx: &mut TransformCtx) {
1354 // Translate the bodies one at a time.
1355 ctx.for_each_body(|ctx, body| {
1356 translate_body(ctx, body);
1357 });
1358
1359 if ctx.options.print_built_llbc {
1360 eprintln!("# LLBC resulting from control-flow reconstruction:\n\n{ctx}\n",);
1361 } else {
1362 trace!("# LLBC resulting from control-flow reconstruction:\n\n{ctx}\n",);
1363 }
1364 }
1365}