charon_lib/transform/control_flow/ullbc_to_llbc.rs
1//! ULLBC to LLBC
2//!
3//! We reconstruct the control-flow in the Unstructured LLBC.
4//!
5//! The reconstruction algorithm is not written to be efficient (its complexity
6//! is probably very bad), but it was not written to be: this is still an early
7//! stage and we want the algorithm to generate the best reconstruction as
8//! possible. We still need to test the algorithm on more interesting examples,
9//! and will consider making it more efficient once it is a bit mature and well
10//! tested.
11//! Also note that we more importantly focus on making the algorithm sound: the
12//! reconstructed program must always be equivalent to the original MIR program,
13//! and the fact that the reconstruction preserves this property must be obvious.
14use itertools::Itertools;
15use petgraph::algo::dijkstra;
16use petgraph::algo::dominators::{Dominators, simple_fast};
17use petgraph::graphmap::DiGraphMap;
18use petgraph::visit::{
19 Dfs, DfsPostOrder, EdgeFiltered, EdgeRef, GraphRef, IntoNeighbors, VisitMap, Visitable, Walker,
20};
21use smallvec::SmallVec;
22use std::cmp::Reverse;
23use std::collections::{HashMap, HashSet};
24use std::mem;
25
26use crate::common::ensure_sufficient_stack;
27use crate::ids::IndexVec;
28use crate::llbc_ast::{self as tgt, StatementId};
29use crate::meta::{Span, combine_span};
30use crate::transform::TransformCtx;
31use crate::transform::ctx::TransformPass;
32use crate::ullbc_ast::{self as src, BlockId};
33use crate::{ast::*, register_error};
34
35pub enum StackAction<N> {
36 PopPath,
37 Explore(N),
38}
39pub struct DfsWithPath<N, VM> {
40 /// The stack of nodes to visit
41 pub stack: Vec<StackAction<N>>,
42 /// The map of discovered nodes
43 pub discovered: VM,
44 /// The path from start node to current node.
45 pub path: Vec<N>,
46}
47impl<N, VM> DfsWithPath<N, VM>
48where
49 N: Copy + PartialEq,
50 VM: VisitMap<N>,
51{
52 /// Create a new **DfsWithPath**, using the graph's visitor map, and put **start** in the stack
53 /// of nodes to visit.
54 pub fn new<G>(graph: G, start: N) -> Self
55 where
56 G: GraphRef + Visitable<NodeId = N, Map = VM>,
57 {
58 Self {
59 stack: vec![StackAction::Explore(start)],
60 discovered: graph.visit_map(),
61 path: vec![],
62 }
63 }
64
65 /// Return the next node in the dfs, or **None** if the traversal is done.
66 pub fn next<G>(&mut self, graph: G) -> Option<N>
67 where
68 G: IntoNeighbors<NodeId = N>,
69 {
70 while let Some(action) = self.stack.pop() {
71 match action {
72 StackAction::Explore(node) => {
73 if self.discovered.visit(node) {
74 self.path.push(node);
75 self.stack.push(StackAction::PopPath);
76 for succ in graph.neighbors(node) {
77 if !self.discovered.is_visited(&succ) {
78 self.stack.push(StackAction::Explore(succ));
79 }
80 }
81 return Some(node);
82 }
83 }
84 StackAction::PopPath => {
85 self.path.pop();
86 }
87 }
88 }
89 None
90 }
91}
92
93/// Arbitrary-precision numbers
94type BigUint = fraction::DynaInt<u64, fraction::BigUint>;
95type BigRational = fraction::Ratio<BigUint>;
96
97/// Control-Flow Graph
98type Cfg = DiGraphMap<src::BlockId, ()>;
99
100/// Information precomputed about a function's CFG.
101#[derive(Debug)]
102struct CfgInfo {
103 /// The CFG
104 pub cfg: Cfg,
105 /// The CFG where all the backward edges have been removed. Aka "forward CFG".
106 pub fwd_cfg: Cfg,
107 /// We consider the destination of the backward edges to be loop entries and
108 /// store them here.
109 pub loop_entries: HashSet<src::BlockId>,
110 /// The blocks whose terminators are a switch are stored here.
111 pub switch_blocks: HashSet<src::BlockId>,
112 /// Tree of which nodes dominates which other nodes.
113 #[expect(unused)]
114 pub dominator_tree: Dominators<BlockId>,
115 /// Computed data about each block.
116 pub block_data: IndexVec<BlockId, Box<BlockData>>,
117}
118
119#[derive(Debug)]
120struct BlockData {
121 pub id: BlockId,
122 pub span: Span,
123 /// The (unique) entrypoints of each loop. Unique because we error on irreducible cfgs.
124 pub is_loop_header: bool,
125 /// Whether this block is a switch.
126 pub is_switch: bool,
127 /// Whether this block has multiple incoming control-flow edges in the forward graph.
128 pub is_merge_target: bool,
129 /// Order in a reverse postorder numbering. `None` if the block is unreachable.
130 pub reverse_postorder: Option<u32>,
131 /// Nodes that this block immediately dominates. Sorted by reverse_postorder_id, with largest
132 /// id first.
133 pub immediately_dominates: SmallVec<[BlockId; 2]>,
134 /// The nodes from `immediately_dominates` that are also merge targets. Sorted in the same
135 /// order.
136 pub immediately_dominated_merge_targets: SmallVec<[BlockId; 2]>,
137 /// List of loops inside of which this node is (loops are identified by their header). A node
138 /// is considered inside a loop if it is reachable from the loop header and if it can reach the
139 /// loop header using only the backwards edges into it (i.e. we don't count a path that enters
140 /// the loop header through a forward edge).
141 ///
142 /// Note that we might have to take a backward edge to reach the loop header, e.g.:
143 /// 'a: loop {
144 /// // ...
145 /// 'b: loop {
146 /// // ...
147 /// if true {
148 /// continue 'a;
149 /// } else {
150 /// if true {
151 /// break 'a;
152 /// }
153 /// // This node has to take two backward edges in order to reach the start of `'a`.
154 /// }
155 /// }
156 /// }
157 ///
158 /// The restriction on backwards edges is for the following case:
159 /// loop {
160 /// loop {
161 /// ..
162 /// }
163 /// // Not in inner loop
164 /// }
165 ///
166 /// This is sorted by path order from the graph root.
167 pub within_loops: SmallVec<[BlockId; 2]>,
168 /// Node from where we can only reach error nodes (panic, etc.)
169 // TODO: track more nicely the set of targets reachable from a node: panic, return, exit loop,
170 // continue loop (this is a partial order).
171 pub only_reach_error: bool,
172 /// List of reachable nodes, with the length of shortest path to them. Includes the current
173 /// node.
174 pub shortest_paths: hashbrown::HashMap<BlockId, usize>,
175 /// Let's say we put a quantity of water equal to 1 on the block, and the water flows downards.
176 /// Whenever there is a branching, the quantity of water gets equally divided between the
177 /// branches. When the control flows join, we put the water back together. The set below
178 /// computes the amount of water received by each descendant of the node.
179 ///
180 /// TODO: there must be a known algorithm which computes this, right?...
181 /// This is exactly this problems:
182 /// <https://stackoverflow.com/questions/78221666/algorithm-for-total-flow-through-weighted-directed-acyclic-graph>
183 /// TODO: the way I compute this is not efficient.
184 pub flow: IndexVec<BlockId, BigRational>,
185 /// Reconstructed information about loops and switches.
186 pub exit_info: ExitInfo,
187}
188
189#[derive(Debug, Default, Clone)]
190struct ExitInfo {
191 /// The loop exit
192 loop_exit: Option<src::BlockId>,
193 /// The switch exit.
194 switch_exit: Option<src::BlockId>,
195}
196
197/// Error indicating that the control-flow graph is not reducible. The contained block id is a
198/// block involved in an irreducible subgraph.
199struct Irreducible(BlockId);
200
201impl CfgInfo {
202 /// Build the CFGs (the "regular" CFG and the CFG without backward edges) and precompute a
203 /// bunch of graph information about the CFG.
204 fn build(ctx: &TransformCtx, body: &src::BodyContents) -> Result<Self, Irreducible> {
205 // The steps in this function follow a precise order, as each step typically requires the
206 // previous one.
207 let start_block = BlockId::ZERO;
208
209 let empty_flow = body.map_ref(|_| BigRational::new(0u64.into(), 1u64.into()));
210 let mut block_data: IndexVec<BlockId, _> = body.map_ref_indexed(|id, contents| {
211 Box::new(BlockData {
212 id,
213 span: contents.terminator.span,
214 is_loop_header: false,
215 is_switch: false,
216 is_merge_target: false,
217 reverse_postorder: None,
218 immediately_dominates: Default::default(),
219 immediately_dominated_merge_targets: Default::default(),
220 within_loops: Default::default(),
221 only_reach_error: false,
222 shortest_paths: Default::default(),
223 flow: empty_flow.clone(),
224 exit_info: Default::default(),
225 })
226 });
227
228 // Build the node graph (we ignore unwind paths for now).
229 let mut cfg = Cfg::new();
230 for (block_id, block) in body.iter_enumerated() {
231 cfg.add_node(block_id);
232 for tgt in block.targets_ignoring_unwind() {
233 cfg.add_edge(block_id, tgt, ());
234 }
235 }
236
237 // Compute the dominator tree.
238 let dominator_tree = simple_fast(&cfg, start_block);
239
240 // Compute reverse postorder numbering.
241 for (i, block_id) in DfsPostOrder::new(&cfg, start_block).iter(&cfg).enumerate() {
242 let rev_post_id = body.len() - i;
243 block_data[block_id].reverse_postorder = Some(rev_post_id.try_into().unwrap());
244
245 // Store the dominator tree in `block_data`.
246 if let Some(dominator) = dominator_tree.immediate_dominator(block_id) {
247 block_data[dominator].immediately_dominates.push(block_id);
248 }
249 }
250
251 // Compute the forward graph (without backward edges). We do a dfs while keeping track of
252 // the path from the start node.
253 let mut fwd_cfg = Cfg::new();
254 let mut loop_entries = HashSet::new();
255 let mut switch_blocks = HashSet::new();
256 for block_id in Dfs::new(&cfg, start_block).iter(&cfg) {
257 fwd_cfg.add_node(block_id);
258
259 if body[block_id].terminator.kind.is_switch() {
260 switch_blocks.insert(block_id);
261 block_data[block_id].is_switch = true;
262 }
263
264 // Iterate over edges into this node (so that we can determine whether this node is a
265 // loop header).
266 let mut incoming_fwd_edges = 0;
267 for from in cfg.neighbors_directed(block_id, petgraph::Direction::Incoming) {
268 // Check if the edge is a backward edge.
269 if block_data[from].reverse_postorder >= block_data[block_id].reverse_postorder {
270 // This is a backward edge
271 block_data[block_id].is_loop_header = true;
272 loop_entries.insert(block_id);
273 // A cfg is reducible iff the target of every back edge dominates the
274 // edge's source.
275 if !dominator_tree.dominators(from).unwrap().contains(&block_id) {
276 return Err(Irreducible(from));
277 }
278 } else {
279 incoming_fwd_edges += 1;
280 fwd_cfg.add_edge(from, block_id, ());
281 }
282 }
283
284 // Detect merge targets.
285 if incoming_fwd_edges >= 2 {
286 block_data[block_id].is_merge_target = true;
287 }
288 }
289
290 // Finish filling in information.
291 for block_id in DfsPostOrder::new(&fwd_cfg, start_block).iter(&fwd_cfg) {
292 let block = &body[block_id];
293 let targets = cfg.neighbors(block_id).collect_vec();
294 let fwd_targets = fwd_cfg.neighbors(block_id).collect_vec();
295
296 // Compute the nodes that can only reach error nodes.
297 // The node can only reach error nodes if:
298 // - it is an error node;
299 // - or it has neighbors and they all lead to errors.
300 // Note that if there is a backward edge, `only_reach_error` cannot contain this
301 // node yet. In other words, this does not consider infinite loops as reaching an
302 // error node.
303 if block.terminator.is_error()
304 || (!targets.is_empty()
305 && targets.iter().all(|&tgt| block_data[tgt].only_reach_error))
306 {
307 block_data[block_id].only_reach_error = true;
308 }
309
310 // Compute the flows between each pair of nodes.
311 let mut flow: IndexVec<src::BlockId, BigRational> =
312 mem::take(&mut block_data[block_id].flow);
313 // The flow to self is 1.
314 flow[block_id] = BigRational::new(1u64.into(), 1u64.into());
315 // Divide the flow from each child to a given target block by the number of children.
316 // This is a sparse matrix multiplication and could be implemented using a linalg
317 // library.
318 let num_children: BigUint = fwd_targets.len().into();
319 for &child in &fwd_targets {
320 for grandchild in block_data[child].reachable_including_self() {
321 // Flow from `child` to `grandchild`
322 flow[grandchild] += &block_data[child].flow[grandchild] / &num_children;
323 }
324 }
325 block_data[block_id].flow = flow;
326
327 // Compute shortest paths to all reachable nodes in the forward graph.
328 block_data[block_id].shortest_paths = dijkstra(&fwd_cfg, block_id, None, |_| 1usize);
329
330 // Fill in the rest of the domination data.
331 let mut dominatees = mem::take(&mut block_data[block_id].immediately_dominates);
332 dominatees.sort_by_key(|&child| block_data[child].reverse_postorder);
333 dominatees.reverse();
334 block_data[block_id].immediately_dominates = dominatees;
335 block_data[block_id].immediately_dominated_merge_targets = block_data[block_id]
336 .immediately_dominates
337 .iter()
338 .copied()
339 .filter(|&child| block_data[child].is_merge_target)
340 .collect();
341 }
342
343 // Fill in the within_loop information. See the docs of `within_loops` to understand what
344 // we're computing.
345 let mut path_dfs = DfsWithPath::new(&cfg, start_block);
346 while let Some(block_id) = path_dfs.next(&cfg) {
347 // Store all the loops on the path to this
348 // node.
349 let mut within_loops: SmallVec<_> = path_dfs
350 .path
351 .iter()
352 .copied()
353 .filter(|&loop_id| block_data[loop_id].is_loop_header)
354 .collect();
355 // The loops that we can reach by taking a single backward edge.
356 let loops_directly_within = within_loops
357 .iter()
358 .copied()
359 .filter(|&loop_header| {
360 cfg.neighbors_directed(loop_header, petgraph::Direction::Incoming)
361 .any(|bid| block_data[block_id].shortest_paths.contains_key(&bid))
362 })
363 .collect_vec();
364 // The loops that we can reach by taking any number of backward edges.
365 let loops_indirectly_within: HashSet<_> = loops_directly_within
366 .iter()
367 .copied()
368 .flat_map(|loop_header| &block_data[loop_header].within_loops)
369 .chain(&loops_directly_within)
370 .copied()
371 .collect();
372 within_loops.retain(|id| loops_indirectly_within.contains(id));
373 block_data[block_id].within_loops = within_loops;
374 }
375
376 let mut cfg = CfgInfo {
377 cfg,
378 fwd_cfg,
379 loop_entries,
380 switch_blocks,
381 dominator_tree,
382 block_data,
383 };
384
385 // Pick an exit block for each loop, if we find one.
386 ExitInfo::compute_loop_exits(ctx, &mut cfg);
387
388 // Pick an exit block for each switch, if we find one.
389 ExitInfo::compute_switch_exits(&mut cfg);
390
391 Ok(cfg)
392 }
393
394 fn block_data(&self, block_id: BlockId) -> &BlockData {
395 &self.block_data[block_id]
396 }
397 // fn can_reach(&self, src: BlockId, tgt: BlockId) -> bool {
398 // self.block_data[src].shortest_paths.contains_key(&tgt)
399 // }
400 fn topo_rank(&self, block_id: BlockId) -> u32 {
401 self.block_data[block_id].reverse_postorder.unwrap()
402 }
403 #[expect(unused)]
404 fn is_backward_edge(&self, src: BlockId, tgt: BlockId) -> bool {
405 self.block_data[src].reverse_postorder >= self.block_data[tgt].reverse_postorder
406 && self.cfg.contains_edge(src, tgt)
407 }
408
409 /// Check if the node is within the given loop.
410 fn is_within_loop(&self, loop_header: src::BlockId, block_id: src::BlockId) -> bool {
411 self.block_data[block_id]
412 .within_loops
413 .contains(&loop_header)
414 }
415
416 /// Check if all paths from `src` to nodes in `target_set` go through `through_node`. If `src`
417 /// is already in `target_set`, we ignore that empty path.
418 fn all_paths_go_through(
419 &self,
420 src: src::BlockId,
421 through_node: src::BlockId,
422 target_set: &HashSet<src::BlockId>,
423 ) -> bool {
424 let graph = EdgeFiltered::from_fn(&self.fwd_cfg, |edge| edge.source() != through_node);
425 !Dfs::new(&graph, src)
426 .iter(&graph)
427 .skip(1) // skip src
428 .any(|bid| target_set.contains(&bid))
429 }
430}
431
432impl BlockData {
433 fn shortest_paths_including_self(&self) -> impl Iterator<Item = (BlockId, usize)> {
434 self.shortest_paths.iter().map(|(bid, d)| (*bid, *d))
435 }
436 fn shortest_paths_excluding_self(&self) -> impl Iterator<Item = (BlockId, usize)> {
437 self.shortest_paths_including_self()
438 .filter(move |&(bid, _)| bid != self.id)
439 }
440 fn reachable_including_self(&self) -> impl Iterator<Item = BlockId> {
441 self.shortest_paths_including_self().map(|(bid, _)| bid)
442 }
443 fn reachable_excluding_self(&self) -> impl Iterator<Item = BlockId> {
444 self.shortest_paths_excluding_self().map(|(bid, _)| bid)
445 }
446 #[expect(unused)]
447 fn can_reach_excluding_self(&self, other: BlockId) -> bool {
448 self.shortest_paths.contains_key(&other) && self.id != other
449 }
450}
451
452/// See [`ExitInfo::compute_loop_exit_ranks`].
453#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
454struct LoopExitRank {
455 /// Number of paths we found going to this exit.
456 path_count: usize,
457 /// Distance from the loop header.
458 distance_from_header: Reverse<usize>,
459}
460
461impl ExitInfo {
462 /// Compute the first node on each path that exits the loop.
463 fn compute_loop_exit_starting_points(
464 cfg: &CfgInfo,
465 loop_header: src::BlockId,
466 ) -> Vec<src::BlockId> {
467 let mut loop_exits = Vec::new();
468 // Do a dfs from the loop header while keeping track of the path from the loop header to
469 // the current node.
470 let mut dfs = Dfs::new(&cfg.fwd_cfg, loop_header);
471 while let Some(block_id) = dfs.next(&cfg.fwd_cfg) {
472 // If we've exited all the loops after and including the target one, this node is an
473 // exit node for the target loop.
474 if !cfg.is_within_loop(loop_header, block_id) {
475 loop_exits.push(block_id);
476 // Don't explore any more paths from this node.
477 dfs.discovered.extend(cfg.fwd_cfg.neighbors(block_id));
478 }
479 }
480 loop_exits
481 }
482
483 /// Compute the loop exit candidates along with a rank.
484 ///
485 /// In the simple case, there is one exit node through which all exit paths go. We want to be
486 /// sure to catch that case, and when that's not possible we want to still find a node through
487 /// which a lot of exit paths go.
488 ///
489 /// To do that, we first count for each exit node how many exit paths go through it, and pick
490 /// the node with most occurrences. If there are many such nodes, we pick the one with shortest
491 /// distance from the loop header. Finally if there are still many such nodes, we keep the
492 /// first node found (the order in which we explore the graph is deterministic, and we use an
493 /// insertion-order hash map).
494 ///
495 /// Note that exit candidates will typically be referenced more than once for one loop. This
496 /// comes from the fact that whenever we reach a node outside the current loop, we register
497 /// this node as well as all its children as exit candidates.
498 /// Consider the following example:
499 /// ```text
500 /// while i < max {
501 /// if cond {
502 /// break;
503 /// }
504 /// s += i;
505 /// i += 1
506 /// }
507 /// // All the below nodes are exit candidates (each of them is referenced twice)
508 /// s += 1;
509 /// return s;
510 /// ```
511 fn compute_loop_exit_ranks(
512 cfg: &CfgInfo,
513 loop_header: src::BlockId,
514 ) -> SeqHashMap<src::BlockId, LoopExitRank> {
515 let mut loop_exits: SeqHashMap<BlockId, LoopExitRank> = SeqHashMap::new();
516 for block_id in Self::compute_loop_exit_starting_points(cfg, loop_header) {
517 for bid in cfg.block_data(block_id).reachable_including_self() {
518 loop_exits
519 .entry(bid)
520 .or_insert_with(|| LoopExitRank {
521 path_count: 0,
522 distance_from_header: Reverse(
523 cfg.block_data[loop_header].shortest_paths[&bid],
524 ),
525 })
526 .path_count += 1;
527 }
528 }
529 loop_exits
530 }
531
532 /// A loop exit is any block reachable from the loop header that isn't inside the loop.
533 /// This function choses an exit for every loop. See `compute_loop_exit_ranks` for how we
534 /// select them.
535 ///
536 /// For example:
537 /// ```text
538 /// while ... {
539 /// ...
540 /// if ... {
541 /// // We can't reach the loop entry from here: this is an exit
542 /// // candidate
543 /// return;
544 /// }
545 /// }
546 /// // This is another exit candidate - and this is the one we want to use
547 /// // as the "real" exit...
548 /// ...
549 /// ```
550 ///
551 /// Once we listed all the exit candidates, we find the "best" one for every loop. The best
552 /// exit is the following one:
553 /// - it is the one which is used the most times (note that there can be
554 /// several candidates which are referenced strictly more than once: see the
555 /// comment below)
556 /// - if several exits have the same number of occurrences, we choose the one
557 /// for which we goto the "earliest" (earliest meaning that the goto is close to
558 /// the loop entry node in the AST). The reason is that all the loops should
559 /// have an outer if ... then ... else ... which executes the loop body or goes
560 /// to the exit (note that this is not necessarily the first
561 /// if ... then ... else ... we find: loop conditions can be arbitrary
562 /// expressions, containing branchings).
563 ///
564 /// # Several candidates for a loop exit:
565 /// =====================================
566 /// There used to be a sanity check to ensure there are no two different
567 /// candidates with exactly the same number of occurrences and distance from
568 /// the entry of the loop, if the number of occurrences is > 1.
569 ///
570 /// We removed it because it does happen, for instance here (the match
571 /// introduces an `unreachable` node, and it has the same number of
572 /// occurrences and the same distance to the loop entry as the `panic`
573 /// node):
574 ///
575 /// ```text
576 /// pub fn list_nth_mut_loop_pair<'a, T>(
577 /// mut ls: &'a mut List<T>,
578 /// mut i: u32,
579 /// ) -> &'a mut T {
580 /// loop {
581 /// match ls {
582 /// List::Nil => {
583 /// panic!() // <-- best candidate
584 /// }
585 /// List::Cons(x, tl) => {
586 /// if i == 0 {
587 /// return x;
588 /// } else {
589 /// ls = tl;
590 /// i -= 1;
591 /// }
592 /// }
593 /// _ => {
594 /// // Note that Rustc always introduces an unreachable branch after
595 /// // desugaring matches.
596 /// unreachable!(), // <-- best candidate
597 /// }
598 /// }
599 /// }
600 /// }
601 /// ```
602 ///
603 /// When this happens we choose an exit candidate whose edges don't necessarily
604 /// lead to an error (above there are none, so we don't choose any exits). Note
605 /// that this last condition is important to prevent loops from being unnecessarily
606 /// nested:
607 ///
608 /// ```text
609 /// pub fn nested_loops_enum(step_out: usize, step_in: usize) -> usize {
610 /// let mut s = 0;
611 ///
612 /// for _ in 0..128 { // We don't want this loop to be nested with the loops below
613 /// s += 1;
614 /// }
615 ///
616 /// for _ in 0..(step_out) {
617 /// for _ in 0..(step_in) {
618 /// s += 1;
619 /// }
620 /// }
621 ///
622 /// s
623 /// }
624 /// ```
625 fn compute_loop_exits(_ctx: &TransformCtx, cfg: &mut CfgInfo) {
626 for &loop_id in &cfg.loop_entries {
627 // Compute the candidates.
628 let loop_exits: SeqHashMap<BlockId, LoopExitRank> =
629 Self::compute_loop_exit_ranks(cfg, loop_id);
630 // We choose the exit with:
631 // - the most occurrences
632 // - the least total distance (if there are several possibilities)
633 // - doesn't necessarily lead to an error (panic, unreachable)
634 let best_exits: Vec<(BlockId, LoopExitRank)> =
635 loop_exits.into_iter().max_set_by_key(|&(_, rank)| rank);
636 // If there is exactly one best candidate, use it. Otherwise we need to split further.
637 let chosen_exit = match best_exits.into_iter().map(|(bid, _)| bid).exactly_one() {
638 Ok(best_exit) => Some(best_exit),
639 Err(best_exits) => {
640 // Remove the candidates which only lead to errors (panic or unreachable).
641 // If there is exactly one candidate we select it, otherwise we do not select any
642 // exit.
643 // We don't want to select any exit if we are in the below situation
644 // (all paths lead to errors). We added a sanity check below to
645 // catch the situations where there are several exits which don't
646 // lead to errors.
647 //
648 // Example:
649 // ========
650 // ```
651 // loop {
652 // match ls {
653 // List::Nil => {
654 // panic!() // <-- best candidate
655 // }
656 // List::Cons(x, tl) => {
657 // if i == 0 {
658 // return x;
659 // } else {
660 // ls = tl;
661 // i -= 1;
662 // }
663 // }
664 // _ => {
665 // unreachable!(); // <-- best candidate (Rustc introduces an `unreachable` case)
666 // }
667 // }
668 // }
669 // ```
670 // The `exactly_one` can fail, see `tests/ui/control-flow/ambiguous-loop-exit.rs`
671 best_exits
672 .filter(|&bid| !cfg.block_data[bid].only_reach_error)
673 .exactly_one()
674 .ok()
675 }
676 };
677 cfg.block_data[loop_id].exit_info.loop_exit = chosen_exit;
678 }
679 }
680
681 /// Let's consider the following piece of code:
682 /// ```text
683 /// if cond1 { ... } else { ... };
684 /// if cond2 { ... } else { ... };
685 /// ```
686 /// Once converted to MIR, the control-flow is destructured, which means we
687 /// have gotos everywhere. When reconstructing the control-flow, we have
688 /// to be careful about the point where we should join the two branches of
689 /// the first if.
690 /// For instance, if we don't notice they should be joined at some point (i.e,
691 /// whatever the branch we take, there is a moment when we go to the exact
692 /// same place, just before the second if), we might generate code like
693 /// this, with some duplicata:
694 /// ```text
695 /// if cond1 { ...; if cond2 { ... } else { ...} }
696 /// else { ...; if cond2 { ... } else { ...} }
697 /// ```
698 ///
699 /// Such a reconstructed program is valid, but it is definitely non-optimal:
700 /// it is very different from the original program (making it less clean and
701 /// clear), more bloated, and might involve duplicating the proof effort.
702 ///
703 /// For this reason, we need to find the "exit" of the first switch, which is
704 /// the point where the two branches join. Note that this can be a bit tricky,
705 /// because there may be more than two branches (if we do `switch(x) { ... }`),
706 /// and some of them might not join (if they contain a `break`, `panic`,
707 /// `return`, etc.).
708 ///
709 /// In order to compute the switch exits, we simply recursively compute a
710 /// topologically ordered set of "filtered successors" as follows (note
711 /// that we work in the CFG *without* back edges):
712 /// - for a block which doesn't branch (only one successor), the filtered
713 /// successors is the set of reachable nodes.
714 /// - for a block which branches, we compute the nodes reachable from all
715 /// the children, and find the "best" intersection between those.
716 /// Note that we find the "best" intersection (a pair of branches which
717 /// maximize the intersection of filtered successors) because some branches
718 /// might never join the control-flow of the other branches, if they contain
719 /// a `break`, `return`, `panic`, etc., like here:
720 /// ```text
721 /// if b { x = 3; } { return; }
722 /// y += x;
723 /// ...
724 /// ```
725 /// Note that with nested switches, the branches of the inner switches might
726 /// goto the exits of the outer switches: for this reason, we give precedence
727 /// to the outer switches.
728 fn compute_switch_exits(cfg: &mut CfgInfo) {
729 // We need to give precedence to the outer switches: we thus iterate
730 // over the switch blocks in topological order.
731 let mut exits_set = HashSet::new();
732 for bid in cfg
733 .switch_blocks
734 .iter()
735 .copied()
736 .sorted_unstable_by_key(|&bid| (cfg.topo_rank(bid), bid))
737 {
738 let block_data = &cfg.block_data[bid];
739 // Find the best successor: this is the node with the highest flow, and the lowest
740 // topological rank. If several nodes have the same flow, we want to take the highest
741 // one in the hierarchy: hence the use of the topological rank.
742 //
743 // Ex.:
744 // ```text
745 // A -- we start here
746 // |
747 // |---------------------------------------
748 // | | | |
749 // B:(0.25,-1) C:(0.25,-2) D:(0.25,-3) E:(0.25,-4)
750 // | | |
751 // |--------------------------
752 // |
753 // F:(0.75,-5)
754 // |
755 // |
756 // G:(0.75,-6)
757 // ```
758 // The "best" node (with the highest (flow, rank) in the graph above is F.
759 // If the switch is inside a loop, we also only consider exists that are inside that
760 // same loop. There must be one, otherwise the switch entry would not be inside the
761 // loop.
762 let current_loop = block_data.within_loops.last().copied();
763 let best_exit: Option<BlockId> = block_data
764 .reachable_excluding_self()
765 .filter(|&b| {
766 current_loop.is_none_or(|current_loop| cfg.is_within_loop(current_loop, b))
767 })
768 .max_by_key(|&id| {
769 let flow = &block_data.flow[id];
770 let rank = Reverse(cfg.topo_rank(id));
771 ((flow, rank), id)
772 });
773 // We have an exit candidate: we first check that it was not already taken by an
774 // external switch.
775 //
776 // We then check that we can't reach the exit of an external switch from one of the
777 // branches, without going through the exit candidate. We do this by simply checking
778 // that we can't reach any of the exits of outer switches.
779 //
780 // The reason is that it can lead to code like the following:
781 // ```
782 // if ... { // if #1
783 // if ... { // if #2
784 // ...
785 // // here, we have a `goto b1`, where b1 is the exit
786 // // of if #2: we thus stop translating the blocks.
787 // }
788 // else {
789 // ...
790 // // here, we have a `goto b2`, where b2 is the exit
791 // // of if #1: we thus stop translating the blocks.
792 // }
793 // // We insert code for the block b1 here (which is the exit of
794 // // the exit of if #2). However, this block should only
795 // // be executed in the branch "then" of the if #2, not in
796 // // the branch "else".
797 // ...
798 // }
799 // else {
800 // ...
801 // }
802 // ```
803 if let Some(exit_id) = best_exit
804 && !exits_set.contains(&exit_id)
805 && cfg.all_paths_go_through(bid, exit_id, &exits_set)
806 {
807 exits_set.insert(exit_id);
808 cfg.block_data[bid].exit_info.switch_exit = Some(exit_id);
809 }
810 }
811 }
812}
813
814/// Iter over the last non-switch statements that may be executed on any branch of this block.
815/// Skips over `Nop`s.
816fn iter_tail_statements(block: &mut tgt::Block, f: &mut impl FnMut(&mut tgt::Statement)) {
817 let Some(st) = block
818 .statements
819 .iter_mut()
820 .rev()
821 .find(|st| !st.kind.is_nop())
822 else {
823 return;
824 };
825 if let tgt::StatementKind::Switch(switch) = &mut st.kind {
826 for block in switch.iter_targets_mut() {
827 iter_tail_statements(block, f);
828 }
829 } else {
830 f(st)
831 };
832}
833
834type Depth = usize;
835
836#[derive(Debug, Clone, Copy)]
837enum SpecialJumpKind {
838 /// When encountering this block, `continue` to the given depth.
839 LoopContinue(Depth),
840 /// When encountering this block, `break` to the given depth. This comes from a loop.
841 LoopBreak(Depth),
842 /// When encountering this block, `break` to the given depth. This is a `loop` context
843 /// introduced only for forward jumps.
844 ForwardBreak(Depth),
845 /// When encountering this block, do nothing, as this is the next block that will be
846 /// translated.
847 NextBlock,
848}
849
850#[derive(Clone, Copy)]
851struct SpecialJump {
852 /// The relevant block.
853 target_block: BlockId,
854 /// How to translate a jump to the target block.
855 kind: SpecialJumpKind,
856}
857
858impl SpecialJump {
859 fn new(target_block: BlockId, kind: SpecialJumpKind) -> Self {
860 Self { target_block, kind }
861 }
862}
863
864impl std::fmt::Debug for SpecialJump {
865 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
866 write!(f, "SpecialJump({}, {:?})", self.target_block, self.kind)
867 }
868}
869
870/// How to handle blocks reachable from multiple branches.
871enum ReconstructMode {
872 /// Duplicate blocks reachable from multiple branches.
873 Duplicate,
874 /// Insert loops for the purpose of breaking forward to them to implement DAG-like control-flow
875 /// without duplicating blocks.
876 /// Based on the algorithm from "Beyond Relooper" (https://dl.acm.org/doi/10.1145/3547621).
877 ForwardBreak,
878}
879
880struct ReconstructCtx<'a> {
881 cfg: CfgInfo,
882 body: &'a src::ExprBody,
883 /// The depth of `loop` contexts we may `break`/`continue` to.
884 break_context_depth: Depth,
885 /// Stack of block ids that should be translated to special jumps (`break`/`continue`/do
886 /// nothing) in the current context.
887 /// The block where control-flow continues naturally after this block is kept at the top of the
888 /// stack.
889 special_jump_stack: Vec<SpecialJump>,
890 mode: ReconstructMode,
891}
892
893impl<'a> ReconstructCtx<'a> {
894 fn build(ctx: &TransformCtx, src_body: &'a src::ExprBody) -> Result<Self, Irreducible> {
895 // Compute all sorts of graph-related information about the control-flow graph, including
896 // reachability, the dominator tree, loop entries, and loop/switch exits.
897 let cfg = CfgInfo::build(ctx, &src_body.body)?;
898
899 // Translate the body by reconstructing the loops and the
900 // conditional branchings.
901 let allow_duplication = true;
902 Ok(ReconstructCtx {
903 cfg,
904 body: src_body,
905 break_context_depth: 0,
906 special_jump_stack: Vec::new(),
907 mode: if allow_duplication {
908 ReconstructMode::Duplicate
909 } else {
910 ReconstructMode::ForwardBreak
911 },
912 })
913 }
914
915 fn translate_statement(&self, st: &src::Statement) -> tgt::Statement {
916 let src_span = st.span;
917 let st = match st.kind.clone() {
918 src::StatementKind::Assign(place, rvalue) => tgt::StatementKind::Assign(place, rvalue),
919 src::StatementKind::SetDiscriminant(place, variant_id) => {
920 tgt::StatementKind::SetDiscriminant(place, variant_id)
921 }
922 src::StatementKind::CopyNonOverlapping(copy) => {
923 tgt::StatementKind::CopyNonOverlapping(copy)
924 }
925 src::StatementKind::StorageLive(var_id) => tgt::StatementKind::StorageLive(var_id),
926 src::StatementKind::StorageDead(var_id) => tgt::StatementKind::StorageDead(var_id),
927 src::StatementKind::Assert { assert, on_failure } => {
928 tgt::StatementKind::Assert { assert, on_failure }
929 }
930 src::StatementKind::PlaceMention(place) => tgt::StatementKind::PlaceMention(place),
931 src::StatementKind::Nop => tgt::StatementKind::Nop,
932 };
933 tgt::Statement::new(src_span, st)
934 }
935
936 /// Translate a jump to the given block. The span is used to create the jump statement, if any.
937 #[tracing::instrument(skip(self), ret, fields(stack = ?self.special_jump_stack))]
938 fn translate_jump(&mut self, span: Span, target_block: src::BlockId) -> tgt::Block {
939 match self
940 .special_jump_stack
941 .iter_mut()
942 .rev()
943 .enumerate()
944 .find(|(_, j)| j.target_block == target_block)
945 {
946 Some((i, jump_target)) => {
947 let mk_block = |kind| tgt::Statement::new(span, kind).into_block();
948 match jump_target.kind {
949 // The top of the stack is where control-flow goes naturally, no need to add a
950 // `break`/`continue`.
951 SpecialJumpKind::LoopContinue(_) | SpecialJumpKind::ForwardBreak(_)
952 if i == 0 && matches!(self.mode, ReconstructMode::ForwardBreak) =>
953 {
954 mk_block(tgt::StatementKind::Nop)
955 }
956 SpecialJumpKind::LoopContinue(depth) => mk_block(tgt::StatementKind::Continue(
957 self.break_context_depth - depth,
958 )),
959 SpecialJumpKind::ForwardBreak(depth) | SpecialJumpKind::LoopBreak(depth) => {
960 mk_block(tgt::StatementKind::Break(self.break_context_depth - depth))
961 }
962 SpecialJumpKind::NextBlock => mk_block(tgt::StatementKind::Nop),
963 }
964 }
965 // Translate the block without a jump.
966 None => return self.translate_block(target_block),
967 }
968 }
969
970 fn translate_terminator(&mut self, terminator: &src::Terminator) -> tgt::Block {
971 let src_span = terminator.span;
972
973 match &terminator.kind {
974 src::TerminatorKind::Abort(kind) => {
975 tgt::Statement::new(src_span, tgt::StatementKind::Abort(kind.clone())).into_block()
976 }
977 src::TerminatorKind::Return => {
978 tgt::Statement::new(src_span, tgt::StatementKind::Return).into_block()
979 }
980 src::TerminatorKind::UnwindResume => {
981 tgt::Statement::new(src_span, tgt::StatementKind::Abort(AbortKind::Panic(None)))
982 .into_block()
983 }
984 src::TerminatorKind::Call {
985 call,
986 target,
987 on_unwind: _,
988 } => {
989 // TODO: Have unwinds in the LLBC
990 let st = tgt::Statement::new(src_span, tgt::StatementKind::Call(call.clone()));
991 let mut block = self.translate_jump(terminator.span, *target);
992 block.statements.insert(0, st);
993 block
994 }
995 src::TerminatorKind::Drop {
996 kind,
997 place,
998 fn_ptr,
999 target,
1000 on_unwind: _,
1001 } => {
1002 // TODO: Have unwinds in the LLBC
1003 let st = tgt::Statement::new(
1004 src_span,
1005 tgt::StatementKind::Drop(place.clone(), fn_ptr.clone(), *kind),
1006 );
1007 let mut block = self.translate_jump(terminator.span, *target);
1008 block.statements.insert(0, st);
1009 block
1010 }
1011 src::TerminatorKind::Assert {
1012 assert,
1013 target,
1014 on_unwind,
1015 } => {
1016 let unwind_block = &self.body.body[*on_unwind];
1017 let on_failure = unwind_block.as_abort().unwrap_or(AbortKind::Panic(None));
1018 let st = tgt::StatementKind::Assert {
1019 assert: assert.clone(),
1020 on_failure,
1021 };
1022 let target = self.translate_jump(terminator.span, *target);
1023 tgt::Statement::new(src_span, st).into_block().merge(target)
1024 }
1025 src::TerminatorKind::InlineAsm {
1026 asm,
1027 targets,
1028 on_unwind: _,
1029 } => {
1030 // TODO: Have unwinds in the LLBC.
1031 let targets = targets
1032 .iter()
1033 .map(|target| self.translate_jump(terminator.span, *target))
1034 .collect();
1035 let st = tgt::StatementKind::InlineAsm {
1036 asm: asm.clone(),
1037 targets,
1038 };
1039 tgt::Statement::new(src_span, st).into_block()
1040 }
1041 src::TerminatorKind::Goto { target } => self.translate_jump(terminator.span, *target),
1042 src::TerminatorKind::Switch { discr, targets } => {
1043 // Translate the target expressions
1044 let switch = match &targets {
1045 src::SwitchTargets::If(then_tgt, else_tgt) => {
1046 let then_block = self.translate_jump(terminator.span, *then_tgt);
1047 let else_block = self.translate_jump(terminator.span, *else_tgt);
1048 tgt::Switch::If(discr.clone(), then_block, else_block)
1049 }
1050 src::SwitchTargets::SwitchInt(int_ty, targets, otherwise) => {
1051 // Note that some branches can be grouped together, like
1052 // here:
1053 // ```
1054 // match e {
1055 // E::V1 | E::V2 => ..., // Grouped
1056 // E::V3 => ...
1057 // }
1058 // ```
1059 // We detect this by checking if a block has already been
1060 // translated as one of the branches of the switch.
1061 //
1062 // Rk.: note there may be intermediate gotos depending
1063 // on the MIR we use. Typically, we manage to detect the
1064 // grouped branches with Optimized MIR, but not with Promoted
1065 // MIR. See the comment in "tests/src/matches.rs".
1066
1067 // We link block ids to:
1068 // - vector of matched integer values
1069 // - translated blocks
1070 let mut branches: SeqHashMap<src::BlockId, (Vec<Literal>, tgt::Block)> =
1071 SeqHashMap::new();
1072
1073 // Translate the children expressions
1074 for (v, bid) in targets.iter() {
1075 // Check if the block has already been translated:
1076 // if yes, it means we need to group branches
1077 if branches.contains_key(bid) {
1078 // Already translated: add the matched value to
1079 // the list of values
1080 let branch = branches.get_mut(bid).unwrap();
1081 branch.0.push(v.clone());
1082 } else {
1083 // Not translated: translate it
1084 let block = self.translate_jump(terminator.span, *bid);
1085 // We use the terminator span information in case then
1086 // then statement is `None`
1087 branches.insert(*bid, (vec![v.clone()], block));
1088 }
1089 }
1090 let targets_blocks: Vec<(Vec<Literal>, tgt::Block)> =
1091 branches.into_iter().map(|(_, x)| x).collect();
1092
1093 let otherwise_block = self.translate_jump(terminator.span, *otherwise);
1094
1095 // Translate
1096 tgt::Switch::SwitchInt(
1097 discr.clone(),
1098 *int_ty,
1099 targets_blocks,
1100 otherwise_block,
1101 )
1102 }
1103 };
1104
1105 // Return
1106 let span = tgt::combine_switch_targets_span(&switch);
1107 let span = combine_span(&src_span, &span);
1108 let st = tgt::StatementKind::Switch(switch);
1109 tgt::Statement::new(span, st).into_block()
1110 }
1111 }
1112 }
1113
1114 /// Translate just the block statements and terminator.
1115 fn translate_block_itself(&mut self, block_id: BlockId) -> tgt::Block {
1116 let block = &self.body.body[block_id];
1117 // Translate the statements inside the block
1118 let statements = block
1119 .statements
1120 .iter()
1121 .map(|st| self.translate_statement(st))
1122 .collect_vec();
1123 // Translate the terminator.
1124 let terminator = self.translate_terminator(&block.terminator);
1125 // Prepend the statements to the terminator.
1126 if let Some(st) = tgt::Block::from_seq(statements) {
1127 st.merge(terminator)
1128 } else {
1129 terminator
1130 }
1131 }
1132
1133 /// Translate a block including surrounding control-flow like looping.
1134 #[tracing::instrument(skip(self), fields(stack = ?self.special_jump_stack))]
1135 fn translate_block(&mut self, block_id: src::BlockId) -> tgt::Block {
1136 ensure_sufficient_stack(|| self.translate_block_inner(block_id))
1137 }
1138 fn translate_block_inner(&mut self, block_id: src::BlockId) -> tgt::Block {
1139 // Some of the blocks we might jump to inside this tree can't be translated as normal
1140 // blocks: the loop backward edges must become `continue`s and the merge nodes may need
1141 // some care if we're jumping to them from distant locations.
1142 // For this purpose, we push to the `special_jump_stack` the block ids that must be
1143 // translated specially. In `translate_jump` we check the stack. At the end of this
1144 // function we restore the stack to its previous state.
1145 let old_context_depth = self.special_jump_stack.len();
1146 let block_data = &self.cfg.block_data[block_id];
1147 let span = block_data.span;
1148
1149 // Catch jumps to the loop header or loop exit.
1150 if block_data.is_loop_header {
1151 self.break_context_depth += 1;
1152 if let Some(exit_id) = block_data.exit_info.loop_exit {
1153 self.special_jump_stack.push(SpecialJump::new(
1154 exit_id,
1155 SpecialJumpKind::LoopBreak(self.break_context_depth),
1156 ));
1157 }
1158 // Put the next block at the top of the stack.
1159 self.special_jump_stack.push(SpecialJump::new(
1160 block_id,
1161 SpecialJumpKind::LoopContinue(self.break_context_depth),
1162 ));
1163 }
1164
1165 // Catch jumps to a merge node.
1166 let merge_children = &block_data.immediately_dominated_merge_targets;
1167 if let ReconstructMode::ForwardBreak = self.mode {
1168 // We support forward-jumps using `break`
1169 // The child with highest postorder numbering is nested outermost in this scheme.
1170 for &child in merge_children {
1171 self.break_context_depth += 1;
1172 self.special_jump_stack.push(SpecialJump::new(
1173 child,
1174 SpecialJumpKind::ForwardBreak(self.break_context_depth),
1175 ));
1176 }
1177 }
1178
1179 if let Some(bid) = block_data.exit_info.switch_exit
1180 && !block_data.is_loop_header
1181 && !(matches!(self.mode, ReconstructMode::ForwardBreak)
1182 && merge_children.contains(&bid))
1183 {
1184 // Move some code that would be inside one or several switch branches to be after the
1185 // switch intead.
1186 self.special_jump_stack
1187 .push(SpecialJump::new(bid, SpecialJumpKind::NextBlock));
1188 }
1189
1190 // Translate this block. Any jumps to a loop header or a merge node will be replaced with
1191 // `continue`/`break`.
1192 let mut block = self.translate_block_itself(block_id);
1193
1194 // Reset the state to what it was previously, and translate what remains.
1195 let new_statement = move |kind| tgt::Statement::new(block.span, kind);
1196 while self.special_jump_stack.len() > old_context_depth {
1197 let special_jump = self.special_jump_stack.pop().unwrap();
1198 match &special_jump.kind {
1199 SpecialJumpKind::LoopContinue(_) => {
1200 self.break_context_depth -= 1;
1201 if let ReconstructMode::ForwardBreak = self.mode {
1202 // We add `continue` at the end for users that don't know that the default
1203 // behavior at the end of a loop block is `continue`. Not needed for
1204 // `Duplicate` mode because we use explicit `continue`s there. TODO: clean
1205 // that up.
1206 block
1207 .statements
1208 .push(new_statement(tgt::StatementKind::Continue(0)));
1209 }
1210 block = new_statement(tgt::StatementKind::Loop(block)).into_block();
1211 }
1212 SpecialJumpKind::ForwardBreak(_) => {
1213 self.break_context_depth -= 1;
1214 // Remove unneeded `break`s in branches leading up to that final one.
1215 iter_tail_statements(&mut block, &mut |st| {
1216 if matches!(st.kind, tgt::StatementKind::Break(0)) {
1217 st.kind = tgt::StatementKind::Nop;
1218 }
1219 });
1220 // We add a `loop { ...; break }` so that we can use `break` to jump forward.
1221 block
1222 .statements
1223 .push(new_statement(tgt::StatementKind::Break(0)));
1224 block = new_statement(tgt::StatementKind::Loop(block)).into_block();
1225 // We must translate the merge nodes after the block used for forward jumps to
1226 // them.
1227 let next_block = self.translate_jump(span, special_jump.target_block);
1228 block = block.merge(next_block);
1229 }
1230 SpecialJumpKind::NextBlock | SpecialJumpKind::LoopBreak(..) => {
1231 let next_block = self.translate_jump(span, special_jump.target_block);
1232 block = block.merge(next_block);
1233 }
1234 }
1235 }
1236 block
1237 }
1238}
1239
1240fn remove_useless_jump_blocks(body: &mut tgt::ExprBody) {
1241 use tgt::StatementKind;
1242 #[derive(Default)]
1243 struct Count {
1244 continue_count: u32,
1245 break_count: u32,
1246 }
1247 #[derive(Default, Visitor)]
1248 struct CountJumpsVisitor {
1249 counts: HashMap<StatementId, Count>,
1250 loop_stack: Vec<StatementId>,
1251 }
1252 #[derive(Visitor)]
1253 struct RemoveUselessJumpsVisitor {
1254 counts: HashMap<StatementId, Count>,
1255 /// For every loop we encounter, whether we're keeping it or removing it.
1256 loop_stack: Vec<bool>,
1257 }
1258
1259 impl VisitBodyMut for CountJumpsVisitor {
1260 fn visit_llbc_statement(&mut self, st: &mut tgt::Statement) -> ControlFlow<Self::Break> {
1261 if let StatementKind::Loop(_) = &st.kind {
1262 self.loop_stack.push(st.id);
1263 }
1264 match &st.kind {
1265 StatementKind::Break(depth) => {
1266 let loop_id = self.loop_stack[self.loop_stack.len() - 1 - depth];
1267 self.counts.entry(loop_id).or_default().break_count += 1;
1268 }
1269 StatementKind::Continue(depth) => {
1270 let loop_id = self.loop_stack[self.loop_stack.len() - 1 - depth];
1271 self.counts.entry(loop_id).or_default().continue_count += 1;
1272 }
1273 _ => {}
1274 }
1275 self.visit_inner(st)?;
1276 if let StatementKind::Loop(_) = &st.kind {
1277 self.loop_stack.pop();
1278 }
1279 ControlFlow::Continue(())
1280 }
1281 }
1282
1283 impl VisitBodyMut for RemoveUselessJumpsVisitor {
1284 fn visit_llbc_block(&mut self, block: &mut tgt::Block) -> ControlFlow<Self::Break> {
1285 for mut st in mem::take(&mut block.statements) {
1286 if let tgt::StatementKind::Loop(block) = &mut st.kind {
1287 let counts = &self.counts[&st.id];
1288 let remove = counts.continue_count == 0
1289 && counts.break_count == 1
1290 && matches!(
1291 block.statements.last().unwrap().kind,
1292 StatementKind::Break(0)
1293 );
1294 self.loop_stack.push(!remove);
1295 }
1296 self.visit(&mut st)?;
1297 if st.kind.is_loop() && !self.loop_stack.pop().unwrap() {
1298 // Remove the loop.
1299 let StatementKind::Loop(mut inner_block) = st.kind else {
1300 unreachable!()
1301 };
1302 inner_block.statements.last_mut().unwrap().kind = StatementKind::Nop;
1303 block.statements.extend(inner_block.statements);
1304 } else {
1305 block.statements.push(st);
1306 }
1307 }
1308 ControlFlow::Continue(())
1309 }
1310 fn enter_llbc_statement(&mut self, st: &mut tgt::Statement) {
1311 match &st.kind {
1312 StatementKind::Break(depth) => {
1313 let new_depth = self.loop_stack[self.loop_stack.len() - depth..]
1314 .iter()
1315 .filter(|&&keep| keep)
1316 .count();
1317 st.kind = StatementKind::Break(new_depth);
1318 }
1319 StatementKind::Continue(depth) => {
1320 let new_depth = self.loop_stack[self.loop_stack.len() - depth..]
1321 .iter()
1322 .filter(|&&keep| keep)
1323 .count();
1324 st.kind = StatementKind::Continue(new_depth);
1325 }
1326 _ => {}
1327 }
1328 }
1329 }
1330
1331 let mut v = CountJumpsVisitor::default();
1332 body.body.drive_body_mut(&mut v);
1333 let mut v = RemoveUselessJumpsVisitor {
1334 counts: v.counts,
1335 loop_stack: Default::default(),
1336 };
1337 body.body.drive_body_mut(&mut v);
1338}
1339
1340fn translate_body(ctx: &mut TransformCtx, body: &mut gast::Body) {
1341 use gast::Body::{Structured, Unstructured};
1342 let Unstructured(src_body) = body else {
1343 panic!("Called `ullbc_to_llbc` on an already restructured body")
1344 };
1345 trace!("About to translate to ullbc: {:?}", src_body.span);
1346
1347 // Calculate info about the graph and heuristically determine loop and switch exit blocks.
1348 let start_block = BlockId::ZERO;
1349 let mut ctx = match ReconstructCtx::build(ctx, src_body) {
1350 Ok(ctx) => ctx,
1351 Err(Irreducible(bid)) => {
1352 let span = src_body.body[bid].terminator.span;
1353 register_error!(
1354 ctx,
1355 span,
1356 "the control-flow graph of this function is not reducible"
1357 );
1358 panic!("can't reconstruct irreducible control-flow")
1359 }
1360 };
1361 // Translate the blocks using the computed data.
1362 let tgt_body = ctx.translate_block(start_block);
1363
1364 let mut tgt_body = tgt::ExprBody {
1365 span: src_body.span,
1366 locals: src_body.locals.clone(),
1367 bound_body_regions: src_body.bound_body_regions,
1368 body: tgt_body,
1369 comments: src_body.comments.clone(),
1370 };
1371 remove_useless_jump_blocks(&mut tgt_body);
1372
1373 *body = Structured(tgt_body);
1374}
1375
1376pub struct Transform;
1377impl TransformPass for Transform {
1378 fn transform_ctx(&self, ctx: &mut TransformCtx) {
1379 // Translate the bodies one at a time.
1380 ctx.for_each_body(|ctx, body| {
1381 translate_body(ctx, body);
1382 });
1383
1384 if ctx.options.print_built_llbc {
1385 eprintln!("# LLBC resulting from control-flow reconstruction:\n\n{ctx}\n",);
1386 } else {
1387 trace!("# LLBC resulting from control-flow reconstruction:\n\n{ctx}\n",);
1388 }
1389 }
1390}