rustc_mir_dataflow/framework/mod.rs
1//! A framework that can express both [gen-kill] and generic dataflow problems.
2//!
3//! To use this framework, implement the [`Analysis`] trait. There used to be a `GenKillAnalysis`
4//! alternative trait for gen-kill analyses that would pre-compute the transfer function for each
5//! block. It was intended as an optimization, but it ended up not being any faster than
6//! `Analysis`.
7//!
8//! The `impls` module contains several examples of dataflow analyses.
9//!
10//! Then call `iterate_to_fixpoint` on your type that impls `Analysis` to get a `Results`. From
11//! there, you can use a `ResultsCursor` to inspect the fixpoint solution to your dataflow problem
12//! (good for inspecting a small number of locations), or implement the `ResultsVisitor` interface
13//! and use `visit_results` (good for inspecting many or all locations). The following example uses
14//! the `ResultsCursor` approach.
15//!
16//! ```ignore (cross-crate-imports)
17//! use rustc_const_eval::dataflow::Analysis; // Makes `iterate_to_fixpoint` available.
18//!
19//! fn do_my_analysis(tcx: TyCtxt<'tcx>, body: &mir::Body<'tcx>) {
//!     let mut cursor = MyAnalysis::new()
21//! .iterate_to_fixpoint(tcx, body, None)
22//! .into_results_cursor(body);
23//!
24//! // Print the dataflow state *after* each statement in the start block.
//!     for (_, statement_index) in body.basic_blocks[START_BLOCK].statements.iter_enumerated() {
26//! cursor.seek_after(Location { block: START_BLOCK, statement_index });
27//! let state = cursor.get();
28//! println!("{:?}", state);
29//! }
30//! }
31//! ```
32//!
33//! [gen-kill]: https://en.wikipedia.org/wiki/Data-flow_analysis#Bit_vector_problems
34
35use std::cmp::Ordering;
36
37use rustc_data_structures::work_queue::WorkQueue;
38use rustc_index::bit_set::{DenseBitSet, MixedBitSet};
39use rustc_index::{Idx, IndexVec};
40use rustc_middle::bug;
41use rustc_middle::mir::{
42 self, BasicBlock, CallReturnPlaces, Location, SwitchTargetValue, TerminatorEdges, traversal,
43};
44use rustc_middle::ty::TyCtxt;
45use tracing::error;
46
47use self::graphviz::write_graphviz_results;
48use super::fmt::DebugWithContext;
49
50mod cursor;
51mod direction;
52pub mod fmt;
53pub mod graphviz;
54pub mod lattice;
55mod results;
56mod visitor;
57
58pub use self::cursor::ResultsCursor;
59pub use self::direction::{Backward, Direction, Forward};
60pub use self::lattice::{JoinSemiLattice, MaybeReachable};
61pub(crate) use self::results::AnalysisAndResults;
62pub use self::results::Results;
63pub use self::visitor::{ResultsVisitor, visit_reachable_results, visit_results};
64
/// Analysis domains are all bitsets of various kinds. This trait holds
/// operations needed by all of them.
pub trait BitSetExt<T> {
    /// Returns `true` if `elem` is a member of this set.
    fn contains(&self, elem: T) -> bool;
}
70
71impl<T: Idx> BitSetExt<T> for DenseBitSet<T> {
72 fn contains(&self, elem: T) -> bool {
73 self.contains(elem)
74 }
75}
76
77impl<T: Idx> BitSetExt<T> for MixedBitSet<T> {
78 fn contains(&self, elem: T) -> bool {
79 self.contains(elem)
80 }
81}
82
83/// A dataflow problem with an arbitrarily complex transfer function.
84///
85/// This trait specifies the lattice on which this analysis operates (the domain), its
86/// initial value at the entry point of each basic block, and various operations.
87///
88/// # Convergence
89///
90/// When implementing this trait it's possible to choose a transfer function such that the analysis
91/// does not reach fixpoint. To guarantee convergence, your transfer functions must maintain the
92/// following invariant:
93///
94/// > If the dataflow state **before** some point in the program changes to be greater
95/// than the prior state **before** that point, the dataflow state **after** that point must
96/// also change to be greater than the prior state **after** that point.
97///
98/// This invariant guarantees that the dataflow state at a given point in the program increases
99/// monotonically until fixpoint is reached. Note that this monotonicity requirement only applies
100/// to the same point in the program at different points in time. The dataflow state at a given
101/// point in the program may or may not be greater than the state at any preceding point.
102pub trait Analysis<'tcx> {
103 /// The type that holds the dataflow state at any given point in the program.
104 type Domain: Clone + JoinSemiLattice;
105
106 /// The direction of this analysis. Either `Forward` or `Backward`.
107 type Direction: Direction = Forward;
108
109 /// Auxiliary data used for analyzing `SwitchInt` terminators, if necessary.
110 type SwitchIntData = !;
111
112 /// A descriptive name for this analysis. Used only for debugging.
113 ///
114 /// This name should be brief and contain no spaces, periods or other characters that are not
115 /// suitable as part of a filename.
116 const NAME: &'static str;
117
118 /// Returns the initial value of the dataflow state upon entry to each basic block.
119 fn bottom_value(&self, body: &mir::Body<'tcx>) -> Self::Domain;
120
121 /// Mutates the initial value of the dataflow state upon entry to the `START_BLOCK`.
122 ///
123 /// For backward analyses, initial state (besides the bottom value) is not yet supported. Trying
124 /// to mutate the initial state will result in a panic.
125 //
126 // FIXME: For backward dataflow analyses, the initial state should be applied to every basic
127 // block where control flow could exit the MIR body (e.g., those terminated with `return` or
128 // `resume`). It's not obvious how to handle `yield` points in coroutines, however.
129 fn initialize_start_block(&self, body: &mir::Body<'tcx>, state: &mut Self::Domain);
130
131 /// Updates the current dataflow state with an "early" effect, i.e. one
132 /// that occurs immediately before the given statement.
133 ///
134 /// This method is useful if the consumer of the results of this analysis only needs to observe
135 /// *part* of the effect of a statement (e.g. for two-phase borrows). As a general rule,
136 /// analyses should not implement this without also implementing
137 /// `apply_primary_statement_effect`.
138 fn apply_early_statement_effect(
139 &mut self,
140 _state: &mut Self::Domain,
141 _statement: &mir::Statement<'tcx>,
142 _location: Location,
143 ) {
144 }
145
146 /// Updates the current dataflow state with the effect of evaluating a statement.
147 fn apply_primary_statement_effect(
148 &mut self,
149 state: &mut Self::Domain,
150 statement: &mir::Statement<'tcx>,
151 location: Location,
152 );
153
154 /// Updates the current dataflow state with an effect that occurs immediately *before* the
155 /// given terminator.
156 ///
157 /// This method is useful if the consumer of the results of this analysis needs only to observe
158 /// *part* of the effect of a terminator (e.g. for two-phase borrows). As a general rule,
159 /// analyses should not implement this without also implementing
160 /// `apply_primary_terminator_effect`.
161 fn apply_early_terminator_effect(
162 &mut self,
163 _state: &mut Self::Domain,
164 _terminator: &mir::Terminator<'tcx>,
165 _location: Location,
166 ) {
167 }
168
169 /// Updates the current dataflow state with the effect of evaluating a terminator.
170 ///
171 /// The effect of a successful return from a `Call` terminator should **not** be accounted for
172 /// in this function. That should go in `apply_call_return_effect`. For example, in the
173 /// `InitializedPlaces` analyses, the return place for a function call is not marked as
174 /// initialized here.
175 fn apply_primary_terminator_effect<'mir>(
176 &mut self,
177 _state: &mut Self::Domain,
178 terminator: &'mir mir::Terminator<'tcx>,
179 _location: Location,
180 ) -> TerminatorEdges<'mir, 'tcx> {
181 terminator.edges()
182 }
183
184 /* Edge-specific effects */
185
186 /// Updates the current dataflow state with the effect of a successful return from a `Call`
187 /// terminator.
188 ///
189 /// This is separate from `apply_primary_terminator_effect` to properly track state across
190 /// unwind edges.
191 fn apply_call_return_effect(
192 &mut self,
193 _state: &mut Self::Domain,
194 _block: BasicBlock,
195 _return_places: CallReturnPlaces<'_, 'tcx>,
196 ) {
197 }
198
199 /// Used to update the current dataflow state with the effect of taking a particular branch in
200 /// a `SwitchInt` terminator.
201 ///
202 /// Unlike the other edge-specific effects, which are allowed to mutate `Self::Domain`
203 /// directly, overriders of this method must return a `Self::SwitchIntData` value (wrapped in
204 /// `Some`). The `apply_switch_int_edge_effect` method will then be called once for each
205 /// outgoing edge and will have access to the dataflow state that will be propagated along that
206 /// edge, and also the `Self::SwitchIntData` value.
207 ///
208 /// This interface is somewhat more complex than the other visitor-like "effect" methods.
209 /// However, it is both more ergonomic—callers don't need to recompute or cache information
210 /// about a given `SwitchInt` terminator for each one of its edges—and more efficient—the
211 /// engine doesn't need to clone the exit state for a block unless
212 /// `get_switch_int_data` is actually called.
213 fn get_switch_int_data(
214 &mut self,
215 _block: mir::BasicBlock,
216 _discr: &mir::Operand<'tcx>,
217 ) -> Option<Self::SwitchIntData> {
218 None
219 }
220
221 /// See comments on `get_switch_int_data`.
222 fn apply_switch_int_edge_effect(
223 &mut self,
224 _data: &mut Self::SwitchIntData,
225 _state: &mut Self::Domain,
226 _value: SwitchTargetValue,
227 _targets: &mir::SwitchTargets,
228 ) {
229 unreachable!();
230 }
231
232 /* Extension methods */
233
234 /// Finds the fixpoint for this dataflow problem.
235 ///
236 /// You shouldn't need to override this. Its purpose is to enable method chaining like so:
237 ///
238 /// ```ignore (cross-crate-imports)
239 /// let results = MyAnalysis::new(tcx, body)
240 /// .iterate_to_fixpoint(tcx, body, None)
241 /// .into_results_cursor(body);
242 /// ```
243 /// You can optionally add a `pass_name` to the graphviz output for this particular run of a
244 /// dataflow analysis. Some analyses are run multiple times in the compilation pipeline.
245 /// Without a `pass_name` to differentiates them, only the results for the latest run will be
246 /// saved.
247 fn iterate_to_fixpoint<'mir>(
248 mut self,
249 tcx: TyCtxt<'tcx>,
250 body: &'mir mir::Body<'tcx>,
251 pass_name: Option<&'static str>,
252 ) -> AnalysisAndResults<'tcx, Self>
253 where
254 Self: Sized,
255 Self::Domain: DebugWithContext<Self>,
256 {
257 let mut results = IndexVec::from_fn_n(|_| self.bottom_value(body), body.basic_blocks.len());
258 self.initialize_start_block(body, &mut results[mir::START_BLOCK]);
259
260 if Self::Direction::IS_BACKWARD && results[mir::START_BLOCK] != self.bottom_value(body) {
261 bug!("`initialize_start_block` is not yet supported for backward dataflow analyses");
262 }
263
264 let mut dirty_queue: WorkQueue<BasicBlock> = WorkQueue::with_none(body.basic_blocks.len());
265
266 if Self::Direction::IS_FORWARD {
267 for (bb, _) in traversal::reverse_postorder(body) {
268 dirty_queue.insert(bb);
269 }
270 } else {
271 // Reverse post-order on the reverse CFG may generate a better iteration order for
272 // backward dataflow analyses, but probably not enough to matter.
273 for (bb, _) in traversal::postorder(body) {
274 dirty_queue.insert(bb);
275 }
276 }
277
278 // `state` is not actually used between iterations;
279 // this is just an optimization to avoid reallocating
280 // every iteration.
281 let mut state = self.bottom_value(body);
282 while let Some(bb) = dirty_queue.pop() {
283 // Set the state to the entry state of the block. This is equivalent to `state =
284 // results[bb].clone()`, but it saves an allocation, thus improving compile times.
285 state.clone_from(&results[bb]);
286
287 Self::Direction::apply_effects_in_block(
288 &mut self,
289 body,
290 &mut state,
291 bb,
292 &body[bb],
293 |target: BasicBlock, state: &Self::Domain| {
294 let set_changed = results[target].join(state);
295 if set_changed {
296 dirty_queue.insert(target);
297 }
298 },
299 );
300 }
301
302 if tcx.sess.opts.unstable_opts.dump_mir_dataflow {
303 let res = write_graphviz_results(tcx, body, &mut self, &results, pass_name);
304 if let Err(e) = res {
305 error!("Failed to write graphviz dataflow results: {}", e);
306 }
307 }
308
309 AnalysisAndResults { analysis: self, results }
310 }
311}
312
/// The operations a transfer function may perform in a gen/kill problem.
pub trait GenKill<T> {
    /// Adds `elem` to the state vector.
    fn gen_(&mut self, elem: T);

    /// Takes `elem` out of the state vector.
    fn kill(&mut self, elem: T);

    /// Calls `gen_` once for every element of `elems`, in iteration order.
    fn gen_all(&mut self, elems: impl IntoIterator<Item = T>) {
        elems.into_iter().for_each(|elem| self.gen_(elem));
    }

    /// Calls `kill` once for every element of `elems`, in iteration order.
    fn kill_all(&mut self, elems: impl IntoIterator<Item = T>) {
        elems.into_iter().for_each(|elem| self.kill(elem));
    }
}
335
336impl<T: Idx> GenKill<T> for DenseBitSet<T> {
337 fn gen_(&mut self, elem: T) {
338 self.insert(elem);
339 }
340
341 fn kill(&mut self, elem: T) {
342 self.remove(elem);
343 }
344}
345
346impl<T: Idx> GenKill<T> for MixedBitSet<T> {
347 fn gen_(&mut self, elem: T) {
348 self.insert(elem);
349 }
350
351 fn kill(&mut self, elem: T) {
352 self.remove(elem);
353 }
354}
355
356impl<T, S: GenKill<T>> GenKill<T> for MaybeReachable<S> {
357 fn gen_(&mut self, elem: T) {
358 match self {
359 // If the state is not reachable, adding an element does nothing.
360 MaybeReachable::Unreachable => {}
361 MaybeReachable::Reachable(set) => set.gen_(elem),
362 }
363 }
364
365 fn kill(&mut self, elem: T) {
366 match self {
367 // If the state is not reachable, killing an element does nothing.
368 MaybeReachable::Unreachable => {}
369 MaybeReachable::Reachable(set) => set.kill(elem),
370 }
371 }
372}
373
// NOTE: DO NOT CHANGE VARIANT ORDER. The derived `Ord` impls rely on the current order.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
enum Effect {
    /// The "early" effect of a statement/terminator (e.g., `apply_early_statement_effect`).
    Early,

    /// The "primary" effect of a statement/terminator (e.g., `apply_primary_statement_effect`).
    Primary,
}

impl Effect {
    /// Pairs this effect with `statement_index`, yielding the corresponding `EffectIndex`.
    const fn at_index(self, statement_index: usize) -> EffectIndex {
        EffectIndex { statement_index, effect: self }
    }
}

/// Identifies one effect (early or primary) of the statement at a given index.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct EffectIndex {
    statement_index: usize,
    effect: Effect,
}

impl EffectIndex {
    /// Returns the effect applied right after `self` in forward order: an early effect is
    /// followed by the primary effect at the same index, and a primary effect by the early
    /// effect at the next index.
    fn next_in_forward_order(self) -> Self {
        let (effect, statement_index) = match self.effect {
            Effect::Early => (Effect::Primary, self.statement_index),
            Effect::Primary => (Effect::Early, self.statement_index + 1),
        };
        effect.at_index(statement_index)
    }

    /// Returns the effect applied right after `self` in backward order: an early effect is
    /// followed by the primary effect at the same index, and a primary effect by the early
    /// effect at the previous index.
    ///
    /// NOTE: for a primary effect at index 0 the `usize` subtraction below overflows.
    fn next_in_backward_order(self) -> Self {
        let (effect, statement_index) = match self.effect {
            Effect::Early => (Effect::Primary, self.statement_index),
            Effect::Primary => (Effect::Early, self.statement_index - 1),
        };
        effect.at_index(statement_index)
    }

    /// Returns `true` if the effect at `self` should be applied earlier than the effect at `other`
    /// in forward order.
    fn precedes_in_forward_order(self, other: Self) -> bool {
        // Lower statement indices come first; ties are broken by `Early` before `Primary`.
        match self.statement_index.cmp(&other.statement_index) {
            Ordering::Less => true,
            Ordering::Greater => false,
            Ordering::Equal => self.effect < other.effect,
        }
    }

    /// Returns `true` if the effect at `self` should be applied earlier than the effect at `other`
    /// in backward order.
    fn precedes_in_backward_order(self, other: Self) -> bool {
        // Higher statement indices come first (note the swapped operands); ties are still
        // broken by `Early` before `Primary`.
        match other.statement_index.cmp(&self.statement_index) {
            Ordering::Less => true,
            Ordering::Greater => false,
            Ordering::Equal => self.effect < other.effect,
        }
    }
}
431
432#[cfg(test)]
433mod tests;