// rustc_ast_lowering/format.rs

1use std::borrow::Cow;
2
3use rustc_ast::*;
4use rustc_data_structures::fx::FxIndexMap;
5use rustc_hir as hir;
6use rustc_session::config::FmtDebug;
7use rustc_span::{ByteSymbol, DesugaringKind, Ident, Span, Symbol, sym};
8
9use super::LoweringContext;
10
11impl<'hir> LoweringContext<'_, 'hir> {
12    pub(crate) fn lower_format_args(&mut self, sp: Span, fmt: &FormatArgs) -> hir::ExprKind<'hir> {
13        // Never call the const constructor of `fmt::Arguments` if the
14        // format_args!() had any arguments _before_ flattening/inlining.
15        let allow_const = fmt.arguments.all_args().is_empty();
16        let mut fmt = Cow::Borrowed(fmt);
17
18        let sp = self.mark_span_with_reason(
19            DesugaringKind::FormatLiteral { source: fmt.is_source_literal },
20            sp,
21            sp.ctxt().outer_expn_data().allow_internal_unstable,
22        );
23
24        if self.tcx.sess.opts.unstable_opts.flatten_format_args {
25            fmt = flatten_format_args(fmt);
26            fmt = self.inline_literals(fmt);
27        }
28        expand_format_args(self, sp, &fmt, allow_const)
29    }
30
31    /// Try to convert a literal into an interned string
32    fn try_inline_lit(&self, lit: token::Lit) -> Option<Symbol> {
33        match LitKind::from_token_lit(lit) {
34            Ok(LitKind::Str(s, _)) => Some(s),
35            Ok(LitKind::Int(n, ty)) => {
36                match ty {
37                    // unsuffixed integer literals are assumed to be i32's
38                    LitIntType::Unsuffixed => {
39                        (n <= i32::MAX as u128).then_some(Symbol::intern(&n.to_string()))
40                    }
41                    LitIntType::Signed(int_ty) => {
42                        let max_literal = self.int_ty_max(int_ty);
43                        (n <= max_literal).then_some(Symbol::intern(&n.to_string()))
44                    }
45                    LitIntType::Unsigned(uint_ty) => {
46                        let max_literal = self.uint_ty_max(uint_ty);
47                        (n <= max_literal).then_some(Symbol::intern(&n.to_string()))
48                    }
49                }
50            }
51            _ => None,
52        }
53    }
54
55    /// Get the maximum value of int_ty. It is platform-dependent due to the byte size of isize
56    fn int_ty_max(&self, int_ty: IntTy) -> u128 {
57        match int_ty {
58            IntTy::Isize => self.tcx.data_layout.pointer_size().signed_int_max() as u128,
59            IntTy::I8 => i8::MAX as u128,
60            IntTy::I16 => i16::MAX as u128,
61            IntTy::I32 => i32::MAX as u128,
62            IntTy::I64 => i64::MAX as u128,
63            IntTy::I128 => i128::MAX as u128,
64        }
65    }
66
67    /// Get the maximum value of uint_ty. It is platform-dependent due to the byte size of usize
68    fn uint_ty_max(&self, uint_ty: UintTy) -> u128 {
69        match uint_ty {
70            UintTy::Usize => self.tcx.data_layout.pointer_size().unsigned_int_max(),
71            UintTy::U8 => u8::MAX as u128,
72            UintTy::U16 => u16::MAX as u128,
73            UintTy::U32 => u32::MAX as u128,
74            UintTy::U64 => u64::MAX as u128,
75            UintTy::U128 => u128::MAX as u128,
76        }
77    }
78
79    /// Inline literals into the format string.
80    ///
81    /// Turns
82    ///
83    /// `format_args!("Hello, {}! {} {}", "World", 123, x)`
84    ///
85    /// into
86    ///
87    /// `format_args!("Hello, World! 123 {}", x)`.
88    fn inline_literals<'fmt>(&self, mut fmt: Cow<'fmt, FormatArgs>) -> Cow<'fmt, FormatArgs> {
89        let mut was_inlined = vec![false; fmt.arguments.all_args().len()];
90        let mut inlined_anything = false;
91
92        for i in 0..fmt.template.len() {
93            if let FormatArgsPiece::Placeholder(placeholder) = &fmt.template[i]
94                && let Ok(arg_index) = placeholder.argument.index
95                && let FormatTrait::Display = placeholder.format_trait
96                && placeholder.format_options == Default::default()
97                && let arg = fmt.arguments.all_args()[arg_index].expr.peel_parens_and_refs()
98                && let ExprKind::Lit(lit) = arg.kind
99                && let Some(literal) = self.try_inline_lit(lit)
100            {
101                // Now we need to mutate the outer FormatArgs.
102                // If this is the first time, this clones the outer FormatArgs.
103                let fmt = fmt.to_mut();
104                // Replace the placeholder with the literal.
105                fmt.template[i] = FormatArgsPiece::Literal(literal);
106                was_inlined[arg_index] = true;
107                inlined_anything = true;
108            }
109        }
110
111        // Remove the arguments that were inlined.
112        if inlined_anything {
113            let fmt = fmt.to_mut();
114
115            let mut remove = was_inlined;
116
117            // Don't remove anything that's still used.
118            for_all_argument_indexes(&mut fmt.template, |index| remove[*index] = false);
119
120            // Drop all the arguments that are marked for removal.
121            let mut remove_it = remove.iter();
122            fmt.arguments.all_args_mut().retain(|_| remove_it.next() != Some(&true));
123
124            // Calculate the mapping of old to new indexes for the remaining arguments.
125            let index_map: Vec<usize> = remove
126                .into_iter()
127                .scan(0, |i, remove| {
128                    let mapped = *i;
129                    *i += !remove as usize;
130                    Some(mapped)
131                })
132                .collect();
133
134            // Correct the indexes that refer to arguments that have shifted position.
135            for_all_argument_indexes(&mut fmt.template, |index| *index = index_map[*index]);
136        }
137
138        fmt
139    }
140}
141
142/// Flattens nested `format_args!()` into one.
143///
144/// Turns
145///
146/// `format_args!("a {} {} {}.", 1, format_args!("b{}!", 2), 3)`
147///
148/// into
149///
150/// `format_args!("a {} b{}! {}.", 1, 2, 3)`.
151fn flatten_format_args(mut fmt: Cow<'_, FormatArgs>) -> Cow<'_, FormatArgs> {
152    let mut i = 0;
153    while i < fmt.template.len() {
154        if let FormatArgsPiece::Placeholder(placeholder) = &fmt.template[i]
155            && let FormatTrait::Display | FormatTrait::Debug = &placeholder.format_trait
156            && let Ok(arg_index) = placeholder.argument.index
157            && let arg = fmt.arguments.all_args()[arg_index].expr.peel_parens_and_refs()
158            && let ExprKind::FormatArgs(_) = &arg.kind
159            // Check that this argument is not used by any other placeholders.
160            && fmt.template.iter().enumerate().all(|(j, p)|
161                i == j ||
162                !matches!(p, FormatArgsPiece::Placeholder(placeholder)
163                    if placeholder.argument.index == Ok(arg_index))
164            )
165        {
166            // Now we need to mutate the outer FormatArgs.
167            // If this is the first time, this clones the outer FormatArgs.
168            let fmt = fmt.to_mut();
169
170            // Take the inner FormatArgs out of the outer arguments, and
171            // replace it by the inner arguments. (We can't just put those at
172            // the end, because we need to preserve the order of evaluation.)
173
174            let args = fmt.arguments.all_args_mut();
175            let remaining_args = args.split_off(arg_index + 1);
176            let old_arg_offset = args.len();
177            let mut fmt2 = &mut args.pop().unwrap().expr; // The inner FormatArgs.
178            let fmt2 = loop {
179                // Unwrap the Expr to get to the FormatArgs.
180                match &mut fmt2.kind {
181                    ExprKind::Paren(inner) | ExprKind::AddrOf(BorrowKind::Ref, _, inner) => {
182                        fmt2 = inner
183                    }
184                    ExprKind::FormatArgs(fmt2) => break fmt2,
185                    _ => unreachable!(),
186                }
187            };
188
189            args.append(fmt2.arguments.all_args_mut());
190            let new_arg_offset = args.len();
191            args.extend(remaining_args);
192
193            // Correct the indexes that refer to the arguments after the newly inserted arguments.
194            for_all_argument_indexes(&mut fmt.template, |index| {
195                if *index >= old_arg_offset {
196                    *index -= old_arg_offset;
197                    *index += new_arg_offset;
198                }
199            });
200
201            // Now merge the placeholders:
202
203            let rest = fmt.template.split_off(i + 1);
204            fmt.template.pop(); // remove the placeholder for the nested fmt args.
205            // Insert the pieces from the nested format args, but correct any
206            // placeholders to point to the correct argument index.
207            for_all_argument_indexes(&mut fmt2.template, |index| *index += arg_index);
208            fmt.template.append(&mut fmt2.template);
209            fmt.template.extend(rest);
210
211            // Don't increment `i` here, so we recurse into the newly added pieces.
212        } else {
213            i += 1;
214        }
215    }
216    fmt
217}
218
219#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)]
220enum ArgumentType {
221    Format(FormatTrait),
222    Usize,
223}
224
225/// Generate a hir expression representing an argument to a format_args invocation.
226///
227/// Generates:
228///
229/// ```text
230///     <core::fmt::Argument>::new_…(arg)
231/// ```
232fn make_argument<'hir>(
233    ctx: &mut LoweringContext<'_, 'hir>,
234    sp: Span,
235    arg: &'hir hir::Expr<'hir>,
236    ty: ArgumentType,
237) -> hir::Expr<'hir> {
238    use ArgumentType::*;
239    use FormatTrait::*;
240    let new_fn = ctx.arena.alloc(ctx.expr_lang_item_type_relative(
241        sp,
242        hir::LangItem::FormatArgument,
243        match ty {
244            Format(Display) => sym::new_display,
245            Format(Debug) => match ctx.tcx.sess.opts.unstable_opts.fmt_debug {
246                FmtDebug::Full | FmtDebug::Shallow => sym::new_debug,
247                FmtDebug::None => sym::new_debug_noop,
248            },
249            Format(LowerExp) => sym::new_lower_exp,
250            Format(UpperExp) => sym::new_upper_exp,
251            Format(Octal) => sym::new_octal,
252            Format(Pointer) => sym::new_pointer,
253            Format(Binary) => sym::new_binary,
254            Format(LowerHex) => sym::new_lower_hex,
255            Format(UpperHex) => sym::new_upper_hex,
256            Usize => sym::from_usize,
257        },
258    ));
259    ctx.expr_call_mut(sp, new_fn, std::slice::from_ref(arg))
260}
261
262/// Get the value for a `width` or `precision` field.
263///
264/// Returns the value and whether it is indirect (an indexed argument) or not.
265fn make_count(
266    count: &FormatCount,
267    argmap: &mut FxIndexMap<(usize, ArgumentType), Option<Span>>,
268) -> (bool, u16) {
269    match count {
270        FormatCount::Literal(n) => (false, *n),
271        FormatCount::Argument(arg) => (
272            true,
273            argmap.insert_full((arg.index.unwrap_or(usize::MAX), ArgumentType::Usize), arg.span).0
274                as u16,
275        ),
276    }
277}
278
279fn expand_format_args<'hir>(
280    ctx: &mut LoweringContext<'_, 'hir>,
281    macsp: Span,
282    fmt: &FormatArgs,
283    allow_const: bool,
284) -> hir::ExprKind<'hir> {
285    let macsp = ctx.lower_span(macsp);
286
287    // Create a list of all _unique_ (argument, format trait) combinations.
288    // E.g. "{0} {0:x} {0} {1}" -> [(0, Display), (0, LowerHex), (1, Display)]
289    //
290    // We use usize::MAX for arguments that don't exist, because that can never be a valid index
291    // into the arguments array.
292    let mut argmap = FxIndexMap::default();
293
294    let mut incomplete_lit = String::new();
295
296    let mut implicit_arg_index = 0;
297
298    let mut bytecode = Vec::new();
299
300    let template = if fmt.template.is_empty() {
301        // Treat empty templates as a single literal piece (with an empty string),
302        // so we produce `from_str("")` for those.
303        &[FormatArgsPiece::Literal(sym::empty)][..]
304    } else {
305        &fmt.template[..]
306    };
307
308    // See library/core/src/fmt/mod.rs for the format string encoding format.
309
310    for (i, piece) in template.iter().enumerate() {
311        match piece {
312            &FormatArgsPiece::Literal(sym) => {
313                // Coalesce adjacent literal pieces.
314                if let Some(FormatArgsPiece::Literal(_)) = template.get(i + 1) {
315                    incomplete_lit.push_str(sym.as_str());
316                    continue;
317                }
318                let mut s = if incomplete_lit.is_empty() {
319                    sym.as_str()
320                } else {
321                    incomplete_lit.push_str(sym.as_str());
322                    &incomplete_lit
323                };
324
325                // If this is the last piece and was the only piece, that means
326                // there are no placeholders and the entire format string is just a literal.
327                //
328                // In that case, we can just use `from_str`.
329                if i + 1 == template.len() && bytecode.is_empty() {
330                    // Generate:
331                    //     <core::fmt::Arguments>::from_str("meow")
332                    let from_str = ctx.arena.alloc(ctx.expr_lang_item_type_relative(
333                        macsp,
334                        hir::LangItem::FormatArguments,
335                        if allow_const { sym::from_str } else { sym::from_str_nonconst },
336                    ));
337                    let sym = if incomplete_lit.is_empty() { sym } else { Symbol::intern(s) };
338                    let s = ctx.expr_str(fmt.span, sym);
339                    let args = ctx.arena.alloc_from_iter([s]);
340                    return hir::ExprKind::Call(from_str, args);
341                }
342
343                // Encode the literal in chunks of up to u16::MAX bytes, split at utf-8 boundaries.
344                while !s.is_empty() {
345                    let len = s.floor_char_boundary(usize::from(u16::MAX));
346                    if len < 0x80 {
347                        bytecode.push(len as u8);
348                    } else {
349                        bytecode.push(0x80);
350                        bytecode.extend_from_slice(&(len as u16).to_le_bytes());
351                    }
352                    bytecode.extend(&s.as_bytes()[..len]);
353                    s = &s[len..];
354                }
355
356                incomplete_lit.clear();
357            }
358            FormatArgsPiece::Placeholder(p) => {
359                // Push the start byte and remember its index so we can set the option bits later.
360                let i = bytecode.len();
361                bytecode.push(0xC0);
362
363                let position = argmap
364                    .insert_full(
365                        (
366                            p.argument.index.unwrap_or(usize::MAX),
367                            ArgumentType::Format(p.format_trait),
368                        ),
369                        p.span,
370                    )
371                    .0 as u64;
372
373                // This needs to match the constants in library/core/src/fmt/mod.rs.
374                let o = &p.format_options;
375                let align = match o.alignment {
376                    Some(FormatAlignment::Left) => 0,
377                    Some(FormatAlignment::Right) => 1,
378                    Some(FormatAlignment::Center) => 2,
379                    None => 3,
380                };
381                let default_flags = 0x6000_0020;
382                let flags: u32 = o.fill.unwrap_or(' ') as u32
383                    | ((o.sign == Some(FormatSign::Plus)) as u32) << 21
384                    | ((o.sign == Some(FormatSign::Minus)) as u32) << 22
385                    | (o.alternate as u32) << 23
386                    | (o.zero_pad as u32) << 24
387                    | ((o.debug_hex == Some(FormatDebugHex::Lower)) as u32) << 25
388                    | ((o.debug_hex == Some(FormatDebugHex::Upper)) as u32) << 26
389                    | (o.width.is_some() as u32) << 27
390                    | (o.precision.is_some() as u32) << 28
391                    | align << 29;
392                if flags != default_flags {
393                    bytecode[i] |= 1;
394                    bytecode.extend_from_slice(&flags.to_le_bytes());
395                    if let Some(val) = &o.width {
396                        let (indirect, val) = make_count(val, &mut argmap);
397                        // Only encode if nonzero; zero is the default.
398                        if indirect || val != 0 {
399                            bytecode[i] |= 1 << 1 | (indirect as u8) << 4;
400                            bytecode.extend_from_slice(&val.to_le_bytes());
401                        }
402                    }
403                    if let Some(val) = &o.precision {
404                        let (indirect, val) = make_count(val, &mut argmap);
405                        // Only encode if nonzero; zero is the default.
406                        if indirect || val != 0 {
407                            bytecode[i] |= 1 << 2 | (indirect as u8) << 5;
408                            bytecode.extend_from_slice(&val.to_le_bytes());
409                        }
410                    }
411                }
412                if implicit_arg_index != position {
413                    bytecode[i] |= 1 << 3;
414                    bytecode.extend_from_slice(&(position as u16).to_le_bytes());
415                }
416                implicit_arg_index = position + 1;
417            }
418        }
419    }
420
421    assert!(incomplete_lit.is_empty());
422
423    // Zero terminator.
424    bytecode.push(0);
425
426    // Ensure all argument indexes actually fit in 16 bits, as we truncated them to 16 bits before.
427    if argmap.len() > u16::MAX as usize {
428        ctx.dcx().span_err(macsp, "too many format arguments");
429    }
430
431    let arguments = fmt.arguments.all_args();
432
433    let (let_statements, args) = if arguments.is_empty() {
434        // Generate:
435        //     []
436        (vec![], ctx.arena.alloc(ctx.expr(macsp, hir::ExprKind::Array(&[]))))
437    } else {
438        // Generate:
439        //     super let args = (&arg0, &arg1, &…);
440        let args_ident = Ident::new(sym::args, macsp);
441        let (args_pat, args_hir_id) = ctx.pat_ident(macsp, args_ident);
442        let elements = ctx.arena.alloc_from_iter(arguments.iter().map(|arg| {
443            let arg_expr = ctx.lower_expr(&arg.expr);
444            ctx.expr(
445                arg.expr.span.with_ctxt(macsp.ctxt()),
446                hir::ExprKind::AddrOf(hir::BorrowKind::Ref, hir::Mutability::Not, arg_expr),
447            )
448        }));
449        let args_tuple = ctx.arena.alloc(ctx.expr(macsp, hir::ExprKind::Tup(elements)));
450        let let_statement_1 = ctx.stmt_super_let_pat(macsp, args_pat, Some(args_tuple));
451
452        // Generate:
453        //     super let args = [
454        //         <core::fmt::Argument>::new_display(args.0),
455        //         <core::fmt::Argument>::new_lower_hex(args.1),
456        //         <core::fmt::Argument>::new_debug(args.0),
457        //         …
458        //     ];
459        let args = ctx.arena.alloc_from_iter(argmap.iter().map(
460            |(&(arg_index, ty), &placeholder_span)| {
461                if let Some(arg) = arguments.get(arg_index) {
462                    let placeholder_span =
463                        placeholder_span.unwrap_or(arg.expr.span).with_ctxt(macsp.ctxt());
464                    let arg_span = match arg.kind {
465                        FormatArgumentKind::Captured(_) => placeholder_span,
466                        _ => arg.expr.span.with_ctxt(macsp.ctxt()),
467                    };
468                    let args_ident_expr = ctx.expr_ident(macsp, args_ident, args_hir_id);
469                    let arg = ctx.arena.alloc(ctx.expr(
470                        arg_span,
471                        hir::ExprKind::Field(
472                            args_ident_expr,
473                            Ident::new(sym::integer(arg_index), macsp),
474                        ),
475                    ));
476                    make_argument(ctx, placeholder_span, arg, ty)
477                } else {
478                    ctx.expr(
479                        macsp,
480                        hir::ExprKind::Err(
481                            ctx.dcx().span_delayed_bug(macsp, "missing format_args argument"),
482                        ),
483                    )
484                }
485            },
486        ));
487        let args = ctx.arena.alloc(ctx.expr(macsp, hir::ExprKind::Array(args)));
488        let (args_pat, args_hir_id) = ctx.pat_ident(macsp, args_ident);
489        let let_statement_2 = ctx.stmt_super_let_pat(macsp, args_pat, Some(args));
490        (
491            vec![let_statement_1, let_statement_2],
492            ctx.arena.alloc(ctx.expr_ident_mut(macsp, args_ident, args_hir_id)),
493        )
494    };
495
496    // Generate:
497    //     unsafe {
498    //         <core::fmt::Arguments>::new(b"…", &args)
499    //     }
500    let template = ctx.expr_byte_str(macsp, ByteSymbol::intern(&bytecode));
501    let call = {
502        let new = ctx.arena.alloc(ctx.expr_lang_item_type_relative(
503            macsp,
504            hir::LangItem::FormatArguments,
505            sym::new,
506        ));
507        let args = ctx.expr_ref(macsp, args);
508        let new_args = ctx.arena.alloc_from_iter([template, args]);
509        ctx.expr_call(macsp, new, new_args)
510    };
511    let call = hir::ExprKind::Block(
512        ctx.arena.alloc(hir::Block {
513            stmts: &[],
514            expr: Some(call),
515            hir_id: ctx.next_id(),
516            rules: hir::BlockCheckMode::UnsafeBlock(hir::UnsafeSource::CompilerGenerated),
517            span: macsp,
518            targeted_by_break: false,
519        }),
520        None,
521    );
522
523    if !let_statements.is_empty() {
524        // Generate:
525        //     {
526        //         super let …
527        //         super let …
528        //         <core::fmt::Arguments>::new(…)
529        //     }
530        let call = ctx.arena.alloc(ctx.expr(macsp, call));
531        let block = ctx.block_all(macsp, ctx.arena.alloc_from_iter(let_statements), Some(call));
532        hir::ExprKind::Block(block, None)
533    } else {
534        call
535    }
536}
537
538fn for_all_argument_indexes(template: &mut [FormatArgsPiece], mut f: impl FnMut(&mut usize)) {
539    for piece in template {
540        let FormatArgsPiece::Placeholder(placeholder) = piece else { continue };
541        if let Ok(index) = &mut placeholder.argument.index {
542            f(index);
543        }
544        if let Some(FormatCount::Argument(FormatArgPosition { index: Ok(index), .. })) =
545            &mut placeholder.format_options.width
546        {
547            f(index);
548        }
549        if let Some(FormatCount::Argument(FormatArgPosition { index: Ok(index), .. })) =
550            &mut placeholder.format_options.precision
551        {
552            f(index);
553        }
554    }
555}