rustc_codegen_llvm/va_arg.rs

use rustc_abi::{Align, BackendRepr, Endian, HasDataLayout, Primitive, Size, TyAndLayout};
use rustc_codegen_ssa::MemFlags;
use rustc_codegen_ssa::common::IntPredicate;
use rustc_codegen_ssa::mir::operand::OperandRef;
use rustc_codegen_ssa::traits::{
    BaseTypeCodegenMethods, BuilderMethods, ConstCodegenMethods, LayoutTypeCodegenMethods,
};
use rustc_middle::ty::Ty;
use rustc_middle::ty::layout::{HasTyCtxt, LayoutOf};

use crate::builder::Builder;
use crate::type_::Type;
use crate::type_of::LayoutLlvmExt;
use crate::value::Value;

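// Round `value` up to the next multiple of `align` (which must be a power of two) using the
// usual two's-complement trick: add `align - 1`, then mask with `-align`. For example, with
// `align = 8`, a value of 13 becomes `(13 + 7) & !7 = 16`.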
fn round_up_to_alignment<'ll>(
    bx: &mut Builder<'_, 'll, '_>,
    mut value: &'ll Value,
    align: Align,
) -> &'ll Value {
    value = bx.add(value, bx.cx().const_i32(align.bytes() as i32 - 1));
    bx.and(value, bx.cx().const_i32(-(align.bytes() as i32)))
}

fn round_pointer_up_to_alignment<'ll>(
    bx: &mut Builder<'_, 'll, '_>,
    addr: &'ll Value,
    align: Align,
    ptr_ty: &'ll Type,
) -> &'ll Value {
    let mut ptr_as_int = bx.ptrtoint(addr, bx.cx().type_isize());
    ptr_as_int = round_up_to_alignment(bx, ptr_as_int, align);
    bx.inttoptr(ptr_as_int, ptr_ty)
}

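// Emit va_arg for the simple va_list model where the va_list is just a pointer that walks over
// one argument slot at a time. The current pointer is loaded, rounded up to `align` if a higher
// alignment is required and allowed, and the stored pointer is advanced past the slot; on
// big-endian targets the returned address may be right-adjusted within the slot.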
fn emit_direct_ptr_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    size: Size,
    align: Align,
    slot_size: Align,
    allow_higher_align: bool,
    force_right_adjust: bool,
) -> (&'ll Value, Align) {
    let va_list_ty = bx.type_ptr();
    let va_list_addr = list.immediate();

    let ptr_align_abi = bx.tcx().data_layout.pointer_align().abi;
    let ptr = bx.load(va_list_ty, va_list_addr, ptr_align_abi);

    let (addr, addr_align) = if allow_higher_align && align > slot_size {
        (round_pointer_up_to_alignment(bx, ptr, align, bx.type_ptr()), align)
    } else {
        (ptr, slot_size)
    };

    let aligned_size = size.align_to(slot_size).bytes() as i32;
    let full_direct_size = bx.cx().const_i32(aligned_size);
    let next = bx.inbounds_ptradd(addr, full_direct_size);
    bx.store(next, va_list_addr, ptr_align_abi);

    if size.bytes() < slot_size.bytes()
        && bx.tcx().sess.target.endian == Endian::Big
        && force_right_adjust
    {
        let adjusted_size = bx.cx().const_i32((slot_size.bytes() - size.bytes()) as i32);
        let adjusted = bx.inbounds_ptradd(addr, adjusted_size);
        (adjusted, addr_align)
    } else {
        (addr, addr_align)
    }
}

enum PassMode {
    Direct,
    Indirect,
}

enum SlotSize {
    Bytes8 = 8,
    Bytes4 = 4,
}

enum AllowHigherAlign {
    No,
    Yes,
}

enum ForceRightAdjust {
    No,
    Yes,
}

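// Convenience wrapper around `emit_direct_ptr_va_arg` that also emits the final load. With
// `PassMode::Direct` the slot contains the value itself; with `PassMode::Indirect` the slot
// contains a pointer to the value, so one extra load through that pointer is needed.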
fn emit_ptr_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
    pass_mode: PassMode,
    slot_size: SlotSize,
    allow_higher_align: AllowHigherAlign,
    force_right_adjust: ForceRightAdjust,
) -> &'ll Value {
    let indirect = matches!(pass_mode, PassMode::Indirect);
    let allow_higher_align = matches!(allow_higher_align, AllowHigherAlign::Yes);
    let force_right_adjust = matches!(force_right_adjust, ForceRightAdjust::Yes);
    let slot_size = Align::from_bytes(slot_size as u64).unwrap();

    let layout = bx.cx.layout_of(target_ty);
    let (llty, size, align) = if indirect {
        (
            bx.cx.layout_of(Ty::new_imm_ptr(bx.cx.tcx, target_ty)).llvm_type(bx.cx),
            bx.cx.data_layout().pointer_size(),
            bx.cx.data_layout().pointer_align(),
        )
    } else {
        (layout.llvm_type(bx.cx), layout.size, layout.align)
    };
    let (addr, addr_align) = emit_direct_ptr_va_arg(
        bx,
        list,
        size,
        align.abi,
        slot_size,
        allow_higher_align,
        force_right_adjust,
    );
    if indirect {
        let tmp_ret = bx.load(llty, addr, addr_align);
        bx.load(bx.cx.layout_of(target_ty).llvm_type(bx.cx), tmp_ret, align.abi)
    } else {
        bx.load(llty, addr, addr_align)
    }
}

fn emit_aapcs_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    let dl = bx.cx.data_layout();

    // Implementation of the AAPCS64 calling convention for va_args, see
    // https://github.com/ARM-software/abi-aa/blob/master/aapcs64/aapcs64.rst
    //
    // typedef struct va_list {
    //     void * stack; // next stack param
    //     void * gr_top; // end of GP arg reg save area
    //     void * vr_top; // end of FP/SIMD arg reg save area
    //     int gr_offs; // offset from gr_top to next GP register arg
    //     int vr_offs; // offset from vr_top to next FP/SIMD register arg
    // } va_list;
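    //
    // `gr_offs` and `vr_offs` hold negative offsets from `gr_top`/`vr_top` while arguments can
    // still come from the register save areas; they count up towards zero as registers are
    // consumed. Once an offset is non-negative, the remaining arguments live on the stack.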
    let va_list_addr = list.immediate();

    // There is no padding between fields since `void*` is size=8 align=8, `int` is size=4 align=4.
    // See https://github.com/ARM-software/abi-aa/blob/master/aapcs64/aapcs64.rst
    // Table 1, Byte size and byte alignment of fundamental data types
    // Table 3, Mapping of C & C++ built-in data types
    let ptr_offset = 8;
    let i32_offset = 4;
    let gr_top = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(ptr_offset));
    let vr_top = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * ptr_offset));
    let gr_offs = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(3 * ptr_offset));
    let vr_offs = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(3 * ptr_offset + i32_offset));

    let layout = bx.cx.layout_of(target_ty);

    let maybe_reg = bx.append_sibling_block("va_arg.maybe_reg");
    let in_reg = bx.append_sibling_block("va_arg.in_reg");
    let on_stack = bx.append_sibling_block("va_arg.on_stack");
    let end = bx.append_sibling_block("va_arg.end");
    let zero = bx.const_i32(0);
    let offset_align = Align::from_bytes(4).unwrap();

    let gr_type = target_ty.is_any_ptr() || target_ty.is_integral();
    let (reg_off, reg_top, slot_size) = if gr_type {
        let nreg = layout.size.bytes().div_ceil(8);
        (gr_offs, gr_top, nreg * 8)
    } else {
        let nreg = layout.size.bytes().div_ceil(16);
        (vr_offs, vr_top, nreg * 16)
    };
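    // General-purpose argument registers are 8 bytes wide (x0..x7) and FP/SIMD registers are
    // 16 bytes wide (q0..q7), so the value occupies `nreg` slots of the corresponding size in
    // its register save area.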

    // if the offset >= 0 then the value will be on the stack
    let mut reg_off_v = bx.load(bx.type_i32(), reg_off, offset_align);
    let use_stack = bx.icmp(IntPredicate::IntSGE, reg_off_v, zero);
    bx.cond_br(use_stack, on_stack, maybe_reg);

    // The value at this point might be in a register, but there is a chance that
    // it could be on the stack so we have to update the offset and then check
    // the offset again.

    bx.switch_to_block(maybe_reg);
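    // Values with an alignment greater than 8 (i.e. 16) are passed starting at an
    // even-numbered general-purpose register, so first round the (negative) offset up to a
    // multiple of 16.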
    if gr_type && layout.align.abi.bytes() > 8 {
        reg_off_v = bx.add(reg_off_v, bx.const_i32(15));
        reg_off_v = bx.and(reg_off_v, bx.const_i32(-16));
    }
    let new_reg_off_v = bx.add(reg_off_v, bx.const_i32(slot_size as i32));

    bx.store(new_reg_off_v, reg_off, offset_align);

    // Check to see if we have overflowed the registers as a result of this.
    // If we have, then we need to use the stack for this value.
    let use_stack = bx.icmp(IntPredicate::IntSGT, new_reg_off_v, zero);
    bx.cond_br(use_stack, on_stack, in_reg);

    bx.switch_to_block(in_reg);
    let top_type = bx.type_ptr();
    let top = bx.load(top_type, reg_top, dl.pointer_align().abi);

    // reg_value = *(@top + reg_off_v);
    let mut reg_addr = bx.ptradd(top, reg_off_v);
    if bx.tcx().sess.target.endian == Endian::Big && layout.size.bytes() != slot_size {
        // On big-endian systems the value is right-aligned in its slot.
        let offset = bx.const_i32((slot_size - layout.size.bytes()) as i32);
        reg_addr = bx.ptradd(reg_addr, offset);
    }
    let reg_type = layout.llvm_type(bx);
    let reg_value = bx.load(reg_type, reg_addr, layout.align.abi);
    bx.br(end);

    // On Stack block
    bx.switch_to_block(on_stack);
    let stack_value = emit_ptr_va_arg(
        bx,
        list,
        target_ty,
        PassMode::Direct,
        SlotSize::Bytes8,
        AllowHigherAlign::Yes,
        ForceRightAdjust::No,
    );
    bx.br(end);

    bx.switch_to_block(end);
    let val =
        bx.phi(layout.immediate_llvm_type(bx), &[reg_value, stack_value], &[in_reg, on_stack]);

    val
}

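// Implementation of va_arg for the 32-bit PowerPC SVR4 calling convention. Up to 8
// general-purpose registers (r3..r10) and 8 floating-point registers (f1..f8) are used for
// argument passing; `gpr` and `fpr` count how many of each have already been used. In the
// register save area the GPRs come first (8 * 4 = 32 bytes), followed by the FPRs.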
fn emit_powerpc_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    let dl = bx.cx.data_layout();

    // struct __va_list_tag {
    //   unsigned char gpr;
    //   unsigned char fpr;
    //   unsigned short reserved;
    //   void *overflow_arg_area;
    //   void *reg_save_area;
    // };
    let va_list_addr = list.immediate();

    // Peel off any newtype wrappers.
    let layout = {
        let mut layout = bx.cx.layout_of(target_ty);

        while let Some((_, inner)) = layout.non_1zst_field(bx.cx) {
            layout = inner;
        }

        layout
    };

    // Rust does not currently support any powerpc softfloat targets.
    let target = &bx.cx.tcx.sess.target;
    let is_soft_float_abi = target.abi == "softfloat";
    assert!(!is_soft_float_abi);

    // All instances of VaArgSafe are passed directly.
    let is_indirect = false;

    let (is_i64, is_int, is_f64) = match layout.layout.backend_repr() {
        BackendRepr::Scalar(scalar) => match scalar.primitive() {
            rustc_abi::Primitive::Int(integer, _) => (integer.size().bits() == 64, true, false),
            rustc_abi::Primitive::Float(float) => (false, false, float.size().bits() == 64),
            rustc_abi::Primitive::Pointer(_) => (false, true, false),
        },
        _ => unreachable!("all instances of VaArgSafe are represented as scalars"),
    };

    let num_regs_addr = if is_int || is_soft_float_abi {
        va_list_addr // gpr
    } else {
        bx.inbounds_ptradd(va_list_addr, bx.const_usize(1)) // fpr
    };

    let mut num_regs = bx.load(bx.type_i8(), num_regs_addr, dl.i8_align.abi);

    // "Align" the register count when the type is passed as `i64`.
    if is_i64 || (is_f64 && is_soft_float_abi) {
        num_regs = bx.add(num_regs, bx.const_u8(1));
        num_regs = bx.and(num_regs, bx.const_u8(0b1111_1110));
    }

    let max_regs = 8u8;
    let use_regs = bx.icmp(IntPredicate::IntULT, num_regs, bx.const_u8(max_regs));
    let ptr_align_abi = bx.tcx().data_layout.pointer_align().abi;

    let in_reg = bx.append_sibling_block("va_arg.in_reg");
    let in_mem = bx.append_sibling_block("va_arg.in_mem");
    let end = bx.append_sibling_block("va_arg.end");

    bx.cond_br(use_regs, in_reg, in_mem);

    let reg_addr = {
        bx.switch_to_block(in_reg);

        let reg_safe_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(1 + 1 + 2 + 4));
        let mut reg_addr = bx.load(bx.type_ptr(), reg_safe_area_ptr, ptr_align_abi);

        // Floating-point registers start after the general-purpose registers.
        if !is_int && !is_soft_float_abi {
            reg_addr = bx.inbounds_ptradd(reg_addr, bx.cx.const_usize(32))
        }

        // Get the address of the saved value by scaling the number of
        // registers we've used by the size of each register slot.
        let reg_size = if is_int || is_soft_float_abi { 4 } else { 8 };
        let reg_offset = bx.mul(num_regs, bx.cx().const_u8(reg_size));
        let reg_addr = bx.inbounds_ptradd(reg_addr, reg_offset);

        // Increase the used-register count.
        let reg_incr = if is_i64 || (is_f64 && is_soft_float_abi) { 2 } else { 1 };
        let new_num_regs = bx.add(num_regs, bx.cx.const_u8(reg_incr));
        bx.store(new_num_regs, num_regs_addr, dl.i8_align.abi);

        bx.br(end);

        reg_addr
    };

    let mem_addr = {
        bx.switch_to_block(in_mem);

        bx.store(bx.const_u8(max_regs), num_regs_addr, dl.i8_align.abi);

        // Everything in the overflow area is rounded up to a size of at least 4.
        let overflow_area_align = Align::from_bytes(4).unwrap();

        let size = if !is_indirect {
            layout.layout.size.align_to(overflow_area_align)
        } else {
            dl.pointer_size()
        };

        let overflow_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(1 + 1 + 2));
        let mut overflow_area = bx.load(bx.type_ptr(), overflow_area_ptr, ptr_align_abi);

        // Round up address of argument to alignment
        if layout.layout.align.abi > overflow_area_align {
            overflow_area = round_pointer_up_to_alignment(
                bx,
                overflow_area,
                layout.layout.align.abi,
                bx.type_ptr(),
            );
        }

        let mem_addr = overflow_area;

        // Increase the overflow area.
        overflow_area = bx.inbounds_ptradd(overflow_area, bx.const_usize(size.bytes()));
        bx.store(overflow_area, overflow_area_ptr, ptr_align_abi);

        bx.br(end);

        mem_addr
    };

    // Return the appropriate result.
    bx.switch_to_block(end);
    let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]);
    let val_type = layout.llvm_type(bx);
    let val_addr =
        if is_indirect { bx.load(bx.cx.type_ptr(), val_addr, ptr_align_abi) } else { val_addr };
    bx.load(val_type, val_addr, layout.align.abi)
}

fn emit_s390x_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    let dl = bx.cx.data_layout();

    // Implementation of the s390x ELF ABI calling convention for va_args, see
    // https://github.com/IBM/s390x-abi (chapter 1.2.4)
    //
    // typedef struct __va_list_tag {
    //     long __gpr;
    //     long __fpr;
    //     void *__overflow_arg_area;
    //     void *__reg_save_area;
    // } va_list[1];
    let va_list_addr = list.immediate();

    // There is no padding between fields since `long` and `void*` both have size=8 align=8.
    // https://github.com/IBM/s390x-abi (Table 1.1.: Scalar types)
    let i64_offset = 8;
    let ptr_offset = 8;
    let gpr = va_list_addr;
    let fpr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(i64_offset));
    let overflow_arg_area = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * i64_offset));
    let reg_save_area =
        bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * i64_offset + ptr_offset));

    let layout = bx.cx.layout_of(target_ty);

    let in_reg = bx.append_sibling_block("va_arg.in_reg");
    let in_mem = bx.append_sibling_block("va_arg.in_mem");
    let end = bx.append_sibling_block("va_arg.end");
    let ptr_align_abi = dl.pointer_align().abi;

    // FIXME: vector ABI not yet supported.
    let target_ty_size = bx.cx.size_of(target_ty).bytes();
    let indirect: bool = target_ty_size > 8 || !target_ty_size.is_power_of_two();
    let unpadded_size = if indirect { 8 } else { target_ty_size };
    let padded_size = 8;
    let padding = padded_size - unpadded_size;

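    // A value is loaded from the general-purpose part of the register save area unless it is a
    // single floating-point scalar. At most 5 GPRs (r2..r6) and 4 FPRs (f0, f2, f4, f6) hold
    // arguments; in the register save area the GPR arguments start at slot index 2 (byte
    // offset 16) and the FPR arguments at slot index 16 (byte offset 128).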
    let gpr_type = indirect || !layout.is_single_fp_element(bx.cx);
    let (max_regs, reg_count, reg_save_index, reg_padding) =
        if gpr_type { (5, gpr, 2, padding) } else { (4, fpr, 16, 0) };

    // Check whether the value was passed in a register or in memory.
    let reg_count_v = bx.load(bx.type_i64(), reg_count, Align::from_bytes(8).unwrap());
    let use_regs = bx.icmp(IntPredicate::IntULT, reg_count_v, bx.const_u64(max_regs));
    bx.cond_br(use_regs, in_reg, in_mem);

    // Emit code to load the value if it was passed in a register.
    bx.switch_to_block(in_reg);

    // Work out the address of the value in the register save area.
    let reg_ptr_v = bx.load(bx.type_ptr(), reg_save_area, ptr_align_abi);
    let scaled_reg_count = bx.mul(reg_count_v, bx.const_u64(8));
    let reg_off = bx.add(scaled_reg_count, bx.const_u64(reg_save_index * 8 + reg_padding));
    let reg_addr = bx.ptradd(reg_ptr_v, reg_off);

    // Update the register count.
    let new_reg_count_v = bx.add(reg_count_v, bx.const_u64(1));
    bx.store(new_reg_count_v, reg_count, Align::from_bytes(8).unwrap());
    bx.br(end);

    // Emit code to load the value if it was passed in memory.
    bx.switch_to_block(in_mem);

    // Work out the address of the value in the argument overflow area.
    let arg_ptr_v = bx.load(bx.type_ptr(), overflow_arg_area, ptr_align_abi);
    let arg_off = bx.const_u64(padding);
    let mem_addr = bx.ptradd(arg_ptr_v, arg_off);

    // Update the argument overflow area pointer.
    let arg_size = bx.cx().const_u64(padded_size);
    let new_arg_ptr_v = bx.inbounds_ptradd(arg_ptr_v, arg_size);
    bx.store(new_arg_ptr_v, overflow_arg_area, ptr_align_abi);
    bx.br(end);

    // Return the appropriate result.
    bx.switch_to_block(end);
    let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]);
    let val_type = layout.llvm_type(bx);
    let val_addr =
        if indirect { bx.load(bx.cx.type_ptr(), val_addr, ptr_align_abi) } else { val_addr };
    bx.load(val_type, val_addr, layout.align.abi)
}

fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    let dl = bx.cx.data_layout();

    // Implementation of the systemv x86_64 ABI calling convention for va_args, see
    // https://gitlab.com/x86-psABIs/x86-64-ABI (section 3.5.7). This implementation is heavily
    // based on the one in clang.

    // We're able to take some shortcuts because the return type of `va_arg` must implement the
    // `VaArgSafe` trait. Currently, only pointers, f64, i32, u32, i64 and u64 implement this trait.

    // typedef struct __va_list_tag {
    //     unsigned int gp_offset;
    //     unsigned int fp_offset;
    //     void *overflow_arg_area;
    //     void *reg_save_area;
    // } va_list[1];
    let va_list_addr = list.immediate();

    // Peel off any newtype wrappers.
    //
    // The "C" ABI does not unwrap newtypes (see `ReprOptions::inhibit_newtype_abi_optimization`).
    // Here, we do actually want the unwrapped representation, because that is how LLVM/Clang
    // pass such types to variadic functions.
    //
    // An example of a type that must be unwrapped is `Foo` below. Without the unwrapping, it has
    // `BackendRepr::Memory`, but we need it to be `BackendRepr::Scalar` to generate correct code.
    //
    // ```
    // #[repr(C)]
    // struct Empty;
    //
    // #[repr(C)]
    // struct Foo([Empty; 8], i32);
    // ```
    let layout = {
        let mut layout = bx.cx.layout_of(target_ty);

        while let Some((_, inner)) = layout.non_1zst_field(bx.cx) {
            layout = inner;
        }

        layout
    };

    // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
    // in the registers. If not go to step 7.

    // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of
    // general purpose registers needed to pass type and num_fp to hold
    // the number of floating point registers needed.

    let mut num_gp_registers = 0;
    let mut num_fp_registers = 0;

    let mut registers_for_primitive = |p| match p {
        Primitive::Int(integer, _is_signed) => {
            num_gp_registers += integer.size().bytes().div_ceil(8) as u32;
        }
        Primitive::Float(float) => {
            num_fp_registers += float.size().bytes().div_ceil(16) as u32;
        }
        Primitive::Pointer(_) => {
            num_gp_registers += 1;
        }
    };
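    // Every scalar produced by a `VaArgSafe` type fits in a single register, so each of these
    // counts ends up as 0 or 1 for a scalar (and at most 2 in total for a scalar pair).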

    match layout.layout.backend_repr() {
        BackendRepr::Scalar(scalar) => {
            registers_for_primitive(scalar.primitive());
        }
        BackendRepr::ScalarPair(scalar1, scalar2) => {
            registers_for_primitive(scalar1.primitive());
            registers_for_primitive(scalar2.primitive());
        }
        BackendRepr::SimdVector { .. } => {
            // Because no instance of VaArgSafe uses a non-scalar `BackendRepr`.
            unreachable!(
                "No x86-64 SysV va_arg implementation for {:?}",
                layout.layout.backend_repr()
            )
        }
        BackendRepr::Memory { .. } => {
            let mem_addr = x86_64_sysv64_va_arg_from_memory(bx, va_list_addr, layout);
            return bx.load(layout.llvm_type(bx), mem_addr, layout.align.abi);
        }
    };

    // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into
    // registers. In the case: l->gp_offset > 48 - num_gp * 8 or
    // l->fp_offset > 176 - num_fp * 16 go to step 7.

    let unsigned_int_offset = 4;
    let ptr_offset = 8;
    let gp_offset_ptr = va_list_addr;
    let fp_offset_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(unsigned_int_offset));

    let gp_offset_v = bx.load(bx.type_i32(), gp_offset_ptr, Align::from_bytes(8).unwrap());
    let fp_offset_v = bx.load(bx.type_i32(), fp_offset_ptr, Align::from_bytes(4).unwrap());

    let mut use_regs = bx.const_bool(false);

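    // The register save area holds 6 general-purpose registers (6 * 8 = 48 bytes) followed by
    // 8 SSE registers (8 * 16 = 128 bytes, ending at offset 176); that is where the 48 and 176
    // limits below come from.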
    if num_gp_registers > 0 {
        let max_offset_val = 48u32 - num_gp_registers * 8;
        let fits_in_gp = bx.icmp(IntPredicate::IntULE, gp_offset_v, bx.const_u32(max_offset_val));
        use_regs = fits_in_gp;
    }

    if num_fp_registers > 0 {
        let max_offset_val = 176u32 - num_fp_registers * 16;
        let fits_in_fp = bx.icmp(IntPredicate::IntULE, fp_offset_v, bx.const_u32(max_offset_val));
        use_regs = if num_gp_registers > 0 { bx.and(use_regs, fits_in_fp) } else { fits_in_fp };
    }

    let in_reg = bx.append_sibling_block("va_arg.in_reg");
    let in_mem = bx.append_sibling_block("va_arg.in_mem");
    let end = bx.append_sibling_block("va_arg.end");

    bx.cond_br(use_regs, in_reg, in_mem);

    // Emit code to load the value if it was passed in a register.
    bx.switch_to_block(in_reg);

    // AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with
    // an offset of l->gp_offset and/or l->fp_offset. This may require
    // copying to a temporary location in case the parameter is passed
    // in different register classes or requires an alignment greater
    // than 8 for general purpose registers and 16 for XMM registers.
    //
    // FIXME(llvm): This really results in shameful code when we end up needing to
    // collect arguments from different places; often what should result in a
    // simple assembling of a structure from scattered addresses has many more
    // loads than necessary. Can we clean this up?
    let reg_save_area_ptr =
        bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * unsigned_int_offset + ptr_offset));
    let reg_save_area_v = bx.load(bx.type_ptr(), reg_save_area_ptr, dl.pointer_align().abi);

    let reg_addr = match layout.layout.backend_repr() {
        BackendRepr::Scalar(scalar) => match scalar.primitive() {
            Primitive::Int(_, _) | Primitive::Pointer(_) => {
                let reg_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v);

                // Copy into a temporary if the type is more aligned than the register save area.
                let gp_align = Align::from_bytes(8).unwrap();
                copy_to_temporary_if_more_aligned(bx, reg_addr, layout, gp_align)
            }
            Primitive::Float(_) => bx.inbounds_ptradd(reg_save_area_v, fp_offset_v),
        },
        BackendRepr::ScalarPair(scalar1, scalar2) => {
            let ty_lo = bx.cx().scalar_pair_element_backend_type(layout, 0, false);
            let ty_hi = bx.cx().scalar_pair_element_backend_type(layout, 1, false);

            let align_lo = layout.field(bx.cx, 0).layout.align().abi;
            let align_hi = layout.field(bx.cx, 1).layout.align().abi;

            match (scalar1.primitive(), scalar2.primitive()) {
                (Primitive::Float(_), Primitive::Float(_)) => {
                    // SSE registers are spaced 16 bytes apart in the register save
                    // area, we need to collect the two eightbytes together.
                    // The ABI isn't explicit about this, but it seems reasonable
                    // to assume that the slots are 16-byte aligned, since the stack is
                    // naturally 16-byte aligned and the prologue is expected to store
                    // all the SSE registers to the RSA.
                    let reg_lo_addr = bx.inbounds_ptradd(reg_save_area_v, fp_offset_v);
                    let reg_hi_addr = bx.inbounds_ptradd(reg_lo_addr, bx.const_i32(16));

                    let align = layout.layout.align().abi;
                    let tmp = bx.alloca(layout.layout.size(), align);

                    let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo);
                    let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi);

                    let offset = scalar1.size(bx.cx).align_to(align_hi).bytes();
                    let field0 = tmp;
                    let field1 = bx.inbounds_ptradd(tmp, bx.const_u32(offset as u32));

                    bx.store(reg_lo, field0, align);
                    bx.store(reg_hi, field1, align);

                    tmp
                }
                (Primitive::Float(_), _) | (_, Primitive::Float(_)) => {
                    let gp_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v);
                    let fp_addr = bx.inbounds_ptradd(reg_save_area_v, fp_offset_v);

                    let (reg_lo_addr, reg_hi_addr) = match scalar1.primitive() {
                        Primitive::Float(_) => (fp_addr, gp_addr),
                        Primitive::Int(_, _) | Primitive::Pointer(_) => (gp_addr, fp_addr),
                    };

                    let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi);

                    let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo);
                    let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi);

                    let offset = scalar1.size(bx.cx).align_to(align_hi).bytes();
                    let field0 = tmp;
                    let field1 = bx.inbounds_ptradd(tmp, bx.const_u32(offset as u32));

                    bx.store(reg_lo, field0, align_lo);
                    bx.store(reg_hi, field1, align_hi);

                    tmp
                }
                (_, _) => {
                    // Two integer/pointer values are just contiguous in memory.
                    let reg_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v);

                    // Copy into a temporary if the type is more aligned than the register save area.
                    let gp_align = Align::from_bytes(8).unwrap();
                    copy_to_temporary_if_more_aligned(bx, reg_addr, layout, gp_align)
                }
            }
        }
        // The previous match on `BackendRepr` means control flow already escaped.
        BackendRepr::SimdVector { .. } | BackendRepr::Memory { .. } => unreachable!(),
    };

    // AMD64-ABI 3.5.7p5: Step 5. Set:
    // l->gp_offset = l->gp_offset + num_gp * 8
    if num_gp_registers > 0 {
        let offset = bx.const_u32(num_gp_registers * 8);
        let sum = bx.add(gp_offset_v, offset);
        // An alignment of 8 because `__va_list_tag` is 8-aligned and this is its first field.
        bx.store(sum, gp_offset_ptr, Align::from_bytes(8).unwrap());
    }

    // l->fp_offset = l->fp_offset + num_fp * 16.
    if num_fp_registers > 0 {
        let offset = bx.const_u32(num_fp_registers * 16);
        let sum = bx.add(fp_offset_v, offset);
        bx.store(sum, fp_offset_ptr, Align::from_bytes(4).unwrap());
    }

    bx.br(end);

    bx.switch_to_block(in_mem);
    let mem_addr = x86_64_sysv64_va_arg_from_memory(bx, va_list_addr, layout);
    bx.br(end);

    bx.switch_to_block(end);

    let val_type = layout.llvm_type(bx);
    let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]);

    bx.load(val_type, val_addr, layout.align.abi)
}

/// Copy into a temporary if the type is more aligned than the register save area.
fn copy_to_temporary_if_more_aligned<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    reg_addr: &'ll Value,
    layout: TyAndLayout<'tcx, Ty<'tcx>>,
    src_align: Align,
) -> &'ll Value {
    if layout.layout.align.abi > src_align {
        let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi);
        bx.memcpy(
            tmp,
            layout.layout.align.abi,
            reg_addr,
            src_align,
            bx.const_u32(layout.layout.size().bytes() as u32),
            MemFlags::empty(),
        );
        tmp
    } else {
        reg_addr
    }
}

fn x86_64_sysv64_va_arg_from_memory<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    va_list_addr: &'ll Value,
    layout: TyAndLayout<'tcx, Ty<'tcx>>,
) -> &'ll Value {
    let dl = bx.cx.data_layout();
    let ptr_align_abi = dl.data_layout().pointer_align().abi;

    let overflow_arg_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.const_usize(8));

    let overflow_arg_area_v = bx.load(bx.type_ptr(), overflow_arg_area_ptr, ptr_align_abi);
    // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16
    // byte boundary if alignment needed by type exceeds 8 byte boundary.
    // It isn't stated explicitly in the standard, but in practice we use
    // alignment greater than 16 where necessary.
    if layout.layout.align.abi.bytes() > 8 {
        unreachable!("all instances of VaArgSafe have an alignment <= 8");
    }

    // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area.
    let mem_addr = overflow_arg_area_v;

    // AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to:
    // l->overflow_arg_area + sizeof(type).
    // AMD64-ABI 3.5.7p5: Step 10. Align l->overflow_arg_area upwards to
    // an 8 byte boundary.
    let size_in_bytes = layout.layout.size().bytes();
    let offset = bx.const_i32(size_in_bytes.next_multiple_of(8) as i32);
    let overflow_arg_area = bx.inbounds_ptradd(overflow_arg_area_v, offset);
    bx.store(overflow_arg_area, overflow_arg_area_ptr, ptr_align_abi);

    mem_addr
}

fn emit_xtensa_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    // Implementation of va_arg for Xtensa. There doesn't seem to be an authoritative source for
    // this, other than "what GCC does".
    //
    // The va_list type has three fields:
    // struct __va_list_tag {
    //   int32_t *va_stk; // Arguments passed on the stack
    //   int32_t *va_reg; // Arguments passed in registers, saved to memory by the prologue.
    //   int32_t va_ndx; // Offset into the arguments, in bytes
    // };
    //
    // The first 24 bytes (equivalent to 6 registers) come from va_reg, the rest from va_stk.
    // Thus if va_ndx is less than 24, the next va_arg *may* read from va_reg,
    // otherwise it must come from va_stk.
    //
    // Primitive arguments are never split between registers and the stack. For example, if loading an 8 byte
    // primitive value and va_ndx = 20, we instead bump the offset and read everything from va_stk.
    let va_list_addr = list.immediate();
    // FIXME: handle multi-field structs that split across regsave/stack?
    let layout = bx.cx.layout_of(target_ty);
    let from_stack = bx.append_sibling_block("va_arg.from_stack");
    let from_regsave = bx.append_sibling_block("va_arg.from_regsave");
    let end = bx.append_sibling_block("va_arg.end");
    let ptr_align_abi = bx.tcx().data_layout.pointer_align().abi;

    // (*va).va_ndx
    let va_reg_offset = 4;
    let va_ndx_offset = va_reg_offset + 4;
    let offset_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(va_ndx_offset));

    let offset = bx.load(bx.type_i32(), offset_ptr, bx.tcx().data_layout.i32_align.abi);
    let offset = round_up_to_alignment(bx, offset, layout.align.abi);

    let slot_size = layout.size.align_to(Align::from_bytes(4).unwrap()).bytes() as i32;

    // Update the offset in va_list, by adding the slot's size.
    let offset_next = bx.add(offset, bx.const_i32(slot_size));

    // Figure out where to look for our value. We do that by checking the end of our slot (offset_next).
    // If that is within the regsave area, then load from there. Otherwise load from the stack area.
    let regsave_size = bx.const_i32(24);
    let use_regsave = bx.icmp(IntPredicate::IntULE, offset_next, regsave_size);
    bx.cond_br(use_regsave, from_regsave, from_stack);

    bx.switch_to_block(from_regsave);
    // update va_ndx
    bx.store(offset_next, offset_ptr, ptr_align_abi);

    // (*va).va_reg
    let regsave_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(va_reg_offset));
    let regsave_area = bx.load(bx.type_ptr(), regsave_area_ptr, ptr_align_abi);
    let regsave_value_ptr = bx.inbounds_ptradd(regsave_area, offset);
    bx.br(end);

    bx.switch_to_block(from_stack);

    // The first time we switch from regsave to stack we need to adjust our offsets a bit.
    // va_stk is set up such that the first stack argument is always at va_stk + 32.
    // The corrected offset is written back into the va_list struct.

    // let offset_corrected = cmp::max(offset, 32);
    let stack_offset_start = bx.const_i32(32);
    let needs_correction = bx.icmp(IntPredicate::IntULE, offset, stack_offset_start);
    let offset_corrected = bx.select(needs_correction, stack_offset_start, offset);

    // let offset_next_corrected = offset_corrected + slot_size;
    // va_ndx = offset_next_corrected;
    let offset_next_corrected = bx.add(offset_corrected, bx.const_i32(slot_size));
    // update va_ndx
    bx.store(offset_next_corrected, offset_ptr, ptr_align_abi);

    // let stack_value_ptr = unsafe { (*va).va_stk.byte_add(offset_corrected) };
    let stack_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(0));
    let stack_area = bx.load(bx.type_ptr(), stack_area_ptr, ptr_align_abi);
    let stack_value_ptr = bx.inbounds_ptradd(stack_area, offset_corrected);
    bx.br(end);

    bx.switch_to_block(end);

    // On big-endian, for values smaller than the slot size we'd have to align the read to the end
    // of the slot rather than the start. While the ISA and GCC support big-endian, all the Xtensa
    // targets supported by rustc are little-endian so don't worry about it.

    // if from_regsave {
    //     unsafe { *regsave_value_ptr }
    // } else {
    //     unsafe { *stack_value_ptr }
    // }
    assert!(bx.tcx().sess.target.endian == Endian::Little);
    let value_ptr =
        bx.phi(bx.type_ptr(), &[regsave_value_ptr, stack_value_ptr], &[from_regsave, from_stack]);
    bx.load(layout.llvm_type(bx), value_ptr, layout.align.abi)
}

pub(super) fn emit_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    addr: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    // Determine the va_arg implementation to use. The LLVM va_arg instruction
    // is lacking in some instances, so we should only use it as a fallback.
    let target = &bx.cx.tcx.sess.target;

    match &*target.arch {
        "x86" => emit_ptr_va_arg(
            bx,
            addr,
            target_ty,
            PassMode::Direct,
            SlotSize::Bytes4,
            if target.is_like_windows { AllowHigherAlign::No } else { AllowHigherAlign::Yes },
            ForceRightAdjust::No,
        ),
        "aarch64" | "arm64ec" if target.is_like_windows || target.is_like_darwin => {
            emit_ptr_va_arg(
                bx,
                addr,
                target_ty,
                PassMode::Direct,
                SlotSize::Bytes8,
                if target.is_like_windows { AllowHigherAlign::No } else { AllowHigherAlign::Yes },
                ForceRightAdjust::No,
            )
        }
        "aarch64" => emit_aapcs_va_arg(bx, addr, target_ty),
        "s390x" => emit_s390x_va_arg(bx, addr, target_ty),
        "powerpc" => emit_powerpc_va_arg(bx, addr, target_ty),
        "powerpc64" | "powerpc64le" => emit_ptr_va_arg(
            bx,
            addr,
            target_ty,
            PassMode::Direct,
            SlotSize::Bytes8,
            AllowHigherAlign::Yes,
            match &*target.arch {
                "powerpc64" => ForceRightAdjust::Yes,
                _ => ForceRightAdjust::No,
            },
        ),
        // Windows x86_64
        "x86_64" if target.is_like_windows => {
            let target_ty_size = bx.cx.size_of(target_ty).bytes();
            emit_ptr_va_arg(
                bx,
                addr,
                target_ty,
                if target_ty_size > 8 || !target_ty_size.is_power_of_two() {
                    PassMode::Indirect
                } else {
                    PassMode::Direct
                },
                SlotSize::Bytes8,
                AllowHigherAlign::No,
                ForceRightAdjust::No,
            )
        }
        // This includes `target.is_like_darwin`, which on x86_64 targets is like sysv64.
        "x86_64" => emit_x86_64_sysv64_va_arg(bx, addr, target_ty),
        "xtensa" => emit_xtensa_va_arg(bx, addr, target_ty),
        // For all other architecture/OS combinations fall back to using
        // the LLVM va_arg instruction.
        // https://llvm.org/docs/LangRef.html#va-arg-instruction
        _ => bx.va_arg(addr.immediate(), bx.cx.layout_of(target_ty).llvm_type(bx.cx)),
    }
}