miri/shims/x86/
sse41.rs

1use rustc_abi::CanonAbi;
2use rustc_middle::ty::Ty;
3use rustc_span::Symbol;
4use rustc_target::callconv::FnAbi;
5
6use super::{conditional_dot_product, mpsadbw, packusdw, round_all, round_first, test_bits_masked};
7use crate::*;
8
9impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
10pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
11    fn emulate_x86_sse41_intrinsic(
12        &mut self,
13        link_name: Symbol,
14        abi: &FnAbi<'tcx, Ty<'tcx>>,
15        args: &[OpTy<'tcx>],
16        dest: &MPlaceTy<'tcx>,
17    ) -> InterpResult<'tcx, EmulateItemResult> {
18        let this = self.eval_context_mut();
19        this.expect_target_feature_for_intrinsic(link_name, "sse4.1")?;
20        // Prefix should have already been checked.
21        let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.sse41.").unwrap();
22
23        match unprefixed_name {
24            // Used to implement the _mm_insert_ps function.
25            // Takes one element of `right` and inserts it into `left` and
26            // optionally zero some elements. Source index is specified
27            // in bits `6..=7` of `imm`, destination index is specified in
28            // bits `4..=5` if `imm`, and `i`th bit specifies whether element
29            // `i` is zeroed.
30            "insertps" => {
31                let [left, right, imm] = this.check_shim(abi, CanonAbi::C, link_name, args)?;
32
33                let (left, left_len) = this.project_to_simd(left)?;
34                let (right, right_len) = this.project_to_simd(right)?;
35                let (dest, dest_len) = this.project_to_simd(dest)?;
36
37                assert_eq!(dest_len, left_len);
38                assert_eq!(dest_len, right_len);
39                assert!(dest_len <= 4);
40
41                let imm = this.read_scalar(imm)?.to_u8()?;
42                let src_index = u64::from((imm >> 6) & 0b11);
43                let dst_index = u64::from((imm >> 4) & 0b11);
44
45                let src_value = this.read_immediate(&this.project_index(&right, src_index)?)?;
46
47                for i in 0..dest_len {
48                    let dest = this.project_index(&dest, i)?;
49
50                    if imm & (1 << i) != 0 {
51                        // zeroed
52                        this.write_scalar(Scalar::from_u32(0), &dest)?;
53                    } else if i == dst_index {
54                        // copy from `right` at specified index
55                        this.write_immediate(*src_value, &dest)?;
56                    } else {
57                        // copy from `left`
58                        this.copy_op(&this.project_index(&left, i)?, &dest)?;
59                    }
60                }
61            }
62            // Used to implement the _mm_packus_epi32 function.
63            // Concatenates two 32-bit signed integer vectors and converts
64            // the result to a 16-bit unsigned integer vector with saturation.
65            "packusdw" => {
66                let [left, right] = this.check_shim(abi, CanonAbi::C, link_name, args)?;
67
68                packusdw(this, left, right, dest)?;
69            }
70            // Used to implement the _mm_dp_ps and _mm_dp_pd functions.
71            // Conditionally multiplies the packed floating-point elements in
72            // `left` and `right` using the high 4 bits in `imm`, sums the four
73            // products, and conditionally stores the sum in `dest` using the low
74            // 4 bits of `imm`.
75            "dpps" | "dppd" => {
76                let [left, right, imm] = this.check_shim(abi, CanonAbi::C, link_name, args)?;
77
78                conditional_dot_product(this, left, right, imm, dest)?;
79            }
80            // Used to implement the _mm_floor_ss, _mm_ceil_ss and _mm_round_ss
81            // functions. Rounds the first element of `right` according to `rounding`
82            // and copies the remaining elements from `left`.
83            "round.ss" => {
84                let [left, right, rounding] = this.check_shim(abi, CanonAbi::C, link_name, args)?;
85
86                round_first::<rustc_apfloat::ieee::Single>(this, left, right, rounding, dest)?;
87            }
88            // Used to implement the _mm_floor_ps, _mm_ceil_ps and _mm_round_ps
89            // functions. Rounds the elements of `op` according to `rounding`.
90            "round.ps" => {
91                let [op, rounding] = this.check_shim(abi, CanonAbi::C, link_name, args)?;
92
93                round_all::<rustc_apfloat::ieee::Single>(this, op, rounding, dest)?;
94            }
95            // Used to implement the _mm_floor_sd, _mm_ceil_sd and _mm_round_sd
96            // functions. Rounds the first element of `right` according to `rounding`
97            // and copies the remaining elements from `left`.
98            "round.sd" => {
99                let [left, right, rounding] = this.check_shim(abi, CanonAbi::C, link_name, args)?;
100
101                round_first::<rustc_apfloat::ieee::Double>(this, left, right, rounding, dest)?;
102            }
103            // Used to implement the _mm_floor_pd, _mm_ceil_pd and _mm_round_pd
104            // functions. Rounds the elements of `op` according to `rounding`.
105            "round.pd" => {
106                let [op, rounding] = this.check_shim(abi, CanonAbi::C, link_name, args)?;
107
108                round_all::<rustc_apfloat::ieee::Double>(this, op, rounding, dest)?;
109            }
110            // Used to implement the _mm_minpos_epu16 function.
111            // Find the minimum unsinged 16-bit integer in `op` and
112            // returns its value and position.
113            "phminposuw" => {
114                let [op] = this.check_shim(abi, CanonAbi::C, link_name, args)?;
115
116                let (op, op_len) = this.project_to_simd(op)?;
117                let (dest, dest_len) = this.project_to_simd(dest)?;
118
119                // Find minimum
120                let mut min_value = u16::MAX;
121                let mut min_index = 0;
122                for i in 0..op_len {
123                    let op = this.read_scalar(&this.project_index(&op, i)?)?.to_u16()?;
124                    if op < min_value {
125                        min_value = op;
126                        min_index = i;
127                    }
128                }
129
130                // Write value and index
131                this.write_scalar(Scalar::from_u16(min_value), &this.project_index(&dest, 0)?)?;
132                this.write_scalar(
133                    Scalar::from_u16(min_index.try_into().unwrap()),
134                    &this.project_index(&dest, 1)?,
135                )?;
136                // Fill remainder with zeros
137                for i in 2..dest_len {
138                    this.write_scalar(Scalar::from_u16(0), &this.project_index(&dest, i)?)?;
139                }
140            }
141            // Used to implement the _mm_mpsadbw_epu8 function.
142            // Compute the sum of absolute differences of quadruplets of unsigned
143            // 8-bit integers in `left` and `right`, and store the 16-bit results
144            // in `right`. Quadruplets are selected from `left` and `right` with
145            // offsets specified in `imm`.
146            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mpsadbw_epu8
147            "mpsadbw" => {
148                let [left, right, imm] = this.check_shim(abi, CanonAbi::C, link_name, args)?;
149
150                mpsadbw(this, left, right, imm, dest)?;
151            }
152            // Used to implement the _mm_testz_si128, _mm_testc_si128
153            // and _mm_testnzc_si128 functions.
154            // Tests `(op & mask) == 0`, `(op & mask) == mask` or
155            // `(op & mask) != 0 && (op & mask) != mask`
156            "ptestz" | "ptestc" | "ptestnzc" => {
157                let [op, mask] = this.check_shim(abi, CanonAbi::C, link_name, args)?;
158
159                let (all_zero, masked_set) = test_bits_masked(this, op, mask)?;
160                let res = match unprefixed_name {
161                    "ptestz" => all_zero,
162                    "ptestc" => masked_set,
163                    "ptestnzc" => !all_zero && !masked_set,
164                    _ => unreachable!(),
165                };
166
167                this.write_scalar(Scalar::from_i32(res.into()), dest)?;
168            }
169            _ => return interp_ok(EmulateItemResult::NotSupported),
170        }
171        interp_ok(EmulateItemResult::NeedsReturn)
172    }
173}