miri/shims/x86/
gfni.rs

1use rustc_abi::CanonAbi;
2use rustc_middle::ty::Ty;
3use rustc_span::Symbol;
4use rustc_target::callconv::FnAbi;
5
6use crate::*;
7
8impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
9pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
10    fn emulate_x86_gfni_intrinsic(
11        &mut self,
12        link_name: Symbol,
13        abi: &FnAbi<'tcx, Ty<'tcx>>,
14        args: &[OpTy<'tcx>],
15        dest: &MPlaceTy<'tcx>,
16    ) -> InterpResult<'tcx, EmulateItemResult> {
17        let this = self.eval_context_mut();
18
19        // Prefix should have already been checked.
20        let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.").unwrap();
21
22        this.expect_target_feature_for_intrinsic(link_name, "gfni")?;
23        if unprefixed_name.ends_with(".256") {
24            this.expect_target_feature_for_intrinsic(link_name, "avx")?;
25        } else if unprefixed_name.ends_with(".512") {
26            this.expect_target_feature_for_intrinsic(link_name, "avx512f")?;
27        }
28
29        match unprefixed_name {
30            // Used to implement the `_mm{, 256, 512}_gf2p8affine_epi64_epi8` functions.
31            // See `affine_transform` for details.
32            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=gf2p8affine_
33            "vgf2p8affineqb.128" | "vgf2p8affineqb.256" | "vgf2p8affineqb.512" => {
34                let [left, right, imm8] = this.check_shim(abi, CanonAbi::C, link_name, args)?;
35                affine_transform(this, left, right, imm8, dest, /* inverse */ false)?;
36            }
37            // Used to implement the `_mm{, 256, 512}_gf2p8affineinv_epi64_epi8` functions.
38            // See `affine_transform` for details.
39            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=gf2p8affineinv
40            "vgf2p8affineinvqb.128" | "vgf2p8affineinvqb.256" | "vgf2p8affineinvqb.512" => {
41                let [left, right, imm8] = this.check_shim(abi, CanonAbi::C, link_name, args)?;
42                affine_transform(this, left, right, imm8, dest, /* inverse */ true)?;
43            }
44            // Used to implement the `_mm{, 256, 512}_gf2p8mul_epi8` functions.
45            // Multiplies packed 8-bit integers in `left` and `right` in the finite field GF(2^8)
46            // and store the results in `dst`. The field GF(2^8) is represented in
47            // polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1.
48            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=gf2p8mul
49            "vgf2p8mulb.128" | "vgf2p8mulb.256" | "vgf2p8mulb.512" => {
50                let [left, right] = this.check_shim(abi, CanonAbi::C, link_name, args)?;
51                let (left, left_len) = this.project_to_simd(left)?;
52                let (right, right_len) = this.project_to_simd(right)?;
53                let (dest, dest_len) = this.project_to_simd(dest)?;
54
55                assert_eq!(left_len, right_len);
56                assert_eq!(dest_len, right_len);
57
58                for i in 0..dest_len {
59                    let left = this.read_scalar(&this.project_index(&left, i)?)?.to_u8()?;
60                    let right = this.read_scalar(&this.project_index(&right, i)?)?.to_u8()?;
61                    let dest = this.project_index(&dest, i)?;
62                    this.write_scalar(Scalar::from_u8(gf2p8_mul(left, right)), &dest)?;
63                }
64            }
65            _ => return interp_ok(EmulateItemResult::NotSupported),
66        }
67        interp_ok(EmulateItemResult::NeedsReturn)
68    }
69}
70
71/// Calculates the affine transformation `right * left + imm8` inside the finite field GF(2^8).
72/// `right` is an 8x8 bit matrix, `left` and `imm8` are bit vectors.
73/// If `inverse` is set, then the inverse transformation with respect to the reduction polynomial
74/// x^8 + x^4 + x^3 + x + 1 is performed instead.
75fn affine_transform<'tcx>(
76    ecx: &mut MiriInterpCx<'tcx>,
77    left: &OpTy<'tcx>,
78    right: &OpTy<'tcx>,
79    imm8: &OpTy<'tcx>,
80    dest: &MPlaceTy<'tcx>,
81    inverse: bool,
82) -> InterpResult<'tcx, ()> {
83    let (left, left_len) = ecx.project_to_simd(left)?;
84    let (right, right_len) = ecx.project_to_simd(right)?;
85    let (dest, dest_len) = ecx.project_to_simd(dest)?;
86
87    assert_eq!(dest_len, right_len);
88    assert_eq!(dest_len, left_len);
89
90    let imm8 = ecx.read_scalar(imm8)?.to_u8()?;
91
92    // Each 8x8 bit matrix gets multiplied with eight bit vectors.
93    // Therefore, the iteration is done in chunks of eight.
94    for i in (0..dest_len).step_by(8) {
95        // Get the bit matrix.
96        let mut matrix = [0u8; 8];
97        for j in 0..8 {
98            matrix[usize::try_from(j).unwrap()] =
99                ecx.read_scalar(&ecx.project_index(&right, i.wrapping_add(j))?)?.to_u8()?;
100        }
101
102        // Multiply the matrix with the vector and perform the addition.
103        for j in 0..8 {
104            let index = i.wrapping_add(j);
105            let left = ecx.read_scalar(&ecx.project_index(&left, index)?)?.to_u8()?;
106            let left = if inverse { TABLE[usize::from(left)] } else { left };
107
108            let mut res = 0;
109
110            // Do the matrix multiplication.
111            for bit in 0u8..8 {
112                let mut b = matrix[usize::from(bit)] & left;
113
114                // Calculate the parity bit.
115                b = (b & 0b1111) ^ (b >> 4);
116                b = (b & 0b11) ^ (b >> 2);
117                b = (b & 0b1) ^ (b >> 1);
118
119                res |= b << 7u8.wrapping_sub(bit);
120            }
121
122            // Perform the addition.
123            res ^= imm8;
124
125            let dest = ecx.project_index(&dest, index)?;
126            ecx.write_scalar(Scalar::from_u8(res), &dest)?;
127        }
128    }
129
130    interp_ok(())
131}
132
133/// A lookup table for computing the inverse byte for the inverse affine transformation.
134// This is a evaluated at compile time. Trait based conversion is not available.
135/// See <https://www.corsix.org/content/galois-field-instructions-2021-cpus> for the
136/// definition of `gf_inv` which was used for the creation of this table.
137static TABLE: [u8; 256] = {
138    let mut array = [0; 256];
139
140    let mut i = 1;
141    while i < 256 {
142        #[expect(clippy::as_conversions)] // no `try_from` in const...
143        let mut x = i as u8;
144        let mut y = gf2p8_mul(x, x);
145        x = y;
146        let mut j = 2;
147        while j < 8 {
148            x = gf2p8_mul(x, x);
149            y = gf2p8_mul(x, y);
150            j += 1;
151        }
152        array[i] = y;
153        i += 1;
154    }
155
156    array
157};
158
/// Multiplies the two bytes `left` and `right` in the finite field GF(2^8) and returns the
/// product. The field GF(2^8) is represented in polynomial representation with the
/// reduction polynomial x^8 + x^4 + x^3 + x + 1.
/// See <https://www.corsix.org/content/galois-field-instructions-2021-cpus> for details.
// This is a const function. Trait based conversion is not available.
#[expect(clippy::as_conversions)]
const fn gf2p8_mul(left: u8, right: u8) -> u8 {
    // This follows the `gf2p8mul_byte` definition found inside the Intel intrinsics guide.
    // See https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=gf2p8mul
    // for more information.

    const POLYNOMIAL: u32 = 0x11b;

    let multiplier = left as u32;
    let multiplicand = right as u32;

    // Carry-less multiplication: XOR in a shifted copy of `multiplicand` for every set bit
    // of `multiplier`. The product has degree at most 14.
    let mut product = 0u32;
    let mut shift = 0u32;
    while shift < 8 {
        if ((multiplier >> shift) & 1) == 1 {
            product ^= multiplicand << shift;
        }
        shift = shift.wrapping_add(1);
    }

    // Reduce modulo the polynomial by clearing the bits of degree 14 down to 8, highest first.
    let mut degree = 14u32;
    while degree > 7 {
        if ((product >> degree) & 1) == 1 {
            product ^= POLYNOMIAL << degree.wrapping_sub(8);
        }
        degree = degree.wrapping_sub(1);
    }

    product as u8
}