[llvm] 06f640d - [X86] Enable EVEX GFNI instructions without avx512bw.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 31 10:32:06 PDT 2022
Author: Craig Topper
Date: 2022-10-31T10:31:45-07:00
New Revision: 06f640d3fb060e2e9cfed1d7c44636c7ffe3308b
URL: https://github.com/llvm/llvm-project/commit/06f640d3fb060e2e9cfed1d7c44636c7ffe3308b
DIFF: https://github.com/llvm/llvm-project/commit/06f640d3fb060e2e9cfed1d7c44636c7ffe3308b.diff
LOG: [X86] Enable EVEX GFNI instructions without avx512bw.
We only really need avx512bw for masking 256-bit or 512-bit GFNI
instructions due to the need for v32i1 or v64i1 masks.
I wanted to enable the masked 128-bit intrinsics with avx512vl, but the
__builtin_ia32_selectb_128 used in the header file requires avx512bw.
The corresponding codegen test also does not use a masked instruction
because a vselect with a v16i1 mask and v16i8 elements is not legal, so
it is expanded before isel. To fix these issues we would need a
mask-specific builtin and a mask-specific ISD opcode.
Fixes PR58687.
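As a rough illustration of the effect (a minimal sketch, not part of the
commit; the file name and compile flags below are only an example), the
unmasked 512-bit intrinsics now need only avx512f plus gfni, while the
masked forms still require avx512bw:

#include <immintrin.h>

/* Builds with just `clang -c -mgfni -mavx512f gfni_demo.c` after this
   change; previously the 512-bit form also required -mavx512bw. */
__m512i mul_unmasked(__m512i a, __m512i b) {
  return _mm512_gf2p8mul_epi8(a, b);
}

#ifdef __AVX512BW__
/* The masked form still needs avx512bw because the header uses
   __builtin_ia32_selectb_512 with a 64-bit mask. */
__m512i mul_masked(__m512i s, __mmask64 u, __m512i a, __m512i b) {
  return _mm512_mask_gf2p8mul_epi8(s, u, a, b);
}
#endif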
Reviewed By: pengfei
Differential Revision: https://reviews.llvm.org/D137036
Added:
Modified:
clang/include/clang/Basic/BuiltinsX86.def
clang/lib/Headers/gfniintrin.h
clang/test/CodeGen/X86/gfni-builtins.c
llvm/lib/Target/X86/X86InstrAVX512.td
llvm/lib/Target/X86/X86InstrSSE.td
llvm/test/CodeGen/X86/avx512-gfni-intrinsics.ll
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index fff7d58fb0d3c..c94c5b3d2f7dd 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -432,13 +432,13 @@ TARGET_BUILTIN(__builtin_ia32_aesdeclast512, "V8OiV8OiV8Oi", "ncV:512:", "avx512
// GFNI
TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v16qi, "V16cV16cV16cIc", "ncV:128:", "gfni")
TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v32qi, "V32cV32cV32cIc", "ncV:256:", "avx,gfni")
-TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v64qi, "V64cV64cV64cIc", "ncV:512:", "avx512bw,gfni")
+TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v64qi, "V64cV64cV64cIc", "ncV:512:", "avx512f,gfni")
TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v16qi, "V16cV16cV16cIc", "ncV:128:", "gfni")
TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v32qi, "V32cV32cV32cIc", "ncV:256:", "avx,gfni")
-TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v64qi, "V64cV64cV64cIc", "ncV:512:", "avx512bw,gfni")
+TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v64qi, "V64cV64cV64cIc", "ncV:512:", "avx512f,gfni")
TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v16qi, "V16cV16cV16c", "ncV:128:", "gfni")
TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v32qi, "V32cV32cV32c", "ncV:256:", "avx,gfni")
-TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v64qi, "V64cV64cV64c", "ncV:512:", "avx512bw,gfni")
+TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v64qi, "V64cV64cV64c", "ncV:512:", "avx512f,gfni")
// CLMUL
TARGET_BUILTIN(__builtin_ia32_pclmulqdq128, "V2OiV2OiV2OiIc", "ncV:128:", "pclmul")
diff --git a/clang/lib/Headers/gfniintrin.h b/clang/lib/Headers/gfniintrin.h
index a59238b0b1319..5ec53c54fc4ec 100644
--- a/clang/lib/Headers/gfniintrin.h
+++ b/clang/lib/Headers/gfniintrin.h
@@ -20,10 +20,12 @@
/* Default attributes for YMM unmasked form. */
#define __DEFAULT_FN_ATTRS_Y __attribute__((__always_inline__, __nodebug__, __target__("avx,gfni"), __min_vector_width__(256)))
-/* Default attributes for ZMM forms. */
-#define __DEFAULT_FN_ATTRS_Z __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"), __min_vector_width__(512)))
+/* Default attributes for ZMM unmasked forms. */
+#define __DEFAULT_FN_ATTRS_Z __attribute__((__always_inline__, __nodebug__, __target__("avx512f,gfni"), __min_vector_width__(512)))
+/* Default attributes for ZMM masked forms. */
+#define __DEFAULT_FN_ATTRS_Z_MASK __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"), __min_vector_width__(512)))
-/* Default attributes for VLX forms. */
+/* Default attributes for VLX masked forms. */
#define __DEFAULT_FN_ATTRS_VL128 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS_VL256 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(256)))
@@ -99,7 +101,7 @@ _mm512_gf2p8mul_epi8(__m512i __A, __m512i __B)
(__v64qi) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS_Z
+static __inline__ __m512i __DEFAULT_FN_ATTRS_Z_MASK
_mm512_mask_gf2p8mul_epi8(__m512i __S, __mmask64 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_selectb_512(__U,
@@ -107,7 +109,7 @@ _mm512_mask_gf2p8mul_epi8(__m512i __S, __mmask64 __U, __m512i __A, __m512i __B)
(__v64qi) __S);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS_Z
+static __inline__ __m512i __DEFAULT_FN_ATTRS_Z_MASK
_mm512_maskz_gf2p8mul_epi8(__mmask64 __U, __m512i __A, __m512i __B)
{
return _mm512_mask_gf2p8mul_epi8((__m512i)_mm512_setzero_si512(),
diff --git a/clang/test/CodeGen/X86/gfni-builtins.c b/clang/test/CodeGen/X86/gfni-builtins.c
index 61d957d59e768..7f196e08f4d80 100644
--- a/clang/test/CodeGen/X86/gfni-builtins.c
+++ b/clang/test/CodeGen/X86/gfni-builtins.c
@@ -1,6 +1,7 @@
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +gfni -emit-llvm -o - | FileCheck %s --check-prefix SSE
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +gfni -target-feature +avx -emit-llvm -o - | FileCheck %s --check-prefixes SSE,AVX
-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +gfni -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - | FileCheck %s --check-prefixes SSE,AVX,AVX512
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +gfni -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - | FileCheck %s --check-prefixes SSE,AVX,AVX512
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +gfni -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - | FileCheck %s --check-prefixes SSE,AVX,AVX512,AVX512BW
#include <immintrin.h>
@@ -42,141 +43,150 @@ __m256i test_mm256_gf2p8mul_epi8(__m256i A, __m256i B) {
}
#endif // __AVX__
-#ifdef __AVX512BW__
+#ifdef __AVX512F__
__m512i test_mm512_gf2p8affineinv_epi64_epi8(__m512i A, __m512i B) {
// AVX512-LABEL: @test_mm512_gf2p8affineinv_epi64_epi8
// AVX512: @llvm.x86.vgf2p8affineinvqb.512
return _mm512_gf2p8affineinv_epi64_epi8(A, B, 1);
}
-__m512i test_mm512_mask_gf2p8affineinv_epi64_epi8(__m512i S, __mmask64 U, __m512i A, __m512i B) {
- // AVX512-LABEL: @test_mm512_mask_gf2p8affineinv_epi64_epi8
- // AVX512: @llvm.x86.vgf2p8affineinvqb.512
- // AVX512: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
- return _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, 1);
+__m512i test_mm512_gf2p8affine_epi64_epi8(__m512i A, __m512i B) {
+ // AVX512-LABEL: @test_mm512_gf2p8affine_epi64_epi8
+ // AVX512: @llvm.x86.vgf2p8affineqb.512
+ return _mm512_gf2p8affine_epi64_epi8(A, B, 1);
}
-__m512i test_mm512_maskz_gf2p8affineinv_epi64_epi8(__mmask64 U, __m512i A, __m512i B) {
- // AVX512-LABEL: @test_mm512_maskz_gf2p8affineinv_epi64_epi8
- // AVX512: @llvm.x86.vgf2p8affineinvqb.512
- // AVX512: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
- return _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, 1);
+__m512i test_mm512_gf2p8mul_epi8(__m512i A, __m512i B) {
+ // AVX512-LABEL: @test_mm512_gf2p8mul_epi8
+ // AVX512: @llvm.x86.vgf2p8mulb.512
+ return _mm512_gf2p8mul_epi8(A, B);
}
+#endif // __AVX512F__
-__m256i test_mm256_mask_gf2p8affineinv_epi64_epi8(__m256i S, __mmask32 U, __m256i A, __m256i B) {
- // AVX256-LABEL: @test_mm256_mask_gf2p8affineinv_epi64_epi8
- // AVX256: @llvm.x86.vgf2p8affineinvqb.256
- // AVX256: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}}
- return _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, 1);
+#ifdef __AVX512BW__
+__m512i test_mm512_mask_gf2p8affineinv_epi64_epi8(__m512i S, __mmask64 U, __m512i A, __m512i B) {
+ // AVX512BW-LABEL: @test_mm512_mask_gf2p8affineinv_epi64_epi8
+ // AVX512BW: @llvm.x86.vgf2p8affineinvqb.512
+ // AVX512BW: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
+ return _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, 1);
}
-__m256i test_mm256_maskz_gf2p8affineinv_epi64_epi8(__mmask32 U, __m256i A, __m256i B) {
- // AVX256-LABEL: @test_mm256_maskz_gf2p8affineinv_epi64_epi8
- // AVX256: @llvm.x86.vgf2p8affineinvqb.256
- // AVX256: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}}
- return _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, 1);
+__m512i test_mm512_maskz_gf2p8affineinv_epi64_epi8(__mmask64 U, __m512i A, __m512i B) {
+ // AVX512BW-LABEL: @test_mm512_maskz_gf2p8affineinv_epi64_epi8
+ // AVX512BW: @llvm.x86.vgf2p8affineinvqb.512
+ // AVX512BW: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
+ return _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, 1);
}
__m128i test_mm_mask_gf2p8affineinv_epi64_epi8(__m128i S, __mmask16 U, __m128i A, __m128i B) {
- // AVX512-LABEL: @test_mm_mask_gf2p8affineinv_epi64_epi8
- // AVX512: @llvm.x86.vgf2p8affineinvqb.128
- // AVX512: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}}
+ // AVX512BW-LABEL: @test_mm_mask_gf2p8affineinv_epi64_epi8
+ // AVX512BW: @llvm.x86.vgf2p8affineinvqb.128
+ // AVX512BW: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}}
return _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, 1);
}
__m128i test_mm_maskz_gf2p8affineinv_epi64_epi8(__mmask16 U, __m128i A, __m128i B) {
- // AVX512-LABEL: @test_mm_maskz_gf2p8affineinv_epi64_epi8
- // AVX512: @llvm.x86.vgf2p8affineinvqb.128
- // AVX512: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}}
+ // AVX512BW-LABEL: @test_mm_maskz_gf2p8affineinv_epi64_epi8
+ // AVX512BW: @llvm.x86.vgf2p8affineinvqb.128
+ // AVX512BW: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}}
return _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, 1);
}
-__m512i test_mm512_gf2p8affine_epi64_epi8(__m512i A, __m512i B) {
- // AVX512-LABEL: @test_mm512_gf2p8affine_epi64_epi8
- // AVX512: @llvm.x86.vgf2p8affineqb.512
- return _mm512_gf2p8affine_epi64_epi8(A, B, 1);
+__m256i test_mm256_mask_gf2p8affineinv_epi64_epi8(__m256i S, __mmask32 U, __m256i A, __m256i B) {
+ // AVX512BW-LABEL: @test_mm256_mask_gf2p8affineinv_epi64_epi8
+ // AVX512BW: @llvm.x86.vgf2p8affineinvqb.256
+ // AVX512BW: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}}
+ return _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, 1);
+}
+
+__m256i test_mm256_maskz_gf2p8affineinv_epi64_epi8(__mmask32 U, __m256i A, __m256i B) {
+ // AVX512BW-LABEL: @test_mm256_maskz_gf2p8affineinv_epi64_epi8
+ // AVX512BW: @llvm.x86.vgf2p8affineinvqb.256
+ // AVX512BW: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}}
+ return _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, 1);
}
__m512i test_mm512_mask_gf2p8affine_epi64_epi8(__m512i S, __mmask64 U, __m512i A, __m512i B) {
- // AVX512-LABEL: @test_mm512_mask_gf2p8affine_epi64_epi8
- // AVX512: @llvm.x86.vgf2p8affineqb.512
- // AVX512: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
+ // AVX512BW-LABEL: @test_mm512_mask_gf2p8affine_epi64_epi8
+ // AVX512BW: @llvm.x86.vgf2p8affineqb.512
+ // AVX512BW: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
return _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, 1);
}
__m512i test_mm512_maskz_gf2p8affine_epi64_epi8(__mmask64 U, __m512i A, __m512i B) {
- // AVX512-LABEL: @test_mm512_maskz_gf2p8affine_epi64_epi8
- // AVX512: @llvm.x86.vgf2p8affineqb.512
- // AVX512: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
+ // AVX512BW-LABEL: @test_mm512_maskz_gf2p8affine_epi64_epi8
+ // AVX512BW: @llvm.x86.vgf2p8affineqb.512
+ // AVX512BW: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
return _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, 1);
}
-__m256i test_mm256_mask_gf2p8affine_epi64_epi8(__m256i S, __mmask32 U, __m256i A, __m256i B) {
- // AVX256-LABEL: @test_mm256_mask_gf2p8affine_epi64_epi8
- // AVX256: @llvm.x86.vgf2p8affineqb.256
- // AVX256: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}}
- return _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, 1);
-}
-
-__m256i test_mm256_maskz_gf2p8affine_epi64_epi8(__mmask32 U, __m256i A, __m256i B) {
- // AVX256-LABEL: @test_mm256_maskz_gf2p8affine_epi64_epi8
- // AVX256: @llvm.x86.vgf2p8affineqb.256
- // AVX256: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}}
- return _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, 1);
-}
-
__m128i test_mm_mask_gf2p8affine_epi64_epi8(__m128i S, __mmask16 U, __m128i A, __m128i B) {
- // AVX512-LABEL: @test_mm_mask_gf2p8affine_epi64_epi8
- // AVX512: @llvm.x86.vgf2p8affineqb.128
- // AVX512: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}}
+ // AVX512BW-LABEL: @test_mm_mask_gf2p8affine_epi64_epi8
+ // AVX512BW: @llvm.x86.vgf2p8affineqb.128
+ // AVX512BW: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}}
return _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, 1);
}
__m128i test_mm_maskz_gf2p8affine_epi64_epi8(__mmask16 U, __m128i A, __m128i B) {
- // AVX512-LABEL: @test_mm_maskz_gf2p8affine_epi64_epi8
- // AVX512: @llvm.x86.vgf2p8affineqb.128
- // AVX512: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}}
+ // AVX512BW-LABEL: @test_mm_maskz_gf2p8affine_epi64_epi8
+ // AVX512BW: @llvm.x86.vgf2p8affineqb.128
+ // AVX512BW: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}}
return _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, 1);
}
-__m512i test_mm512_gf2p8mul_epi8(__m512i A, __m512i B) {
- // AVX512-LABEL: @test_mm512_gf2p8mul_epi8
- // AVX512: @llvm.x86.vgf2p8mulb.512
- return _mm512_gf2p8mul_epi8(A, B);
+__m256i test_mm256_mask_gf2p8affine_epi64_epi8(__m256i S, __mmask32 U, __m256i A, __m256i B) {
+ // AVX512BW-LABEL: @test_mm256_mask_gf2p8affine_epi64_epi8
+ // AVX512BW: @llvm.x86.vgf2p8affineqb.256
+ // AVX512BW: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}}
+ return _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, 1);
+}
+
+__m256i test_mm256_maskz_gf2p8affine_epi64_epi8(__mmask32 U, __m256i A, __m256i B) {
+ // AVX512BW-LABEL: @test_mm256_maskz_gf2p8affine_epi64_epi8
+ // AVX512BW: @llvm.x86.vgf2p8affineqb.256
+ // AVX512BW: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}}
+ return _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, 1);
}
__m512i test_mm512_mask_gf2p8mul_epi8(__m512i S, __mmask64 U, __m512i A, __m512i B) {
- // AVX512-LABEL: @test_mm512_mask_gf2p8mul_epi8
- // AVX512: @llvm.x86.vgf2p8mulb.512
- // AVX512: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
+ // AVX512BW-LABEL: @test_mm512_mask_gf2p8mul_epi8
+ // AVX512BW: @llvm.x86.vgf2p8mulb.512
+ // AVX512BW: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
return _mm512_mask_gf2p8mul_epi8(S, U, A, B);
}
__m512i test_mm512_maskz_gf2p8mul_epi8(__mmask64 U, __m512i A, __m512i B) {
- // AVX512-LABEL: @test_mm512_maskz_gf2p8mul_epi8
- // AVX512: @llvm.x86.vgf2p8mulb.512
- // AVX512: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
+ // AVX512BW-LABEL: @test_mm512_maskz_gf2p8mul_epi8
+ // AVX512BW: @llvm.x86.vgf2p8mulb.512
+ // AVX512BW: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
return _mm512_maskz_gf2p8mul_epi8(U, A, B);
}
+__m128i test_mm_mask_gf2p8mul_epi8(__m128i S, __mmask16 U, __m128i A, __m128i B) {
+ // AVX512BW-LABEL: @test_mm_mask_gf2p8mul_epi8
+ // AVX512BW: @llvm.x86.vgf2p8mulb.128
+ // AVX512BW: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}}
+ return _mm_mask_gf2p8mul_epi8(S, U, A, B);
+}
+
+__m128i test_mm_maskz_gf2p8mul_epi8(__mmask16 U, __m128i A, __m128i B) {
+ // AVX512BW-LABEL: @test_mm_maskz_gf2p8mul_epi8
+ // AVX512BW: @llvm.x86.vgf2p8mulb.128
+ // AVX512BW: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}}
+ return _mm_maskz_gf2p8mul_epi8(U, A, B);
+}
+
__m256i test_mm256_mask_gf2p8mul_epi8(__m256i S, __mmask32 U, __m256i A, __m256i B) {
- // AVX256-LABEL: @test_mm256_mask_gf2p8mul_epi8
- // AVX256: @llvm.x86.vgf2p8mulb.256
- // AVX256: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}}
+ // AVX512BW-LABEL: @test_mm256_mask_gf2p8mul_epi8
+ // AVX512BW: @llvm.x86.vgf2p8mulb.256
+ // AVX512BW: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}}
return _mm256_mask_gf2p8mul_epi8(S, U, A, B);
}
__m256i test_mm256_maskz_gf2p8mul_epi8(__mmask32 U, __m256i A, __m256i B) {
- // AVX256-LABEL: @test_mm256_maskz_gf2p8mul_epi8
- // AVX256: @llvm.x86.vgf2p8mulb.256
- // AVX256: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}}
+ // AVX512BW-LABEL: @test_mm256_maskz_gf2p8mul_epi8
+ // AVX512BW: @llvm.x86.vgf2p8mulb.256
+ // AVX512BW: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}}
return _mm256_maskz_gf2p8mul_epi8(U, A, B);
}
-
-__m128i test_mm_mask_gf2p8mul_epi8(__m128i S, __mmask16 U, __m128i A, __m128i B) {
- // AVX512-LABEL: @test_mm_mask_gf2p8mul_epi8
- // AVX512: @llvm.x86.vgf2p8mulb.128
- // AVX512: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}}
- return _mm_mask_gf2p8mul_epi8(S, U, A, B);
-}
#endif // __AVX512BW__
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 35664b606ea1d..e4373bb16f100 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -12694,10 +12694,10 @@ defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
X86SchedWriteWidths sched> {
- let Predicates = [HasGFNI, HasAVX512, HasBWI] in
+ let Predicates = [HasGFNI, HasAVX512] in
defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
EVEX_V512;
- let Predicates = [HasGFNI, HasVLX, HasBWI] in {
+ let Predicates = [HasGFNI, HasVLX] in {
defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
EVEX_V256;
defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
@@ -12726,10 +12726,10 @@ multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
X86SchedWriteWidths sched> {
- let Predicates = [HasGFNI, HasAVX512, HasBWI] in
+ let Predicates = [HasGFNI, HasAVX512] in
defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
v64i8_info, v8i64_info>, EVEX_V512;
- let Predicates = [HasGFNI, HasVLX, HasBWI] in {
+ let Predicates = [HasGFNI, HasVLX] in {
defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
v32i8x_info, v4i64x_info>, EVEX_V256;
defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 91d29cf5d2ef1..dde7c0f36fd1b 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -8096,7 +8096,7 @@ multiclass GF2P8AFFINE_common<bits<8> Op, string OpStr, SDNode OpNode> {
Predicates = [HasGFNI, UseSSE2] in
defm NAME : GF2P8AFFINE_rmi<Op, OpStr, v16i8, OpNode,
VR128, load, i128mem, 1>;
- let Predicates = [HasGFNI, HasAVX, NoVLX_Or_NoBWI] in {
+ let Predicates = [HasGFNI, HasAVX, NoVLX] in {
defm V#NAME : GF2P8AFFINE_rmi<Op, "v"#OpStr, v16i8, OpNode, VR128,
load, i128mem>, VEX_4V, VEX_W;
defm V#NAME#Y : GF2P8AFFINE_rmi<Op, "v"#OpStr, v32i8, OpNode, VR256,
@@ -8109,7 +8109,7 @@ let Constraints = "$src1 = $dst",
Predicates = [HasGFNI, UseSSE2] in
defm GF2P8MULB : GF2P8MULB_rm<"gf2p8mulb", v16i8, VR128, memop,
i128mem, 1>;
-let Predicates = [HasGFNI, HasAVX, NoVLX_Or_NoBWI] in {
+let Predicates = [HasGFNI, HasAVX, NoVLX] in {
defm VGF2P8MULB : GF2P8MULB_rm<"vgf2p8mulb", v16i8, VR128, load,
i128mem>, VEX_4V;
defm VGF2P8MULBY : GF2P8MULB_rm<"vgf2p8mulb", v32i8, VR256, load,
diff --git a/llvm/test/CodeGen/X86/avx512-gfni-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-gfni-intrinsics.ll
index 290c500efcdb5..83b30d991442c 100644
--- a/llvm/test/CodeGen/X86/avx512-gfni-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-gfni-intrinsics.ll
@@ -1,28 +1,58 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl,+gfni,+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+gfni,+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl,+gfni,+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86BW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+gfni,+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64BW
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl,+gfni --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86NOBW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+gfni --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64NOBW
declare <16 x i8> @llvm.x86.vgf2p8affineinvqb.128(<16 x i8>, <16 x i8>, i8)
define { <16 x i8>, <16 x i8>, <16 x i8> } @test_vgf2p8affineinvqb_128(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %passthru, i16 %mask) {
-; X86-LABEL: test_vgf2p8affineinvqb_128:
-; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vgf2p8affineinvqb $3, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xcf,0xd9,0x03]
-; X86-NEXT: vgf2p8affineinvqb $4, %xmm1, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0xcf,0xe1,0x04]
-; X86-NEXT: vgf2p8affineinvqb $5, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0xcf,0xd1,0x05]
-; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
-; X86-NEXT: vmovdqa %xmm4, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcc]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_vgf2p8affineinvqb_128:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT: vgf2p8affineinvqb $3, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xcf,0xd9,0x03]
-; X64-NEXT: vgf2p8affineinvqb $4, %xmm1, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0xcf,0xe1,0x04]
-; X64-NEXT: vgf2p8affineinvqb $5, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0xcf,0xd1,0x05]
-; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
-; X64-NEXT: vmovdqa %xmm4, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcc]
-; X64-NEXT: retq # encoding: [0xc3]
+; X86BW-LABEL: test_vgf2p8affineinvqb_128:
+; X86BW: # %bb.0:
+; X86BW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86BW-NEXT: vgf2p8affineinvqb $3, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xcf,0xd9,0x03]
+; X86BW-NEXT: vgf2p8affineinvqb $4, %xmm1, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0xcf,0xe1,0x04]
+; X86BW-NEXT: vgf2p8affineinvqb $5, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0xcf,0xd1,0x05]
+; X86BW-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
+; X86BW-NEXT: vmovdqa %xmm4, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcc]
+; X86BW-NEXT: retl # encoding: [0xc3]
+;
+; X64BW-LABEL: test_vgf2p8affineinvqb_128:
+; X64BW: # %bb.0:
+; X64BW-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64BW-NEXT: vgf2p8affineinvqb $3, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xcf,0xd9,0x03]
+; X64BW-NEXT: vgf2p8affineinvqb $4, %xmm1, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0xcf,0xe1,0x04]
+; X64BW-NEXT: vgf2p8affineinvqb $5, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0xcf,0xd1,0x05]
+; X64BW-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
+; X64BW-NEXT: vmovdqa %xmm4, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcc]
+; X64BW-NEXT: retq # encoding: [0xc3]
+;
+; X86NOBW-LABEL: test_vgf2p8affineinvqb_128:
+; X86NOBW: # %bb.0:
+; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86NOBW-NEXT: vgf2p8affineinvqb $3, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xcf,0xd9,0x03]
+; X86NOBW-NEXT: vgf2p8affineinvqb $4, %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xcf,0xe1,0x04]
+; X86NOBW-NEXT: vgf2p8affineinvqb $5, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xcf,0xc1,0x05]
+; X86NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
+; X86NOBW-NEXT: vpmovdb %zmm1, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xcd]
+; X86NOBW-NEXT: vpand %xmm4, %xmm5, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xd1,0xdb,0xcc]
+; X86NOBW-NEXT: vpblendvb %xmm5, %xmm0, %xmm2, %xmm2 # encoding: [0xc4,0xe3,0x69,0x4c,0xd0,0x50]
+; X86NOBW-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
+; X86NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86NOBW-NEXT: retl # encoding: [0xc3]
+;
+; X64NOBW-LABEL: test_vgf2p8affineinvqb_128:
+; X64NOBW: # %bb.0:
+; X64NOBW-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64NOBW-NEXT: vgf2p8affineinvqb $3, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xcf,0xd9,0x03]
+; X64NOBW-NEXT: vgf2p8affineinvqb $4, %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xcf,0xe1,0x04]
+; X64NOBW-NEXT: vgf2p8affineinvqb $5, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xcf,0xc1,0x05]
+; X64NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
+; X64NOBW-NEXT: vpmovdb %zmm1, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xcd]
+; X64NOBW-NEXT: vpand %xmm4, %xmm5, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xd1,0xdb,0xcc]
+; X64NOBW-NEXT: vpblendvb %xmm5, %xmm0, %xmm2, %xmm2 # encoding: [0xc4,0xe3,0x69,0x4c,0xd0,0x50]
+; X64NOBW-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
+; X64NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64NOBW-NEXT: retq # encoding: [0xc3]
%1 = bitcast i16 %mask to <16 x i1>
%2 = call <16 x i8> @llvm.x86.vgf2p8affineinvqb.128(<16 x i8> %src1, <16 x i8> %src2, i8 3)
%3 = call <16 x i8> @llvm.x86.vgf2p8affineinvqb.128(<16 x i8> %src1, <16 x i8> %src2, i8 4)
@@ -37,25 +67,60 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_vgf2p8affineinvqb_128(<16 x i8>
declare <32 x i8> @llvm.x86.vgf2p8affineinvqb.256(<32 x i8>, <32 x i8>, i8)
define { <32 x i8>, <32 x i8>, <32 x i8> } @test_vgf2p8affineinvqb_256(<32 x i8> %src1, <32 x i8> %src2, <32 x i8> %passthru, i32 %mask) {
-; X86-LABEL: test_vgf2p8affineinvqb_256:
-; X86: # %bb.0:
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vgf2p8affineinvqb $3, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xcf,0xd9,0x03]
-; X86-NEXT: vgf2p8affineinvqb $4, %ymm1, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0xcf,0xe1,0x04]
-; X86-NEXT: vgf2p8affineinvqb $5, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0xcf,0xd1,0x05]
-; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
-; X86-NEXT: vmovdqa %ymm4, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcc]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_vgf2p8affineinvqb_256:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT: vgf2p8affineinvqb $3, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xcf,0xd9,0x03]
-; X64-NEXT: vgf2p8affineinvqb $4, %ymm1, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0xcf,0xe1,0x04]
-; X64-NEXT: vgf2p8affineinvqb $5, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0xcf,0xd1,0x05]
-; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
-; X64-NEXT: vmovdqa %ymm4, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcc]
-; X64-NEXT: retq # encoding: [0xc3]
+; X86BW-LABEL: test_vgf2p8affineinvqb_256:
+; X86BW: # %bb.0:
+; X86BW-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86BW-NEXT: vgf2p8affineinvqb $3, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xcf,0xd9,0x03]
+; X86BW-NEXT: vgf2p8affineinvqb $4, %ymm1, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0xcf,0xe1,0x04]
+; X86BW-NEXT: vgf2p8affineinvqb $5, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0xcf,0xd1,0x05]
+; X86BW-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
+; X86BW-NEXT: vmovdqa %ymm4, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcc]
+; X86BW-NEXT: retl # encoding: [0xc3]
+;
+; X64BW-LABEL: test_vgf2p8affineinvqb_256:
+; X64BW: # %bb.0:
+; X64BW-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64BW-NEXT: vgf2p8affineinvqb $3, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xcf,0xd9,0x03]
+; X64BW-NEXT: vgf2p8affineinvqb $4, %ymm1, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0xcf,0xe1,0x04]
+; X64BW-NEXT: vgf2p8affineinvqb $5, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0xcf,0xd1,0x05]
+; X64BW-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
+; X64BW-NEXT: vmovdqa %ymm4, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcc]
+; X64BW-NEXT: retq # encoding: [0xc3]
+;
+; X86NOBW-LABEL: test_vgf2p8affineinvqb_256:
+; X86NOBW: # %bb.0:
+; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k2 # encoding: [0xc5,0xf8,0x90,0x54,0x24,0x06]
+; X86NOBW-NEXT: vgf2p8affineinvqb $3, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xcf,0xd9,0x03]
+; X86NOBW-NEXT: vgf2p8affineinvqb $4, %ymm1, %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xcf,0xe1,0x04]
+; X86NOBW-NEXT: vgf2p8affineinvqb $5, %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xcf,0xc1,0x05]
+; X86NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
+; X86NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
+; X86NOBW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k2} {z} # encoding: [0x62,0xf3,0x55,0xca,0x25,0xed,0xff]
+; X86NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
+; X86NOBW-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xed,0x01]
+; X86NOBW-NEXT: vpand %ymm4, %ymm5, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xd5,0xdb,0xcc]
+; X86NOBW-NEXT: vpblendvb %ymm5, %ymm0, %ymm2, %ymm2 # encoding: [0xc4,0xe3,0x6d,0x4c,0xd0,0x50]
+; X86NOBW-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
+; X86NOBW-NEXT: retl # encoding: [0xc3]
+;
+; X64NOBW-LABEL: test_vgf2p8affineinvqb_256:
+; X64NOBW: # %bb.0:
+; X64NOBW-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64NOBW-NEXT: shrl $16, %edi # encoding: [0xc1,0xef,0x10]
+; X64NOBW-NEXT: kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
+; X64NOBW-NEXT: vgf2p8affineinvqb $3, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xcf,0xd9,0x03]
+; X64NOBW-NEXT: vgf2p8affineinvqb $4, %ymm1, %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xcf,0xe1,0x04]
+; X64NOBW-NEXT: vgf2p8affineinvqb $5, %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xcf,0xc1,0x05]
+; X64NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
+; X64NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
+; X64NOBW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k2} {z} # encoding: [0x62,0xf3,0x55,0xca,0x25,0xed,0xff]
+; X64NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
+; X64NOBW-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xed,0x01]
+; X64NOBW-NEXT: vpand %ymm4, %ymm5, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xd5,0xdb,0xcc]
+; X64NOBW-NEXT: vpblendvb %ymm5, %ymm0, %ymm2, %ymm2 # encoding: [0xc4,0xe3,0x6d,0x4c,0xd0,0x50]
+; X64NOBW-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
+; X64NOBW-NEXT: retq # encoding: [0xc3]
%1 = bitcast i32 %mask to <32 x i1>
%2 = call <32 x i8> @llvm.x86.vgf2p8affineinvqb.256(<32 x i8> %src1, <32 x i8> %src2, i8 3)
%3 = call <32 x i8> @llvm.x86.vgf2p8affineinvqb.256(<32 x i8> %src1, <32 x i8> %src2, i8 4)
@@ -70,25 +135,80 @@ define { <32 x i8>, <32 x i8>, <32 x i8> } @test_vgf2p8affineinvqb_256(<32 x i8>
declare <64 x i8> @llvm.x86.vgf2p8affineinvqb.512(<64 x i8>, <64 x i8>, i8)
define { <64 x i8>, <64 x i8>, <64 x i8> } @test_vgf2p8affineinvqb_512(<64 x i8> %src1, <64 x i8> %src2, <64 x i8> %passthru, i64 %mask) {
-; X86-LABEL: test_vgf2p8affineinvqb_512:
-; X86: # %bb.0:
-; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vgf2p8affineinvqb $3, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0xcf,0xd9,0x03]
-; X86-NEXT: vgf2p8affineinvqb $4, %zmm1, %zmm0, %zmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0xcf,0xe1,0x04]
-; X86-NEXT: vgf2p8affineinvqb $5, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0xcf,0xd1,0x05]
-; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
-; X86-NEXT: vmovdqa64 %zmm4, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcc]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_vgf2p8affineinvqb_512:
-; X64: # %bb.0:
-; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
-; X64-NEXT: vgf2p8affineinvqb $3, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0xcf,0xd9,0x03]
-; X64-NEXT: vgf2p8affineinvqb $4, %zmm1, %zmm0, %zmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0xcf,0xe1,0x04]
-; X64-NEXT: vgf2p8affineinvqb $5, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0xcf,0xd1,0x05]
-; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
-; X64-NEXT: vmovdqa64 %zmm4, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcc]
-; X64-NEXT: retq # encoding: [0xc3]
+; X86BW-LABEL: test_vgf2p8affineinvqb_512:
+; X86BW: # %bb.0:
+; X86BW-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
+; X86BW-NEXT: vgf2p8affineinvqb $3, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0xcf,0xd9,0x03]
+; X86BW-NEXT: vgf2p8affineinvqb $4, %zmm1, %zmm0, %zmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0xcf,0xe1,0x04]
+; X86BW-NEXT: vgf2p8affineinvqb $5, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0xcf,0xd1,0x05]
+; X86BW-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
+; X86BW-NEXT: vmovdqa64 %zmm4, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcc]
+; X86BW-NEXT: retl # encoding: [0xc3]
+;
+; X64BW-LABEL: test_vgf2p8affineinvqb_512:
+; X64BW: # %bb.0:
+; X64BW-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
+; X64BW-NEXT: vgf2p8affineinvqb $3, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0xcf,0xd9,0x03]
+; X64BW-NEXT: vgf2p8affineinvqb $4, %zmm1, %zmm0, %zmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0xcf,0xe1,0x04]
+; X64BW-NEXT: vgf2p8affineinvqb $5, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0xcf,0xd1,0x05]
+; X64BW-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
+; X64BW-NEXT: vmovdqa64 %zmm4, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcc]
+; X64BW-NEXT: retq # encoding: [0xc3]
+;
+; X86NOBW-LABEL: test_vgf2p8affineinvqb_512:
+; X86NOBW: # %bb.0:
+; X86NOBW-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
+; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k2 # encoding: [0xc5,0xf8,0x90,0x54,0x24,0x04]
+; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x06]
+; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k3 # encoding: [0xc5,0xf8,0x90,0x5c,0x24,0x0a]
+; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k4 # encoding: [0xc5,0xf8,0x90,0x64,0x24,0x08]
+; X86NOBW-NEXT: vgf2p8affineinvqb $3, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0xcf,0xc1,0x03]
+; X86NOBW-NEXT: vgf2p8affineinvqb $4, %zmm1, %zmm3, %zmm4 # encoding: [0x62,0xf3,0xe5,0x48,0xcf,0xe1,0x04]
+; X86NOBW-NEXT: vgf2p8affineinvqb $5, %zmm1, %zmm3, %zmm3 # encoding: [0x62,0xf3,0xe5,0x48,0xcf,0xd9,0x05]
+; X86NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z} # encoding: [0x62,0xf3,0x75,0xcc,0x25,0xc9,0xff]
+; X86NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
+; X86NOBW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k3} {z} # encoding: [0x62,0xf3,0x55,0xcb,0x25,0xed,0xff]
+; X86NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
+; X86NOBW-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xcd,0x01]
+; X86NOBW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k2} {z} # encoding: [0x62,0xf3,0x55,0xca,0x25,0xed,0xff]
+; X86NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
+; X86NOBW-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k1} {z} # encoding: [0x62,0xf3,0x4d,0xc9,0x25,0xf6,0xff]
+; X86NOBW-NEXT: vpmovdb %zmm6, %xmm6 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xf6]
+; X86NOBW-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x55,0x38,0xee,0x01]
+; X86NOBW-NEXT: vinserti64x4 $1, %ymm1, %zmm5, %zmm5 # encoding: [0x62,0xf3,0xd5,0x48,0x3a,0xe9,0x01]
+; X86NOBW-NEXT: vpandq %zmm4, %zmm5, %zmm1 # encoding: [0x62,0xf1,0xd5,0x48,0xdb,0xcc]
+; X86NOBW-NEXT: vpternlogq $184, %zmm3, %zmm5, %zmm2 # encoding: [0x62,0xf3,0xd5,0x48,0x25,0xd3,0xb8]
+; X86NOBW-NEXT: retl # encoding: [0xc3]
+;
+; X64NOBW-LABEL: test_vgf2p8affineinvqb_512:
+; X64NOBW: # %bb.0:
+; X64NOBW-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
+; X64NOBW-NEXT: movl %edi, %ecx # encoding: [0x89,0xf9]
+; X64NOBW-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64NOBW-NEXT: shrq $32, %rdi # encoding: [0x48,0xc1,0xef,0x20]
+; X64NOBW-NEXT: shrq $48, %rax # encoding: [0x48,0xc1,0xe8,0x30]
+; X64NOBW-NEXT: shrl $16, %ecx # encoding: [0xc1,0xe9,0x10]
+; X64NOBW-NEXT: kmovw %ecx, %k2 # encoding: [0xc5,0xf8,0x92,0xd1]
+; X64NOBW-NEXT: kmovw %eax, %k3 # encoding: [0xc5,0xf8,0x92,0xd8]
+; X64NOBW-NEXT: kmovw %edi, %k4 # encoding: [0xc5,0xf8,0x92,0xe7]
+; X64NOBW-NEXT: vgf2p8affineinvqb $3, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0xcf,0xd9,0x03]
+; X64NOBW-NEXT: vgf2p8affineinvqb $4, %zmm1, %zmm0, %zmm4 # encoding: [0x62,0xf3,0xfd,0x48,0xcf,0xe1,0x04]
+; X64NOBW-NEXT: vgf2p8affineinvqb $5, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0xcf,0xc1,0x05]
+; X64NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z} # encoding: [0x62,0xf3,0x75,0xcc,0x25,0xc9,0xff]
+; X64NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
+; X64NOBW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k3} {z} # encoding: [0x62,0xf3,0x55,0xcb,0x25,0xed,0xff]
+; X64NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
+; X64NOBW-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xcd,0x01]
+; X64NOBW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k1} {z} # encoding: [0x62,0xf3,0x55,0xc9,0x25,0xed,0xff]
+; X64NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
+; X64NOBW-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k2} {z} # encoding: [0x62,0xf3,0x4d,0xca,0x25,0xf6,0xff]
+; X64NOBW-NEXT: vpmovdb %zmm6, %xmm6 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xf6]
+; X64NOBW-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x55,0x38,0xee,0x01]
+; X64NOBW-NEXT: vinserti64x4 $1, %ymm1, %zmm5, %zmm5 # encoding: [0x62,0xf3,0xd5,0x48,0x3a,0xe9,0x01]
+; X64NOBW-NEXT: vpandq %zmm4, %zmm5, %zmm1 # encoding: [0x62,0xf1,0xd5,0x48,0xdb,0xcc]
+; X64NOBW-NEXT: vpternlogq $184, %zmm0, %zmm5, %zmm2 # encoding: [0x62,0xf3,0xd5,0x48,0x25,0xd0,0xb8]
+; X64NOBW-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
+; X64NOBW-NEXT: retq # encoding: [0xc3]
%1 = bitcast i64 %mask to <64 x i1>
%2 = call <64 x i8> @llvm.x86.vgf2p8affineinvqb.512(<64 x i8> %src1, <64 x i8> %src2, i8 3)
%3 = call <64 x i8> @llvm.x86.vgf2p8affineinvqb.512(<64 x i8> %src1, <64 x i8> %src2, i8 4)
@@ -103,25 +223,53 @@ define { <64 x i8>, <64 x i8>, <64 x i8> } @test_vgf2p8affineinvqb_512(<64 x i8>
declare <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8>, <16 x i8>, i8)
define { <16 x i8>, <16 x i8>, <16 x i8> } @test_vgf2p8affineqb_128(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %passthru, i16 %mask) {
-; X86-LABEL: test_vgf2p8affineqb_128:
-; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vgf2p8affineqb $3, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xce,0xd9,0x03]
-; X86-NEXT: vgf2p8affineqb $4, %xmm1, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0xce,0xe1,0x04]
-; X86-NEXT: vgf2p8affineqb $5, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0xce,0xd1,0x05]
-; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
-; X86-NEXT: vmovdqa %xmm4, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcc]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_vgf2p8affineqb_128:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT: vgf2p8affineqb $3, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xce,0xd9,0x03]
-; X64-NEXT: vgf2p8affineqb $4, %xmm1, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0xce,0xe1,0x04]
-; X64-NEXT: vgf2p8affineqb $5, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0xce,0xd1,0x05]
-; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
-; X64-NEXT: vmovdqa %xmm4, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcc]
-; X64-NEXT: retq # encoding: [0xc3]
+; X86BW-LABEL: test_vgf2p8affineqb_128:
+; X86BW: # %bb.0:
+; X86BW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86BW-NEXT: vgf2p8affineqb $3, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xce,0xd9,0x03]
+; X86BW-NEXT: vgf2p8affineqb $4, %xmm1, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0xce,0xe1,0x04]
+; X86BW-NEXT: vgf2p8affineqb $5, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0xce,0xd1,0x05]
+; X86BW-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
+; X86BW-NEXT: vmovdqa %xmm4, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcc]
+; X86BW-NEXT: retl # encoding: [0xc3]
+;
+; X64BW-LABEL: test_vgf2p8affineqb_128:
+; X64BW: # %bb.0:
+; X64BW-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64BW-NEXT: vgf2p8affineqb $3, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xce,0xd9,0x03]
+; X64BW-NEXT: vgf2p8affineqb $4, %xmm1, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0xce,0xe1,0x04]
+; X64BW-NEXT: vgf2p8affineqb $5, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0xce,0xd1,0x05]
+; X64BW-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
+; X64BW-NEXT: vmovdqa %xmm4, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcc]
+; X64BW-NEXT: retq # encoding: [0xc3]
+;
+; X86NOBW-LABEL: test_vgf2p8affineqb_128:
+; X86NOBW: # %bb.0:
+; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86NOBW-NEXT: vgf2p8affineqb $3, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xce,0xd9,0x03]
+; X86NOBW-NEXT: vgf2p8affineqb $4, %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xce,0xe1,0x04]
+; X86NOBW-NEXT: vgf2p8affineqb $5, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xce,0xc1,0x05]
+; X86NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
+; X86NOBW-NEXT: vpmovdb %zmm1, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xcd]
+; X86NOBW-NEXT: vpand %xmm4, %xmm5, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xd1,0xdb,0xcc]
+; X86NOBW-NEXT: vpblendvb %xmm5, %xmm0, %xmm2, %xmm2 # encoding: [0xc4,0xe3,0x69,0x4c,0xd0,0x50]
+; X86NOBW-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
+; X86NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86NOBW-NEXT: retl # encoding: [0xc3]
+;
+; X64NOBW-LABEL: test_vgf2p8affineqb_128:
+; X64NOBW: # %bb.0:
+; X64NOBW-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64NOBW-NEXT: vgf2p8affineqb $3, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xce,0xd9,0x03]
+; X64NOBW-NEXT: vgf2p8affineqb $4, %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xce,0xe1,0x04]
+; X64NOBW-NEXT: vgf2p8affineqb $5, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xce,0xc1,0x05]
+; X64NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
+; X64NOBW-NEXT: vpmovdb %zmm1, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xcd]
+; X64NOBW-NEXT: vpand %xmm4, %xmm5, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xd1,0xdb,0xcc]
+; X64NOBW-NEXT: vpblendvb %xmm5, %xmm0, %xmm2, %xmm2 # encoding: [0xc4,0xe3,0x69,0x4c,0xd0,0x50]
+; X64NOBW-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
+; X64NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64NOBW-NEXT: retq # encoding: [0xc3]
%1 = bitcast i16 %mask to <16 x i1>
%2 = call <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8> %src1, <16 x i8> %src2, i8 3)
%3 = call <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8> %src1, <16 x i8> %src2, i8 4)
@@ -136,25 +284,60 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_vgf2p8affineqb_128(<16 x i8> %s
declare <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8>, <32 x i8>, i8)
define { <32 x i8>, <32 x i8>, <32 x i8> } @test_vgf2p8affineqb_256(<32 x i8> %src1, <32 x i8> %src2, <32 x i8> %passthru, i32 %mask) {
-; X86-LABEL: test_vgf2p8affineqb_256:
-; X86: # %bb.0:
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vgf2p8affineqb $3, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xce,0xd9,0x03]
-; X86-NEXT: vgf2p8affineqb $4, %ymm1, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0xce,0xe1,0x04]
-; X86-NEXT: vgf2p8affineqb $5, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0xce,0xd1,0x05]
-; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
-; X86-NEXT: vmovdqa %ymm4, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcc]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_vgf2p8affineqb_256:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT: vgf2p8affineqb $3, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xce,0xd9,0x03]
-; X64-NEXT: vgf2p8affineqb $4, %ymm1, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0xce,0xe1,0x04]
-; X64-NEXT: vgf2p8affineqb $5, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0xce,0xd1,0x05]
-; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
-; X64-NEXT: vmovdqa %ymm4, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcc]
-; X64-NEXT: retq # encoding: [0xc3]
+; X86BW-LABEL: test_vgf2p8affineqb_256:
+; X86BW: # %bb.0:
+; X86BW-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86BW-NEXT: vgf2p8affineqb $3, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xce,0xd9,0x03]
+; X86BW-NEXT: vgf2p8affineqb $4, %ymm1, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0xce,0xe1,0x04]
+; X86BW-NEXT: vgf2p8affineqb $5, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0xce,0xd1,0x05]
+; X86BW-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
+; X86BW-NEXT: vmovdqa %ymm4, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcc]
+; X86BW-NEXT: retl # encoding: [0xc3]
+;
+; X64BW-LABEL: test_vgf2p8affineqb_256:
+; X64BW: # %bb.0:
+; X64BW-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64BW-NEXT: vgf2p8affineqb $3, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xce,0xd9,0x03]
+; X64BW-NEXT: vgf2p8affineqb $4, %ymm1, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0xce,0xe1,0x04]
+; X64BW-NEXT: vgf2p8affineqb $5, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0xce,0xd1,0x05]
+; X64BW-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
+; X64BW-NEXT: vmovdqa %ymm4, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcc]
+; X64BW-NEXT: retq # encoding: [0xc3]
+;
+; X86NOBW-LABEL: test_vgf2p8affineqb_256:
+; X86NOBW: # %bb.0:
+; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k2 # encoding: [0xc5,0xf8,0x90,0x54,0x24,0x06]
+; X86NOBW-NEXT: vgf2p8affineqb $3, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xce,0xd9,0x03]
+; X86NOBW-NEXT: vgf2p8affineqb $4, %ymm1, %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xce,0xe1,0x04]
+; X86NOBW-NEXT: vgf2p8affineqb $5, %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xce,0xc1,0x05]
+; X86NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
+; X86NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
+; X86NOBW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k2} {z} # encoding: [0x62,0xf3,0x55,0xca,0x25,0xed,0xff]
+; X86NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
+; X86NOBW-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xed,0x01]
+; X86NOBW-NEXT: vpand %ymm4, %ymm5, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xd5,0xdb,0xcc]
+; X86NOBW-NEXT: vpblendvb %ymm5, %ymm0, %ymm2, %ymm2 # encoding: [0xc4,0xe3,0x6d,0x4c,0xd0,0x50]
+; X86NOBW-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
+; X86NOBW-NEXT: retl # encoding: [0xc3]
+;
+; X64NOBW-LABEL: test_vgf2p8affineqb_256:
+; X64NOBW: # %bb.0:
+; X64NOBW-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64NOBW-NEXT: shrl $16, %edi # encoding: [0xc1,0xef,0x10]
+; X64NOBW-NEXT: kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
+; X64NOBW-NEXT: vgf2p8affineqb $3, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xce,0xd9,0x03]
+; X64NOBW-NEXT: vgf2p8affineqb $4, %ymm1, %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xce,0xe1,0x04]
+; X64NOBW-NEXT: vgf2p8affineqb $5, %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xce,0xc1,0x05]
+; X64NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
+; X64NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
+; X64NOBW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k2} {z} # encoding: [0x62,0xf3,0x55,0xca,0x25,0xed,0xff]
+; X64NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
+; X64NOBW-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xed,0x01]
+; X64NOBW-NEXT: vpand %ymm4, %ymm5, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xd5,0xdb,0xcc]
+; X64NOBW-NEXT: vpblendvb %ymm5, %ymm0, %ymm2, %ymm2 # encoding: [0xc4,0xe3,0x6d,0x4c,0xd0,0x50]
+; X64NOBW-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
+; X64NOBW-NEXT: retq # encoding: [0xc3]
%1 = bitcast i32 %mask to <32 x i1>
%2 = call <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8> %src1, <32 x i8> %src2, i8 3)
%3 = call <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8> %src1, <32 x i8> %src2, i8 4)
@@ -169,25 +352,80 @@ define { <32 x i8>, <32 x i8>, <32 x i8> } @test_vgf2p8affineqb_256(<32 x i8> %s
declare <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8>, <64 x i8>, i8)
define { <64 x i8>, <64 x i8>, <64 x i8> } @test_vgf2p8affineqb_512(<64 x i8> %src1, <64 x i8> %src2, <64 x i8> %passthru, i64 %mask) {
-; X86-LABEL: test_vgf2p8affineqb_512:
-; X86: # %bb.0:
-; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vgf2p8affineqb $3, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0xce,0xd9,0x03]
-; X86-NEXT: vgf2p8affineqb $4, %zmm1, %zmm0, %zmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0xce,0xe1,0x04]
-; X86-NEXT: vgf2p8affineqb $5, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0xce,0xd1,0x05]
-; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
-; X86-NEXT: vmovdqa64 %zmm4, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcc]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_vgf2p8affineqb_512:
-; X64: # %bb.0:
-; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
-; X64-NEXT: vgf2p8affineqb $3, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0xce,0xd9,0x03]
-; X64-NEXT: vgf2p8affineqb $4, %zmm1, %zmm0, %zmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0xce,0xe1,0x04]
-; X64-NEXT: vgf2p8affineqb $5, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0xce,0xd1,0x05]
-; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
-; X64-NEXT: vmovdqa64 %zmm4, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcc]
-; X64-NEXT: retq # encoding: [0xc3]
+; X86BW-LABEL: test_vgf2p8affineqb_512:
+; X86BW: # %bb.0:
+; X86BW-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
+; X86BW-NEXT: vgf2p8affineqb $3, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0xce,0xd9,0x03]
+; X86BW-NEXT: vgf2p8affineqb $4, %zmm1, %zmm0, %zmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0xce,0xe1,0x04]
+; X86BW-NEXT: vgf2p8affineqb $5, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0xce,0xd1,0x05]
+; X86BW-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
+; X86BW-NEXT: vmovdqa64 %zmm4, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcc]
+; X86BW-NEXT: retl # encoding: [0xc3]
+;
+; X64BW-LABEL: test_vgf2p8affineqb_512:
+; X64BW: # %bb.0:
+; X64BW-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
+; X64BW-NEXT: vgf2p8affineqb $3, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0xce,0xd9,0x03]
+; X64BW-NEXT: vgf2p8affineqb $4, %zmm1, %zmm0, %zmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0xce,0xe1,0x04]
+; X64BW-NEXT: vgf2p8affineqb $5, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0xce,0xd1,0x05]
+; X64BW-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
+; X64BW-NEXT: vmovdqa64 %zmm4, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcc]
+; X64BW-NEXT: retq # encoding: [0xc3]
+;
+; X86NOBW-LABEL: test_vgf2p8affineqb_512:
+; X86NOBW: # %bb.0:
+; X86NOBW-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
+; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k2 # encoding: [0xc5,0xf8,0x90,0x54,0x24,0x04]
+; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x06]
+; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k3 # encoding: [0xc5,0xf8,0x90,0x5c,0x24,0x0a]
+; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k4 # encoding: [0xc5,0xf8,0x90,0x64,0x24,0x08]
+; X86NOBW-NEXT: vgf2p8affineqb $3, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0xce,0xc1,0x03]
+; X86NOBW-NEXT: vgf2p8affineqb $4, %zmm1, %zmm3, %zmm4 # encoding: [0x62,0xf3,0xe5,0x48,0xce,0xe1,0x04]
+; X86NOBW-NEXT: vgf2p8affineqb $5, %zmm1, %zmm3, %zmm3 # encoding: [0x62,0xf3,0xe5,0x48,0xce,0xd9,0x05]
+; X86NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z} # encoding: [0x62,0xf3,0x75,0xcc,0x25,0xc9,0xff]
+; X86NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
+; X86NOBW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k3} {z} # encoding: [0x62,0xf3,0x55,0xcb,0x25,0xed,0xff]
+; X86NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
+; X86NOBW-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xcd,0x01]
+; X86NOBW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k2} {z} # encoding: [0x62,0xf3,0x55,0xca,0x25,0xed,0xff]
+; X86NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
+; X86NOBW-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k1} {z} # encoding: [0x62,0xf3,0x4d,0xc9,0x25,0xf6,0xff]
+; X86NOBW-NEXT: vpmovdb %zmm6, %xmm6 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xf6]
+; X86NOBW-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x55,0x38,0xee,0x01]
+; X86NOBW-NEXT: vinserti64x4 $1, %ymm1, %zmm5, %zmm5 # encoding: [0x62,0xf3,0xd5,0x48,0x3a,0xe9,0x01]
+; X86NOBW-NEXT: vpandq %zmm4, %zmm5, %zmm1 # encoding: [0x62,0xf1,0xd5,0x48,0xdb,0xcc]
+; X86NOBW-NEXT: vpternlogq $184, %zmm3, %zmm5, %zmm2 # encoding: [0x62,0xf3,0xd5,0x48,0x25,0xd3,0xb8]
+; X86NOBW-NEXT: retl # encoding: [0xc3]
+;
+; X64NOBW-LABEL: test_vgf2p8affineqb_512:
+; X64NOBW: # %bb.0:
+; X64NOBW-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
+; X64NOBW-NEXT: movl %edi, %ecx # encoding: [0x89,0xf9]
+; X64NOBW-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64NOBW-NEXT: shrq $32, %rdi # encoding: [0x48,0xc1,0xef,0x20]
+; X64NOBW-NEXT: shrq $48, %rax # encoding: [0x48,0xc1,0xe8,0x30]
+; X64NOBW-NEXT: shrl $16, %ecx # encoding: [0xc1,0xe9,0x10]
+; X64NOBW-NEXT: kmovw %ecx, %k2 # encoding: [0xc5,0xf8,0x92,0xd1]
+; X64NOBW-NEXT: kmovw %eax, %k3 # encoding: [0xc5,0xf8,0x92,0xd8]
+; X64NOBW-NEXT: kmovw %edi, %k4 # encoding: [0xc5,0xf8,0x92,0xe7]
+; X64NOBW-NEXT: vgf2p8affineqb $3, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0xce,0xd9,0x03]
+; X64NOBW-NEXT: vgf2p8affineqb $4, %zmm1, %zmm0, %zmm4 # encoding: [0x62,0xf3,0xfd,0x48,0xce,0xe1,0x04]
+; X64NOBW-NEXT: vgf2p8affineqb $5, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0xce,0xc1,0x05]
+; X64NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z} # encoding: [0x62,0xf3,0x75,0xcc,0x25,0xc9,0xff]
+; X64NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
+; X64NOBW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k3} {z} # encoding: [0x62,0xf3,0x55,0xcb,0x25,0xed,0xff]
+; X64NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
+; X64NOBW-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xcd,0x01]
+; X64NOBW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k1} {z} # encoding: [0x62,0xf3,0x55,0xc9,0x25,0xed,0xff]
+; X64NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
+; X64NOBW-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k2} {z} # encoding: [0x62,0xf3,0x4d,0xca,0x25,0xf6,0xff]
+; X64NOBW-NEXT: vpmovdb %zmm6, %xmm6 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xf6]
+; X64NOBW-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x55,0x38,0xee,0x01]
+; X64NOBW-NEXT: vinserti64x4 $1, %ymm1, %zmm5, %zmm5 # encoding: [0x62,0xf3,0xd5,0x48,0x3a,0xe9,0x01]
+; X64NOBW-NEXT: vpandq %zmm4, %zmm5, %zmm1 # encoding: [0x62,0xf1,0xd5,0x48,0xdb,0xcc]
+; X64NOBW-NEXT: vpternlogq $184, %zmm0, %zmm5, %zmm2 # encoding: [0x62,0xf3,0xd5,0x48,0x25,0xd0,0xb8]
+; X64NOBW-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
+; X64NOBW-NEXT: retq # encoding: [0xc3]
%1 = bitcast i64 %mask to <64 x i1>
%2 = call <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8> %src1, <64 x i8> %src2, i8 3)
%3 = call <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8> %src1, <64 x i8> %src2, i8 4)
@@ -211,19 +449,39 @@ define <16 x i8> @test_vgf2p8mulb_128(<16 x i8> %src1, <16 x i8> %src2) {
}
define <16 x i8> @test_vgf2p8mulb_128_mask(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %passthru, i16 %mask) {
-; X86-LABEL: test_vgf2p8mulb_128_mask:
-; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xcf,0xd1]
-; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_vgf2p8mulb_128_mask:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xcf,0xd1]
-; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
-; X64-NEXT: retq # encoding: [0xc3]
+; X86BW-LABEL: test_vgf2p8mulb_128_mask:
+; X86BW: # %bb.0:
+; X86BW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86BW-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xcf,0xd1]
+; X86BW-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
+; X86BW-NEXT: retl # encoding: [0xc3]
+;
+; X64BW-LABEL: test_vgf2p8mulb_128_mask:
+; X64BW: # %bb.0:
+; X64BW-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64BW-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xcf,0xd1]
+; X64BW-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
+; X64BW-NEXT: retq # encoding: [0xc3]
+;
+; X86NOBW-LABEL: test_vgf2p8mulb_128_mask:
+; X86NOBW: # %bb.0:
+; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86NOBW-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xcf,0xc1]
+; X86NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
+; X86NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
+; X86NOBW-NEXT: vpblendvb %xmm1, %xmm0, %xmm2, %xmm0 # encoding: [0xc4,0xe3,0x69,0x4c,0xc0,0x10]
+; X86NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86NOBW-NEXT: retl # encoding: [0xc3]
+;
+; X64NOBW-LABEL: test_vgf2p8mulb_128_mask:
+; X64NOBW: # %bb.0:
+; X64NOBW-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64NOBW-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xcf,0xc1]
+; X64NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
+; X64NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
+; X64NOBW-NEXT: vpblendvb %xmm1, %xmm0, %xmm2, %xmm0 # encoding: [0xc4,0xe3,0x69,0x4c,0xc0,0x10]
+; X64NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64NOBW-NEXT: retq # encoding: [0xc3]
%1 = bitcast i16 %mask to <16 x i1>
%2 = call <16 x i8> @llvm.x86.vgf2p8mulb.128(<16 x i8> %src1, <16 x i8> %src2)
%3 = select <16 x i1> %1, <16 x i8> %2, <16 x i8> %passthru
@@ -231,17 +489,37 @@ define <16 x i8> @test_vgf2p8mulb_128_mask(<16 x i8> %src1, <16 x i8> %src2, <16
}
define <16 x i8> @test_vgf2p8mulb_128_maskz(<16 x i8> %src1, <16 x i8> %src2, i16 %mask) {
-; X86-LABEL: test_vgf2p8mulb_128_maskz:
-; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0xcf,0xc1]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_vgf2p8mulb_128_maskz:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0xcf,0xc1]
-; X64-NEXT: retq # encoding: [0xc3]
+; X86BW-LABEL: test_vgf2p8mulb_128_maskz:
+; X86BW: # %bb.0:
+; X86BW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86BW-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0xcf,0xc1]
+; X86BW-NEXT: retl # encoding: [0xc3]
+;
+; X64BW-LABEL: test_vgf2p8mulb_128_maskz:
+; X64BW: # %bb.0:
+; X64BW-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64BW-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0xcf,0xc1]
+; X64BW-NEXT: retq # encoding: [0xc3]
+;
+; X86NOBW-LABEL: test_vgf2p8mulb_128_maskz:
+; X86NOBW: # %bb.0:
+; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86NOBW-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xcf,0xc1]
+; X86NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
+; X86NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
+; X86NOBW-NEXT: vpand %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xdb,0xc0]
+; X86NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86NOBW-NEXT: retl # encoding: [0xc3]
+;
+; X64NOBW-LABEL: test_vgf2p8mulb_128_maskz:
+; X64NOBW: # %bb.0:
+; X64NOBW-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64NOBW-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xcf,0xc1]
+; X64NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
+; X64NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
+; X64NOBW-NEXT: vpand %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xdb,0xc0]
+; X64NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64NOBW-NEXT: retq # encoding: [0xc3]
%1 = bitcast i16 %mask to <16 x i1>
%2 = call <16 x i8> @llvm.x86.vgf2p8mulb.128(<16 x i8> %src1, <16 x i8> %src2)
%3 = select <16 x i1> %1, <16 x i8> %2, <16 x i8> zeroinitializer
@@ -259,19 +537,46 @@ define <32 x i8> @test_vgf2p8mulb_256(<32 x i8> %src1, <32 x i8> %src2) {
}
define <32 x i8> @test_vgf2p8mulb_256_mask(<32 x i8> %src1, <32 x i8> %src2, <32 x i8> %passthru, i32 %mask) {
-; X86-LABEL: test_vgf2p8mulb_256_mask:
-; X86: # %bb.0:
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xcf,0xd1]
-; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_vgf2p8mulb_256_mask:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xcf,0xd1]
-; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
-; X64-NEXT: retq # encoding: [0xc3]
+; X86BW-LABEL: test_vgf2p8mulb_256_mask:
+; X86BW: # %bb.0:
+; X86BW-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86BW-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xcf,0xd1]
+; X86BW-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
+; X86BW-NEXT: retl # encoding: [0xc3]
+;
+; X64BW-LABEL: test_vgf2p8mulb_256_mask:
+; X64BW: # %bb.0:
+; X64BW-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64BW-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xcf,0xd1]
+; X64BW-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
+; X64BW-NEXT: retq # encoding: [0xc3]
+;
+; X86NOBW-LABEL: test_vgf2p8mulb_256_mask:
+; X86NOBW: # %bb.0:
+; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k2 # encoding: [0xc5,0xf8,0x90,0x54,0x24,0x06]
+; X86NOBW-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xcf,0xc1]
+; X86NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
+; X86NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
+; X86NOBW-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} # encoding: [0x62,0xf3,0x65,0xca,0x25,0xdb,0xff]
+; X86NOBW-NEXT: vpmovdb %zmm3, %xmm3 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xdb]
+; X86NOBW-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xcb,0x01]
+; X86NOBW-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm0 # encoding: [0xc4,0xe3,0x6d,0x4c,0xc0,0x10]
+; X86NOBW-NEXT: retl # encoding: [0xc3]
+;
+; X64NOBW-LABEL: test_vgf2p8mulb_256_mask:
+; X64NOBW: # %bb.0:
+; X64NOBW-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64NOBW-NEXT: shrl $16, %edi # encoding: [0xc1,0xef,0x10]
+; X64NOBW-NEXT: kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
+; X64NOBW-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xcf,0xc1]
+; X64NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
+; X64NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
+; X64NOBW-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} # encoding: [0x62,0xf3,0x65,0xca,0x25,0xdb,0xff]
+; X64NOBW-NEXT: vpmovdb %zmm3, %xmm3 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xdb]
+; X64NOBW-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xcb,0x01]
+; X64NOBW-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm0 # encoding: [0xc4,0xe3,0x6d,0x4c,0xc0,0x10]
+; X64NOBW-NEXT: retq # encoding: [0xc3]
%1 = bitcast i32 %mask to <32 x i1>
%2 = call <32 x i8> @llvm.x86.vgf2p8mulb.256(<32 x i8> %src1, <32 x i8> %src2)
%3 = select <32 x i1> %1, <32 x i8> %2, <32 x i8> %passthru
@@ -279,17 +584,44 @@ define <32 x i8> @test_vgf2p8mulb_256_mask(<32 x i8> %src1, <32 x i8> %src2, <32
}
define <32 x i8> @test_vgf2p8mulb_256_maskz(<32 x i8> %src1, <32 x i8> %src2, i32 %mask) {
-; X86-LABEL: test_vgf2p8mulb_256_maskz:
-; X86: # %bb.0:
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0xcf,0xc1]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_vgf2p8mulb_256_maskz:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0xcf,0xc1]
-; X64-NEXT: retq # encoding: [0xc3]
+; X86BW-LABEL: test_vgf2p8mulb_256_maskz:
+; X86BW: # %bb.0:
+; X86BW-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86BW-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0xcf,0xc1]
+; X86BW-NEXT: retl # encoding: [0xc3]
+;
+; X64BW-LABEL: test_vgf2p8mulb_256_maskz:
+; X64BW: # %bb.0:
+; X64BW-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64BW-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0xcf,0xc1]
+; X64BW-NEXT: retq # encoding: [0xc3]
+;
+; X86NOBW-LABEL: test_vgf2p8mulb_256_maskz:
+; X86NOBW: # %bb.0:
+; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k2 # encoding: [0xc5,0xf8,0x90,0x54,0x24,0x06]
+; X86NOBW-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xcf,0xc1]
+; X86NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
+; X86NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
+; X86NOBW-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} # encoding: [0x62,0xf3,0x6d,0xca,0x25,0xd2,0xff]
+; X86NOBW-NEXT: vpmovdb %zmm2, %xmm2 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xd2]
+; X86NOBW-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xca,0x01]
+; X86NOBW-NEXT: vpand %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xdb,0xc0]
+; X86NOBW-NEXT: retl # encoding: [0xc3]
+;
+; X64NOBW-LABEL: test_vgf2p8mulb_256_maskz:
+; X64NOBW: # %bb.0:
+; X64NOBW-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64NOBW-NEXT: shrl $16, %edi # encoding: [0xc1,0xef,0x10]
+; X64NOBW-NEXT: kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
+; X64NOBW-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xcf,0xc1]
+; X64NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
+; X64NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
+; X64NOBW-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} # encoding: [0x62,0xf3,0x6d,0xca,0x25,0xd2,0xff]
+; X64NOBW-NEXT: vpmovdb %zmm2, %xmm2 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xd2]
+; X64NOBW-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xca,0x01]
+; X64NOBW-NEXT: vpand %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xdb,0xc0]
+; X64NOBW-NEXT: retq # encoding: [0xc3]
%1 = bitcast i32 %mask to <32 x i1>
%2 = call <32 x i8> @llvm.x86.vgf2p8mulb.256(<32 x i8> %src1, <32 x i8> %src2)
%3 = select <32 x i1> %1, <32 x i8> %2, <32 x i8> zeroinitializer
@@ -307,19 +639,66 @@ define <64 x i8> @test_vgf2p8mulb_512(<64 x i8> %src1, <64 x i8> %src2) {
}
define <64 x i8> @test_vgf2p8mulb_512_mask(<64 x i8> %src1, <64 x i8> %src2, <64 x i8> %passthru, i64 %mask) {
-; X86-LABEL: test_vgf2p8mulb_512_mask:
-; X86: # %bb.0:
-; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vgf2p8mulb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xcf,0xd1]
-; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_vgf2p8mulb_512_mask:
-; X64: # %bb.0:
-; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
-; X64-NEXT: vgf2p8mulb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xcf,0xd1]
-; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
-; X64-NEXT: retq # encoding: [0xc3]
+; X86BW-LABEL: test_vgf2p8mulb_512_mask:
+; X86BW: # %bb.0:
+; X86BW-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
+; X86BW-NEXT: vgf2p8mulb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xcf,0xd1]
+; X86BW-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
+; X86BW-NEXT: retl # encoding: [0xc3]
+;
+; X64BW-LABEL: test_vgf2p8mulb_512_mask:
+; X64BW: # %bb.0:
+; X64BW-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
+; X64BW-NEXT: vgf2p8mulb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xcf,0xd1]
+; X64BW-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
+; X64BW-NEXT: retq # encoding: [0xc3]
+;
+; X86NOBW-LABEL: test_vgf2p8mulb_512_mask:
+; X86NOBW: # %bb.0:
+; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k2 # encoding: [0xc5,0xf8,0x90,0x54,0x24,0x06]
+; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k3 # encoding: [0xc5,0xf8,0x90,0x5c,0x24,0x0a]
+; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k4 # encoding: [0xc5,0xf8,0x90,0x64,0x24,0x08]
+; X86NOBW-NEXT: vgf2p8mulb %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0xcf,0xc9]
+; X86NOBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k4} {z} # encoding: [0x62,0xf3,0x7d,0xcc,0x25,0xc0,0xff]
+; X86NOBW-NEXT: vpmovdb %zmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0]
+; X86NOBW-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k3} {z} # encoding: [0x62,0xf3,0x65,0xcb,0x25,0xdb,0xff]
+; X86NOBW-NEXT: vpmovdb %zmm3, %xmm3 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xdb]
+; X86NOBW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xc3,0x01]
+; X86NOBW-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0x65,0xc9,0x25,0xdb,0xff]
+; X86NOBW-NEXT: vpmovdb %zmm3, %xmm3 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xdb]
+; X86NOBW-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k2} {z} # encoding: [0x62,0xf3,0x5d,0xca,0x25,0xe4,0xff]
+; X86NOBW-NEXT: vpmovdb %zmm4, %xmm4 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xe4]
+; X86NOBW-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x65,0x38,0xdc,0x01]
+; X86NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm0 # encoding: [0x62,0xf3,0xe5,0x48,0x3a,0xc0,0x01]
+; X86NOBW-NEXT: vpternlogq $202, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf3,0xf5,0x48,0x25,0xc2,0xca]
+; X86NOBW-NEXT: retl # encoding: [0xc3]
+;
+; X64NOBW-LABEL: test_vgf2p8mulb_512_mask:
+; X64NOBW: # %bb.0:
+; X64NOBW-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
+; X64NOBW-NEXT: movl %edi, %ecx # encoding: [0x89,0xf9]
+; X64NOBW-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64NOBW-NEXT: shrq $32, %rdi # encoding: [0x48,0xc1,0xef,0x20]
+; X64NOBW-NEXT: shrq $48, %rax # encoding: [0x48,0xc1,0xe8,0x30]
+; X64NOBW-NEXT: shrl $16, %ecx # encoding: [0xc1,0xe9,0x10]
+; X64NOBW-NEXT: kmovw %ecx, %k2 # encoding: [0xc5,0xf8,0x92,0xd1]
+; X64NOBW-NEXT: kmovw %eax, %k3 # encoding: [0xc5,0xf8,0x92,0xd8]
+; X64NOBW-NEXT: kmovw %edi, %k4 # encoding: [0xc5,0xf8,0x92,0xe7]
+; X64NOBW-NEXT: vgf2p8mulb %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0xcf,0xc9]
+; X64NOBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k4} {z} # encoding: [0x62,0xf3,0x7d,0xcc,0x25,0xc0,0xff]
+; X64NOBW-NEXT: vpmovdb %zmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0]
+; X64NOBW-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k3} {z} # encoding: [0x62,0xf3,0x65,0xcb,0x25,0xdb,0xff]
+; X64NOBW-NEXT: vpmovdb %zmm3, %xmm3 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xdb]
+; X64NOBW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xc3,0x01]
+; X64NOBW-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0x65,0xc9,0x25,0xdb,0xff]
+; X64NOBW-NEXT: vpmovdb %zmm3, %xmm3 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xdb]
+; X64NOBW-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k2} {z} # encoding: [0x62,0xf3,0x5d,0xca,0x25,0xe4,0xff]
+; X64NOBW-NEXT: vpmovdb %zmm4, %xmm4 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xe4]
+; X64NOBW-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x65,0x38,0xdc,0x01]
+; X64NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm0 # encoding: [0x62,0xf3,0xe5,0x48,0x3a,0xc0,0x01]
+; X64NOBW-NEXT: vpternlogq $202, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf3,0xf5,0x48,0x25,0xc2,0xca]
+; X64NOBW-NEXT: retq # encoding: [0xc3]
%1 = bitcast i64 %mask to <64 x i1>
%2 = call <64 x i8> @llvm.x86.vgf2p8mulb.512(<64 x i8> %src1, <64 x i8> %src2)
%3 = select <64 x i1> %1, <64 x i8> %2, <64 x i8> %passthru
@@ -327,17 +706,64 @@ define <64 x i8> @test_vgf2p8mulb_512_mask(<64 x i8> %src1, <64 x i8> %src2, <64
}
define <64 x i8> @test_vgf2p8mulb_512_maskz(<64 x i8> %src1, <64 x i8> %src2, i64 %mask) {
-; X86-LABEL: test_vgf2p8mulb_512_maskz:
-; X86: # %bb.0:
-; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vgf2p8mulb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0xcf,0xc1]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_vgf2p8mulb_512_maskz:
-; X64: # %bb.0:
-; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
-; X64-NEXT: vgf2p8mulb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0xcf,0xc1]
-; X64-NEXT: retq # encoding: [0xc3]
+; X86BW-LABEL: test_vgf2p8mulb_512_maskz:
+; X86BW: # %bb.0:
+; X86BW-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
+; X86BW-NEXT: vgf2p8mulb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0xcf,0xc1]
+; X86BW-NEXT: retl # encoding: [0xc3]
+;
+; X64BW-LABEL: test_vgf2p8mulb_512_maskz:
+; X64BW: # %bb.0:
+; X64BW-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
+; X64BW-NEXT: vgf2p8mulb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0xcf,0xc1]
+; X64BW-NEXT: retq # encoding: [0xc3]
+;
+; X86NOBW-LABEL: test_vgf2p8mulb_512_maskz:
+; X86NOBW: # %bb.0:
+; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k2 # encoding: [0xc5,0xf8,0x90,0x54,0x24,0x06]
+; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k3 # encoding: [0xc5,0xf8,0x90,0x5c,0x24,0x0a]
+; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k4 # encoding: [0xc5,0xf8,0x90,0x64,0x24,0x08]
+; X86NOBW-NEXT: vgf2p8mulb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0xcf,0xc1]
+; X86NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z} # encoding: [0x62,0xf3,0x75,0xcc,0x25,0xc9,0xff]
+; X86NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
+; X86NOBW-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k3} {z} # encoding: [0x62,0xf3,0x6d,0xcb,0x25,0xd2,0xff]
+; X86NOBW-NEXT: vpmovdb %zmm2, %xmm2 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xd2]
+; X86NOBW-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xca,0x01]
+; X86NOBW-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} # encoding: [0x62,0xf3,0x6d,0xc9,0x25,0xd2,0xff]
+; X86NOBW-NEXT: vpmovdb %zmm2, %xmm2 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xd2]
+; X86NOBW-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} # encoding: [0x62,0xf3,0x65,0xca,0x25,0xdb,0xff]
+; X86NOBW-NEXT: vpmovdb %zmm3, %xmm3 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xdb]
+; X86NOBW-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x6d,0x38,0xd3,0x01]
+; X86NOBW-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1 # encoding: [0x62,0xf3,0xed,0x48,0x3a,0xc9,0x01]
+; X86NOBW-NEXT: vpandq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xdb,0xc0]
+; X86NOBW-NEXT: retl # encoding: [0xc3]
+;
+; X64NOBW-LABEL: test_vgf2p8mulb_512_maskz:
+; X64NOBW: # %bb.0:
+; X64NOBW-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
+; X64NOBW-NEXT: movl %edi, %ecx # encoding: [0x89,0xf9]
+; X64NOBW-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64NOBW-NEXT: shrq $32, %rdi # encoding: [0x48,0xc1,0xef,0x20]
+; X64NOBW-NEXT: shrq $48, %rax # encoding: [0x48,0xc1,0xe8,0x30]
+; X64NOBW-NEXT: shrl $16, %ecx # encoding: [0xc1,0xe9,0x10]
+; X64NOBW-NEXT: kmovw %ecx, %k2 # encoding: [0xc5,0xf8,0x92,0xd1]
+; X64NOBW-NEXT: kmovw %eax, %k3 # encoding: [0xc5,0xf8,0x92,0xd8]
+; X64NOBW-NEXT: kmovw %edi, %k4 # encoding: [0xc5,0xf8,0x92,0xe7]
+; X64NOBW-NEXT: vgf2p8mulb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0xcf,0xc1]
+; X64NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z} # encoding: [0x62,0xf3,0x75,0xcc,0x25,0xc9,0xff]
+; X64NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
+; X64NOBW-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k3} {z} # encoding: [0x62,0xf3,0x6d,0xcb,0x25,0xd2,0xff]
+; X64NOBW-NEXT: vpmovdb %zmm2, %xmm2 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xd2]
+; X64NOBW-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xca,0x01]
+; X64NOBW-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} # encoding: [0x62,0xf3,0x6d,0xc9,0x25,0xd2,0xff]
+; X64NOBW-NEXT: vpmovdb %zmm2, %xmm2 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xd2]
+; X64NOBW-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} # encoding: [0x62,0xf3,0x65,0xca,0x25,0xdb,0xff]
+; X64NOBW-NEXT: vpmovdb %zmm3, %xmm3 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xdb]
+; X64NOBW-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x6d,0x38,0xd3,0x01]
+; X64NOBW-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1 # encoding: [0x62,0xf3,0xed,0x48,0x3a,0xc9,0x01]
+; X64NOBW-NEXT: vpandq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xdb,0xc0]
+; X64NOBW-NEXT: retq # encoding: [0xc3]
%1 = bitcast i64 %mask to <64 x i1>
%2 = call <64 x i8> @llvm.x86.vgf2p8mulb.512(<64 x i8> %src1, <64 x i8> %src2)
%3 = select <64 x i1> %1, <64 x i8> %2, <64 x i8> zeroinitializer